From: Davidlohr Bueso <davidlohr@hp.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>,
Michel Lespinasse <walken@google.com>,
Ingo Molnar <mingo@kernel.org>, Mel Gorman <mgorman@suse.de>,
Rik van Riel <riel@redhat.com>, Guan Xuetao <gxt@mprc.pku.edu.cn>,
aswin@hp.com, linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH] mm: cache largest vma
Date: Fri, 01 Nov 2013 13:17:19 -0700
Message-ID: <1383337039.2653.18.camel@buesod1.americas.hpqcorp.net>

While caching the last used vma already does a nice job of avoiding
rbtree walks in find_vma, we can do better. After measuring the hit
rate across a range of workloads and environments, it turns out to
be roughly constant at 45-50% for a standard desktop system (gnome3
+ evolution + firefox + a few xterms), several Java-related
workloads (including Hadoop/terasort), and aim7 - noticeably better
than the 35% value documented in the code.

By also caching the largest vma - that is, the one spanning the
most addresses - we get a steady 10-15% hit rate gain, pushing the
overall rate above 60%. This improvement comes at very low overhead
on a miss. Systems with !CONFIG_MMU keep the current single-pointer
logic.
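
The lookup policy boils down to: try the last used vma, then the
largest one, and only then walk the rbtree. A minimal user-space
sketch of that order (simplified types, not the kernel code itself):

	#include <stddef.h>

	struct vma { unsigned long vm_start, vm_end; };

	enum { VMA_LAST_USED, VMA_LARGEST, NR_VMA_CACHES };

	/* Return the cached vma containing addr, NULL on a miss. */
	static struct vma *cache_lookup(struct vma *cache[NR_VMA_CACHES],
					unsigned long addr)
	{
		int i;

		for (i = 0; i < NR_VMA_CACHES; i++) {
			struct vma *v = cache[i];

			/* same containment test find_vma uses */
			if (v && v->vm_start <= addr && v->vm_end > addr)
				return v;
		}
		return NULL;	/* fall back to the rbtree walk */
	}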

This patch introduces a second mmap_cache pointer, which is just as
racy as the first but, as we already know, that does not matter in
this context. For documentation purposes, I have also added
ACCESS_ONCE() around the mm->mmap_cache updates, keeping them
consistent with the reads.
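
For context, ACCESS_ONCE() simply forces the compiler to emit a
single, un-merged load or store by going through a volatile cast;
include/linux/compiler.h defines it as:

	#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

so both sides of the racy hint end up annotated the same way:

	vma = ACCESS_ONCE(mm->mmap_cache[VMA_LAST_USED]);	/* read */
	ACCESS_ONCE(mm->mmap_cache[VMA_LAST_USED]) = vma;	/* update */
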
Cc: Hugh Dickins <hughd@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
---
Please note that the nommu and unicore32 paths are *untested*.

I also have a patch on top of this one that caches the most used
vma, which adds another 8-10% hit rate gain. However, since it adds
a counter to the vma structure and requires extra logic in find_vma
to keep track, I was hesitant about the overhead. If folks are
interested I can send that out as well; the rough idea is sketched
below.
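
To give an idea of what that variant would look like (a
hypothetical sketch only - the vm_cache_hits field and the
VMA_MOST_USED slot are made-up names for illustration, not the
actual follow-up patch):

	/* at the end of find_vma(), once vma is known */
	if (vma) {
		vma->vm_cache_hits++;	/* new per-vma counter */
		if (!mm->mmap_cache[VMA_MOST_USED] ||
		    vma->vm_cache_hits >
		    mm->mmap_cache[VMA_MOST_USED]->vm_cache_hits)
			mm->mmap_cache[VMA_MOST_USED] = vma;
	}
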
Documentation/vm/locking | 4 +-
arch/unicore32/include/asm/mmu_context.h | 2 +-
include/linux/mm.h | 13 ++++++
include/linux/mm_types.h | 15 ++++++-
kernel/debug/debug_core.c | 17 +++++++-
kernel/fork.c | 2 +-
mm/mmap.c | 68 ++++++++++++++++++++------------
7 files changed, 87 insertions(+), 34 deletions(-)
diff --git a/Documentation/vm/locking b/Documentation/vm/locking
index f61228b..b4e8154 100644
--- a/Documentation/vm/locking
+++ b/Documentation/vm/locking
@@ -42,8 +42,8 @@ The rules are:
for mm B.
The caveats are:
-1. find_vma() makes use of, and updates, the mmap_cache pointer hint.
-The update of mmap_cache is racy (page stealer can race with other code
+1. find_vma() makes use of, and updates, the mmap_cache pointer hints.
+The updates of mmap_cache are racy (page stealer can race with other code
that invokes find_vma with mmap_sem held), but that is okay, since it
is a hint. This can be fixed, if desired, by having find_vma grab the
page_table_lock.
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index fb5e4c6..38cc7fc 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -73,7 +73,7 @@ do { \
else \
mm->mmap = NULL; \
rb_erase(&high_vma->vm_rb, &mm->mm_rb); \
- mm->mmap_cache = NULL; \
+ vma_clear_caches(mm); \
mm->map_count--; \
remove_vma(high_vma); \
} \
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8b6e55e..2c0f8ed 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1534,8 +1534,21 @@ static inline void mm_populate(unsigned long addr, unsigned long len)
/* Ignore errors */
(void) __mm_populate(addr, len, 1);
}
+
+static inline void vma_clear_caches(struct mm_struct *mm)
+{
+ int i;
+
+ for (i = 0; i < NR_VMA_CACHES; i++)
+ mm->mmap_cache[i] = NULL;
+}
#else
static inline void mm_populate(unsigned long addr, unsigned long len) {}
+
+static inline void vma_clear_caches(struct mm_struct *mm)
+{
+ mm->mmap_cache = NULL;
+}
#endif
/* These take the mm semaphore themselves */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d9851ee..7f92835 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -322,12 +322,23 @@ struct mm_rss_stat {
atomic_long_t count[NR_MM_COUNTERS];
};
+
+#ifdef CONFIG_MMU
+enum {
+ VMA_LAST_USED, /* last find_vma result */
+ VMA_LARGEST, /* vma spanning the most addresses */
+ NR_VMA_CACHES
+};
+#endif
+
struct kioctx_table;
struct mm_struct {
struct vm_area_struct * mmap; /* list of VMAs */
struct rb_root mm_rb;
- struct vm_area_struct * mmap_cache; /* last find_vma result */
-#ifdef CONFIG_MMU
+#ifndef CONFIG_MMU
+ struct vm_area_struct *mmap_cache; /* last find_vma result */
+#else
+ struct vm_area_struct *mmap_cache[NR_VMA_CACHES];
unsigned long (*get_unmapped_area) (struct file *filp,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags);
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 0506d44..d9d72e4 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -221,13 +221,26 @@ int __weak kgdb_skipexception(int exception, struct pt_regs *regs)
*/
static void kgdb_flush_swbreak_addr(unsigned long addr)
{
+ struct mm_struct *mm = current->mm;
if (!CACHE_FLUSH_IS_SAFE)
return;
- if (current->mm && current->mm->mmap_cache) {
- flush_cache_range(current->mm->mmap_cache,
+#ifdef CONFIG_MMU
+ if (mm) {
+ int i;
+
+ for (i = 0; i < NR_VMA_CACHES; i++)
+ if (mm->mmap_cache[i])
+ flush_cache_range(mm->mmap_cache[i],
+ addr,
+ addr + BREAK_INSTR_SIZE);
+ }
+#else
+ if (mm && mm->mmap_cache) {
+ flush_cache_range(mm->mmap_cache,
addr, addr + BREAK_INSTR_SIZE);
}
+#endif
/* Force flush instruction cache if it was outside the mm */
flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 086fe73..7b92666 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -363,8 +363,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
mm->locked_vm = 0;
mm->mmap = NULL;
- mm->mmap_cache = NULL;
mm->map_count = 0;
+ vma_clear_caches(mm);
cpumask_clear(mm_cpumask(mm));
mm->mm_rb = RB_ROOT;
rb_link = &mm->mm_rb.rb_node;
diff --git a/mm/mmap.c b/mm/mmap.c
index 9d54851..29c3fc0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -676,14 +676,17 @@ static inline void
__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
struct vm_area_struct *prev)
{
+ int i;
struct vm_area_struct *next;
vma_rb_erase(vma, &mm->mm_rb);
prev->vm_next = next = vma->vm_next;
if (next)
next->vm_prev = prev;
- if (mm->mmap_cache == vma)
- mm->mmap_cache = prev;
+
+ for (i = 0; i < NR_VMA_CACHES; i++)
+ if (mm->mmap_cache[i] == vma)
+ mm->mmap_cache[i] = prev;
}
/*
@@ -1972,34 +1975,47 @@ EXPORT_SYMBOL(get_unmapped_area);
/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
+ unsigned long currlen = 0;
+ struct rb_node *rb_node;
struct vm_area_struct *vma = NULL;
- /* Check the cache first. */
- /* (Cache hit rate is typically around 35%.) */
- vma = ACCESS_ONCE(mm->mmap_cache);
- if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
- struct rb_node *rb_node;
+ /* Check the cache first */
+ vma = ACCESS_ONCE(mm->mmap_cache[VMA_LAST_USED]);
+ if (vma && vma->vm_end > addr && vma->vm_start <= addr)
+ goto ret;
- rb_node = mm->mm_rb.rb_node;
- vma = NULL;
+ vma = ACCESS_ONCE(mm->mmap_cache[VMA_LARGEST]);
+ if (vma) {
+ if (vma->vm_end > addr && vma->vm_start <= addr)
+ goto ret;
+ currlen = vma->vm_end - vma->vm_start;
+ }
- while (rb_node) {
- struct vm_area_struct *vma_tmp;
-
- vma_tmp = rb_entry(rb_node,
- struct vm_area_struct, vm_rb);
-
- if (vma_tmp->vm_end > addr) {
- vma = vma_tmp;
- if (vma_tmp->vm_start <= addr)
- break;
- rb_node = rb_node->rb_left;
- } else
- rb_node = rb_node->rb_right;
- }
- if (vma)
- mm->mmap_cache = vma;
+ /* Bad cache! Iterate the rbtree */
+ rb_node = mm->mm_rb.rb_node;
+ vma = NULL;
+
+ while (rb_node) {
+ struct vm_area_struct *vma_tmp;
+
+ vma_tmp = rb_entry(rb_node,
+ struct vm_area_struct, vm_rb);
+
+ if (vma_tmp->vm_end > addr) {
+ vma = vma_tmp;
+ if (vma_tmp->vm_start <= addr)
+ break;
+ rb_node = rb_node->rb_left;
+ } else
+ rb_node = rb_node->rb_right;
+ }
+
+ if (vma) {
+ ACCESS_ONCE(mm->mmap_cache[VMA_LAST_USED]) = vma;
+ if (vma->vm_end - vma->vm_start > currlen)
+ ACCESS_ONCE(mm->mmap_cache[VMA_LARGEST]) = vma;
}
+ret:
return vma;
}
@@ -2371,7 +2387,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
} else
mm->highest_vm_end = prev ? prev->vm_end : 0;
tail_vma->vm_next = NULL;
- mm->mmap_cache = NULL; /* Kill the cache. */
+ vma_clear_caches(mm);
}
/*
--
1.8.1.4