From: Nick Piggin <nickpiggin@yahoo.com.au>
To: Ulrich Drepper <drepper@redhat.com>
Cc: Blaisorblade <blaisorblade@yahoo.it>,
	Andrew Morton <akpm@osdl.org>,
	linux-kernel@vger.kernel.org,
	Linux Memory Management <linux-mm@kvack.org>,
	Val Henson <val.henson@intel.com>
Subject: Re: [patch 00/14] remap_file_pages protection support
Date: Sun, 14 May 2006 00:13:21 +1000
Message-ID: <4465E981.60302@yahoo.com.au>
In-Reply-To: <445D75EB.5030909@yahoo.com.au>

[-- Attachment #1: Type: text/plain, Size: 1433 bytes --]

Nick Piggin wrote:

> I think possibly each thread should have a private vma cache, with
> room for at least its stack vma(s), (and several others, eg. code,
> data). Perhaps the per-mm cache could be dispensed with completely,
> although it might be useful eg. for the heap. And it might be helped
> with increased entries as well.
> 
> I've got patches lying around to implement this stuff -- I'd be
> interested to have more detail about this problem, or distilled test
> cases.

OK, I got interested again, but can't get Val's ebizzy to give me
a find_vma-constrained workload yet (though the numbers back up
my assertion that the vma cache is crap for threaded apps).

Without the patch, after bootup, the vma cache gets 208 364 hits out
of 438 535 lookups (47.5%)

./ebizzy -t16: 384.29user 754.61system 5:31.87elapsed 343%CPU

And ebizzy gets 7 373 078 hits out of 82 255 863 lookups (8.9%)


With mm + 4 slot LRU per-thread cache (this patch):
After boot, 303 767 / 439 918 = 69.0%

./ebizzy -t16: 388.73user 750.29system 5:30.24elapsed 344%CPU

ebizzy hits: 53 024 083 / 82 055 195 = 64.6%
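
(The hit/lookup counts above come straight from the vma_cache_hit /
vma_cache_query counters the patch below exports in /proc/vmstat;
a rough, untested userspace snippet to read them back would look
something like this:)

/*
 * Rough, untested sketch: read the vma_cache_hit / vma_cache_query
 * counters (added to /proc/vmstat by the patch below) and print the
 * hit ratio.  The counter names are taken from the page_alloc.c hunk.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char name[64];
	unsigned long val, hit = 0, query = 0;
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f) {
		perror("/proc/vmstat");
		return 1;
	}
	while (fscanf(f, "%63s %lu", name, &val) == 2) {
		if (!strcmp(name, "vma_cache_hit"))
			hit = val;
		else if (!strcmp(name, "vma_cache_query"))
			query = val;
	}
	fclose(f);

	if (query)
		printf("%lu / %lu = %.1f%%\n", hit, query,
		       100.0 * hit / query);
	return 0;
}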


So on a non-threaded workload the hit rate improves by about 50%;
on a threaded workload it improves more than sevenfold. In
rbtree-walk-constrained workloads, the total find_vma speedup should
scale roughly with the improvement in hit rate.

I don't think my ebizzy numbers can justify the patch though...
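
For anyone after the sort of distilled test case I asked about above,
something like the following (rough, untested, not ebizzy) should be
find_vma-heavy with a lousy hit rate on the single-entry per-mm cache:
each thread carves a few hundred single-page vmas out of a PROT_NONE
reservation and faults over them round-robin.

/*
 * Untested sketch of a find_vma-heavy threaded workload.  Each thread
 * reserves 2*NR_VMAS pages PROT_NONE and maps every second page RW
 * with MAP_FIXED, so each RW page is its own vma fenced by PROT_NONE
 * (no merging).  It then touches the pages round-robin, zapping each
 * one with MADV_DONTNEED so the next touch faults again; every fault
 * does a find_vma() on a different vma, and with many threads sharing
 * the mm the single mmap_cache entry should almost never match.
 */
#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

#define NR_THREADS	16
#define NR_VMAS		256
#define ITERATIONS	1000000

static long pagesize;

static void *worker(void *arg)
{
	char *base, *pages[NR_VMAS];
	int i, j;

	(void)arg;
	base = mmap(NULL, 2 * NR_VMAS * pagesize, PROT_NONE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}
	for (i = 0; i < NR_VMAS; i++) {
		pages[i] = mmap(base + 2 * i * pagesize, pagesize,
				PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
				-1, 0);
		if (pages[i] == MAP_FAILED) {
			perror("mmap fixed");
			exit(1);
		}
	}
	for (j = 0; j < ITERATIONS; j++) {
		i = j % NR_VMAS;
		pages[i][0] = (char)j;		/* fault -> find_vma */
		madvise(pages[i], pagesize, MADV_DONTNEED);
	}
	return NULL;
}

int main(void)
{
	pthread_t tids[NR_THREADS];
	int i;

	pagesize = sysconf(_SC_PAGESIZE);
	for (i = 0; i < NR_THREADS; i++)
		pthread_create(&tids[i], NULL, worker, NULL);
	for (i = 0; i < NR_THREADS; i++)
		pthread_join(tids[i], NULL);
	return 0;
}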

Nick

-- 
SUSE Labs, Novell Inc.

[-- Attachment #2: vma.patch --]
[-- Type: text/plain, Size: 6766 bytes --]

Index: linux-2.6/mm/mmap.c
===================================================================
--- linux-2.6.orig/mm/mmap.c	2006-05-13 23:31:13.000000000 +1000
+++ linux-2.6/mm/mmap.c	2006-05-13 23:48:53.000000000 +1000
@@ -30,6 +30,99 @@
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
 
+static void vma_cache_touch(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	struct task_struct *curr = current;
+	if (mm == curr->mm) {
+		int i;
+		if (curr->vma_cache_sequence != mm->vma_sequence) {
+			curr->vma_cache_sequence = mm->vma_sequence;
+			curr->vma_cache[0] = vma;
+			for (i = 1; i < 4; i++)
+				curr->vma_cache[i] = NULL;
+		} else {
+			int update_mm;
+
+			if (curr->vma_cache[0] == vma)
+				return;
+
+			for (i = 1; i < 4; i++) {
+				if (curr->vma_cache[i] == vma)
+					break;
+			}
+			update_mm = 0;
+			if (i == 4) {
+				update_mm = 1;
+				i = 3;
+			}
+			while (i != 0) {
+				curr->vma_cache[i] = curr->vma_cache[i-1];
+				i--;
+			}
+			curr->vma_cache[0] = vma;
+
+			if (!update_mm)
+				return;
+		}
+	}
+
+	if (mm->vma_cache != vma) /* prevent cacheline bouncing */
+		mm->vma_cache = vma;
+}
+
+static void vma_cache_replace(struct mm_struct *mm, struct vm_area_struct *vma,
+						struct vm_area_struct *repl)
+{
+	mm->vma_sequence++;
+	if (unlikely(mm->vma_sequence == 0)) {
+		struct task_struct *curr = current, *t;
+		t = curr;
+		rcu_read_lock();
+		do {
+			t->vma_cache_sequence = -1;
+			t = next_thread(t);
+		} while (t != curr);
+		rcu_read_unlock();
+	}
+
+	if (mm->vma_cache == vma)
+		mm->vma_cache = repl;
+}
+
+static inline void vma_cache_invalidate(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	vma_cache_replace(mm, vma, NULL);
+}
+
+static struct vm_area_struct *vma_cache_find(struct mm_struct *mm,
+						unsigned long addr)
+{
+	struct task_struct *curr;
+	struct vm_area_struct *vma;
+
+	inc_page_state(vma_cache_query);
+
+	curr = current;
+	if (mm == curr->mm && mm->vma_sequence == curr->vma_cache_sequence) {
+		int i;
+		for (i = 0; i < 4; i++) {
+			vma = curr->vma_cache[i];
+			if (vma && vma->vm_end > addr && vma->vm_start <= addr){
+				inc_page_state(vma_cache_hit);
+				return vma;
+			}
+		}
+	}
+
+	vma = mm->vma_cache;
+	if (vma && vma->vm_end > addr && vma->vm_start <= addr) {
+		inc_page_state(vma_cache_hit);
+		return vma;
+	}
+
+	return NULL;
+}
+
 static void unmap_region(struct mm_struct *mm,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
 		unsigned long start, unsigned long end);
@@ -460,8 +553,6 @@
 {
 	prev->vm_next = vma->vm_next;
 	rb_erase(&vma->vm_rb, &mm->mm_rb);
-	if (mm->mmap_cache == vma)
-		mm->mmap_cache = prev;
 }
 
 /*
@@ -586,6 +677,7 @@
 		 * us to remove next before dropping the locks.
 		 */
 		__vma_unlink(mm, next, vma);
+		vma_cache_replace(mm, next, vma);
 		if (file)
 			__remove_shared_vm_struct(next, file, mapping);
 		if (next->anon_vma)
@@ -1384,8 +1476,8 @@
 	if (mm) {
 		/* Check the cache first. */
 		/* (Cache hit rate is typically around 35%.) */
-		vma = mm->mmap_cache;
-		if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
+		vma = vma_cache_find(mm, addr);
+		if (!vma) {
 			struct rb_node * rb_node;
 
 			rb_node = mm->mm_rb.rb_node;
@@ -1405,9 +1497,9 @@
 				} else
 					rb_node = rb_node->rb_right;
 			}
-			if (vma)
-				mm->mmap_cache = vma;
 		}
+		if (vma)
+			vma_cache_touch(mm, vma);
 	}
 	return vma;
 }
@@ -1424,6 +1516,14 @@
 	if (!mm)
 		goto out;
 
+	vma = vma_cache_find(mm, addr);
+	if (vma) {
+		rb_node = rb_prev(&vma->vm_rb);
+		if (rb_node)
+			prev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
+		goto out;
+	}
+
 	/* Guard against addr being lower than the first VMA */
 	vma = mm->mmap;
 
@@ -1445,6 +1545,9 @@
 	}
 
 out:
+	if (vma)
+		vma_cache_touch(mm, vma);
+
 	*pprev = prev;
 	return prev ? prev->vm_next : vma;
 }
@@ -1686,6 +1789,7 @@
 
 	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
 	do {
+		vma_cache_invalidate(mm, vma);
 		rb_erase(&vma->vm_rb, &mm->mm_rb);
 		mm->map_count--;
 		tail_vma = vma;
@@ -1698,7 +1802,6 @@
 	else
 		addr = vma ?  vma->vm_start : mm->mmap_base;
 	mm->unmap_area(mm, addr);
-	mm->mmap_cache = NULL;		/* Kill the cache. */
 }
 
 /*
Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h	2006-05-13 23:31:05.000000000 +1000
+++ linux-2.6/include/linux/page-flags.h	2006-05-13 23:31:44.000000000 +1000
@@ -164,6 +164,9 @@
 
 	unsigned long pgrotated;	/* pages rotated to tail of the LRU */
 	unsigned long nr_bounce;	/* pages for bounce buffers */
+
+	unsigned long vma_cache_hit;
+	unsigned long vma_cache_query;
 };
 
 extern void get_page_state(struct page_state *ret);
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c	2006-05-13 23:31:05.000000000 +1000
+++ linux-2.6/mm/page_alloc.c	2006-05-13 23:31:44.000000000 +1000
@@ -2389,6 +2389,9 @@
 
 	"pgrotated",
 	"nr_bounce",
+
+	"vma_cache_hit",
+	"vma_cache_query",
 };
 
 static void *vmstat_start(struct seq_file *m, loff_t *pos)
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h	2006-05-13 23:31:03.000000000 +1000
+++ linux-2.6/include/linux/sched.h	2006-05-13 23:33:01.000000000 +1000
@@ -293,9 +293,11 @@
 } while (0)
 
 struct mm_struct {
-	struct vm_area_struct * mmap;		/* list of VMAs */
+	struct vm_area_struct *mmap;		/* list of VMAs */
 	struct rb_root mm_rb;
-	struct vm_area_struct * mmap_cache;	/* last find_vma result */
+	struct vm_area_struct *vma_cache;
+	unsigned long vma_sequence;
+
 	unsigned long (*get_unmapped_area) (struct file *filp,
 				unsigned long addr, unsigned long len,
 				unsigned long pgoff, unsigned long flags);
@@ -734,6 +736,8 @@
 	struct list_head ptrace_list;
 
 	struct mm_struct *mm, *active_mm;
+	struct vm_area_struct *vma_cache[4];
+	unsigned long vma_cache_sequence;
 
 /* task state */
 	struct linux_binfmt *binfmt;
Index: linux-2.6/kernel/fork.c
===================================================================
--- linux-2.6.orig/kernel/fork.c	2006-05-13 23:31:03.000000000 +1000
+++ linux-2.6/kernel/fork.c	2006-05-13 23:32:59.000000000 +1000
@@ -197,7 +197,7 @@
 
 	mm->locked_vm = 0;
 	mm->mmap = NULL;
-	mm->mmap_cache = NULL;
+	mm->vma_sequence = oldmm->vma_sequence+1;
 	mm->free_area_cache = oldmm->mmap_base;
 	mm->cached_hole_size = ~0UL;
 	mm->map_count = 0;
@@ -238,6 +238,10 @@
 		tmp->vm_next = NULL;
 		anon_vma_link(tmp);
 		file = tmp->vm_file;
+
+		if (oldmm->vma_cache == mpnt)
+			mm->vma_cache = tmp;
+
 		if (file) {
 			struct inode *inode = file->f_dentry->d_inode;
 			get_file(file);
