linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Nick Piggin <nickpiggin@yahoo.com.au>
To: Jakub Jelinek <jakub@redhat.com>
Cc: Ulrich Drepper <drepper@redhat.com>,
	Rik van Riel <riel@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Linux Kernel <linux-kernel@vger.kernel.org>,
	Linux Memory Management <linux-mm@kvack.org>
Subject: Re: missing madvise functionality
Date: Thu, 05 Apr 2007 14:23:20 +1000	[thread overview]
Message-ID: <461479B8.9090203@yahoo.com.au> (raw)
In-Reply-To: <4613660C.5010108@yahoo.com.au>

[-- Attachment #1: Type: text/plain, Size: 1872 bytes --]

Nick Piggin wrote:
> Jakub Jelinek wrote:
> 
>> On Wed, Apr 04, 2007 at 05:46:12PM +1000, Nick Piggin wrote:
>>
>>> Does mmap(PROT_NONE) actually free the memory?
>>
>>
>>
>> Yes.
>>         /* Clear old maps */
>>         error = -ENOMEM;
>> munmap_back:
>>         vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
>>         if (vma && vma->vm_start < addr + len) {
>>                 if (do_munmap(mm, addr, len))
>>                         return -ENOMEM;
>>                 goto munmap_back;
>>         }
> 
> 
> Thanks, I overlooked the mmap vs mprotect detail. So how are the subsequent
> access faults avoided?

AFAIKS, the faults are not avoided. Not for single page allocations, not
for multi-page allocations.

So what glibc currently does to allocate, use, then deallocate a page is
this:
   mprotect(PROT_READ|PROT_WRITE) -> down_write(mmap_sem)
   touch page -> page fault -> down_read(mmap_sem)
   mmap(PROT_NONE) -> down_write(mmap_sem)

What it could be doing is:
   touch page -> page fault -> down_read(mmap_sem)
   madvise(MADV_DONTNEED) -> down_read(mmap_sem)

So after my previously posted patch (attached again) to only take down_read
in madvise where possible...

With 2 threads on 2 CPUs, the attached test.c ends up doing about 140,000
context switches per second, takes a little over 2 million faults, and
needs about 80 seconds to complete when running the old_test() function
(i.e. mprotect, touch, mmap).

When running new_test() (ie. touch,madvise), context switches stay well
under 100, it takes slightly fewer faults, and it completes in about 8
seconds.

With 1 thread, new_test() actually completes in under half the time as
well (4.55 vs 9.88 seconds). This result won't have been altered by my
madvise patch, because the down_write fastpath is no slower than down_read.

Any comments?

-- 
SUSE Labs, Novell Inc.

[-- Attachment #2: madv-mmap_sem.patch --]
[-- Type: text/plain, Size: 1305 bytes --]

Index: linux-2.6/mm/madvise.c
===================================================================
--- linux-2.6.orig/mm/madvise.c
+++ linux-2.6/mm/madvise.c
@@ -12,6 +12,25 @@
 #include <linux/hugetlb.h>
 
 /*
+ * Any behaviour which results in changes to the vma->vm_flags needs to
+ * take mmap_sem for writing. Others, which simply traverse vmas, need
+ * to only take it for reading.
+ */
+static int madvise_need_mmap_write(int behavior)
+{
+	switch (behavior) {
+	case MADV_DOFORK:
+	case MADV_DONTFORK:
+	case MADV_NORMAL:
+	case MADV_SEQUENTIAL:
+	case MADV_RANDOM:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
  * We can potentially split a vm area into separate
  * areas, each area with its own behavior.
  */
@@ -264,7 +283,10 @@ asmlinkage long sys_madvise(unsigned lon
 	int error = -EINVAL;
 	size_t len;
 
-	down_write(&current->mm->mmap_sem);
+	if (madvise_need_mmap_write(behavior))
+		down_write(&current->mm->mmap_sem);
+	else
+		down_read(&current->mm->mmap_sem);
 
 	if (start & ~PAGE_MASK)
 		goto out;
@@ -323,6 +345,10 @@ asmlinkage long sys_madvise(unsigned lon
 		vma = prev->vm_next;
 	}
 out:
-	up_write(&current->mm->mmap_sem);
+	if (madvise_need_mmap_write(behavior))
+		up_write(&current->mm->mmap_sem);
+	else
+		up_read(&current->mm->mmap_sem);
+
 	return error;
 }

[-- Attachment #3: test.c --]
[-- Type: text/x-csrc, Size: 1868 bytes --]

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

/* Number of worker threads each test creates and joins. */
#define NR_THREADS	1
/* Number of loop iterations every worker thread performs. */
#define ITERS	1000000
/* Size in bytes (4 KiB) of the region a single worker operates on. */
#define HEAPSIZE	(4*1024)

/*
 * Worker for the mprotect/touch/mmap scheme.
 *
 * For ITERS rounds: make the HEAPSIZE region at @heap readable and
 * writable, store one byte at its start, then replace the region with a
 * fresh PROT_NONE anonymous mapping at the same address.
 *
 * Any failing call prints a perror() message and terminates the whole
 * process with exit status 1. Always returns NULL.
 */
static void *old_thread(void *heap)
{
	char *first_byte = heap;
	int round;

	for (round = 0; round < ITERS; round++) {
		if (mprotect(heap, HEAPSIZE, PROT_READ|PROT_WRITE) == -1) {
			perror("mprotect");
			exit(1);
		}

		/* Store to the region so the page is actually touched. */
		*first_byte = round;

		/* MAP_FIXED over the same range replaces the existing mapping. */
		if (mmap(heap, HEAPSIZE, PROT_NONE,
			 MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == MAP_FAILED) {
			perror("mmap");
			exit(1);
		}
	}

	return NULL;
}

/*
 * Driver for the mprotect/touch/mmap benchmark.
 *
 * Maps one PROT_NONE anonymous area of NR_THREADS*HEAPSIZE bytes, starts
 * NR_THREADS old_thread() workers (worker i gets the slice beginning at
 * byte offset i*HEAPSIZE), waits for all of them, then unmaps the area.
 *
 * Any failure prints a diagnostic to stderr and exits with status 1.
 */
static void old_test(void)
{
	void *heap;
	pthread_t pt[NR_THREADS];
	int i;
	int err;

	heap = mmap(NULL, NR_THREADS*HEAPSIZE, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if (heap == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}

	for (i = 0; i < NR_THREADS; i++) {
		/*
		 * pthread functions return an error number (0 on success) and
		 * do not set errno, so test against 0 and decode the returned
		 * value with strerror() instead of using perror().
		 * The char * cast avoids non-standard arithmetic on void *.
		 */
		err = pthread_create(&pt[i], NULL, old_thread, (char *)heap + i*HEAPSIZE);
		if (err != 0) {
			fprintf(stderr, "pthread_create: %s\n", strerror(err));
			exit(1);
		}
	}
	for (i = 0; i < NR_THREADS; i++) {
		err = pthread_join(pt[i], NULL);
		if (err != 0) {
			fprintf(stderr, "pthread_join: %s\n", strerror(err));
			exit(1);
		}
	}

	if (munmap(heap, NR_THREADS*HEAPSIZE) == -1) {
		perror("munmap");
		exit(1);
	}
}

/*
 * Worker for the touch/madvise scheme.
 *
 * For ITERS rounds: store one byte at the start of the HEAPSIZE region at
 * @heap, then call madvise(MADV_DONTNEED) on that region.
 *
 * A failing madvise() prints a perror() message and terminates the whole
 * process with exit status 1. Always returns NULL.
 */
static void *new_thread(void *heap)
{
	char *first_byte = heap;
	int round;

	for (round = 0; round < ITERS; round++) {
		/* Store to the region so the page is actually touched. */
		*first_byte = round;

		if (madvise(heap, HEAPSIZE, MADV_DONTNEED) == -1) {
			perror("madvise");
			exit(1);
		}
	}

	return NULL;
}

/*
 * Driver for the touch/madvise benchmark.
 *
 * Maps one readable/writable anonymous area of NR_THREADS*HEAPSIZE bytes,
 * starts NR_THREADS new_thread() workers (worker i gets the slice
 * beginning at byte offset i*HEAPSIZE), waits for all of them, then
 * unmaps the area.
 *
 * Any failure prints a diagnostic to stderr and exits with status 1.
 */
static void new_test(void)
{
	void *heap;
	pthread_t pt[NR_THREADS];
	int i;
	int err;

	/*
	 * The area must hold one HEAPSIZE slice per worker: worker i is
	 * handed heap + i*HEAPSIZE, so mapping only HEAPSIZE bytes would
	 * leave every worker but the first outside the mapping.
	 */
	heap = mmap(NULL, NR_THREADS*HEAPSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if (heap == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}

	for (i = 0; i < NR_THREADS; i++) {
		/*
		 * pthread functions return an error number (0 on success) and
		 * do not set errno, so test against 0 and decode the returned
		 * value with strerror() instead of using perror().
		 * The char * cast avoids non-standard arithmetic on void *.
		 */
		err = pthread_create(&pt[i], NULL, new_thread, (char *)heap + i*HEAPSIZE);
		if (err != 0) {
			fprintf(stderr, "pthread_create: %s\n", strerror(err));
			exit(1);
		}
	}
	for (i = 0; i < NR_THREADS; i++) {
		err = pthread_join(pt[i], NULL);
		if (err != 0) {
			fprintf(stderr, "pthread_join: %s\n", strerror(err));
			exit(1);
		}
	}

	/* Unmap exactly what was mapped above. */
	if (munmap(heap, NR_THREADS*HEAPSIZE) == -1) {
		perror("munmap");
		exit(1);
	}
}

/*
 * Program entry point: runs old_test() and reports success.
 * Only old_test() is invoked here; new_test() is defined in this file
 * but is not called from main().
 */
int main(void)
{
	old_test();

	return 0;
}


  reply	other threads:[~2007-04-05  4:23 UTC|newest]

Thread overview: 87+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <46128051.9000609@redhat.com>
     [not found] ` <p73648dz5oa.fsf@bingen.suse.de>
     [not found]   ` <46128CC2.9090809@redhat.com>
     [not found]     ` <20070403172841.GB23689@one.firstfloor.org>
2007-04-03 19:59       ` Andrew Morton
2007-04-03 20:09         ` Andi Kleen
2007-04-03 20:17         ` Ulrich Drepper
2007-04-03 20:29           ` Jakub Jelinek
2007-04-03 20:38             ` Rik van Riel
2007-04-03 21:49             ` Andrew Morton
2007-04-03 23:01               ` Eric Dumazet
2007-04-04  2:22                 ` Nick Piggin
2007-04-04  5:41                   ` Eric Dumazet
2007-04-04  6:09                     ` [patches] threaded vma patches (was Re: missing madvise functionality) Nick Piggin
2007-04-04  6:26                       ` Andrew Morton
2007-04-04  6:38                         ` Nick Piggin
2007-04-04  6:42                       ` Ulrich Drepper
2007-04-04  6:44                         ` Nick Piggin
2007-04-04  6:50                         ` Eric Dumazet
2007-04-04  6:54                           ` Ulrich Drepper
2007-04-04  7:33                             ` Eric Dumazet
2007-04-04  8:25                   ` missing madvise functionality Peter Zijlstra
2007-04-04  8:55                     ` Nick Piggin
2007-04-04  9:12                       ` William Lee Irwin III
2007-04-04  9:23                         ` Nick Piggin
2007-04-04  9:34                       ` Eric Dumazet
2007-04-04  9:45                         ` Nick Piggin
2007-04-04 10:05                         ` Nick Piggin
2007-04-04 11:54                           ` Eric Dumazet
2007-04-05  2:01                             ` Nick Piggin
2007-04-05  6:09                               ` Eric Dumazet
2007-04-05  6:19                                 ` Ulrich Drepper
2007-04-05  6:54                                   ` Eric Dumazet
2007-04-03 23:02               ` Andrew Morton
2007-04-04  9:15                 ` Hugh Dickins
2007-04-04 14:55                   ` Rik van Riel
2007-04-04 15:25                     ` Hugh Dickins
2007-04-05  1:44                       ` Nick Piggin
2007-04-04 18:04                   ` Andrew Morton
2007-04-04 18:08                     ` Rik van Riel
2007-04-04 20:56                       ` Andrew Morton
2007-04-04 18:39                     ` Hugh Dickins
2007-04-03 23:44               ` Andrew Morton
2007-04-04 13:09             ` William Lee Irwin III
2007-04-04 13:38               ` William Lee Irwin III
2007-04-04 18:51               ` Andrew Morton
2007-04-05  4:14                 ` William Lee Irwin III
2007-04-04 23:00             ` preemption and rwsems (was: Re: missing madvise functionality) Andrew Morton
2007-04-05  7:31             ` missing madvise functionality Rik van Riel
2007-04-05  7:39               ` Rik van Riel
2007-04-05  8:32                 ` Andrew Morton
2007-04-05 15:47                   ` Rik van Riel
2007-04-05  8:08               ` Eric Dumazet
2007-04-05  8:31                 ` Rik van Riel
2007-04-05  9:06                   ` Eric Dumazet
2007-04-05  9:45               ` Jakub Jelinek
2007-04-05 16:15                 ` Rik van Riel
2007-04-05 16:10               ` Ulrich Drepper
2007-04-06  2:28                 ` Nick Piggin
2007-04-06  2:52                   ` Ulrich Drepper
2007-04-06  2:59                     ` Nick Piggin
2007-04-05 12:48             ` preemption and rwsems (was: Re: missing madvise functionality) David Howells
2007-04-05 19:11               ` Ingo Molnar
2007-04-05 20:37                 ` Andrew Morton
2007-04-06  9:08                   ` Ingo Molnar
2007-04-06 19:30                     ` Andrew Morton
2007-04-06 19:40                       ` Ingo Molnar
2007-04-05 19:27               ` Andrew Morton
2007-04-03 20:51           ` missing madvise functionality Andrew Morton
2007-04-03 20:57             ` Ulrich Drepper
2007-04-03 21:00             ` Rik van Riel
2007-04-03 21:10               ` Eric Dumazet
2007-04-03 21:12                 ` Jörn Engel
2007-04-03 21:15                 ` Rik van Riel
2007-04-03 21:30                   ` Eric Dumazet
2007-04-03 21:22                 ` Jeremy Fitzhardinge
2007-04-03 21:29                   ` Rik van Riel
2007-04-03 21:46                 ` Ulrich Drepper
2007-04-03 22:51                   ` Andi Kleen
2007-04-03 23:07                     ` Ulrich Drepper
2007-04-03 21:16               ` Andrew Morton
2007-04-04 18:49             ` Anton Blanchard
2007-04-04  7:46 ` Nick Piggin
2007-04-04  8:04   ` Nick Piggin
2007-04-04  8:20   ` Jakub Jelinek
2007-04-04  8:47     ` Nick Piggin
2007-04-05  4:23       ` Nick Piggin [this message]
2007-04-05 18:38   ` Rik van Riel
2007-04-05 21:07     ` Andrew Morton
2007-04-05 21:39       ` Rik van Riel
2007-04-06  1:28     ` Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=461479B8.9090203@yahoo.com.au \
    --to=nickpiggin@yahoo.com.au \
    --cc=akpm@linux-foundation.org \
    --cc=drepper@redhat.com \
    --cc=jakub@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=riel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox