linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Jakub Jelinek <jakub@redhat.com>
To: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel <linux-kernel@vger.kernel.org>,
	linux-mm <linux-mm@kvack.org>, shak <dshaks@redhat.com>
Subject: Re: [PATCH] lazy freeing of memory through MADV_FREE
Date: Sat, 21 Apr 2007 03:12:02 -0400	[thread overview]
Message-ID: <20070421071202.GA355@devserv.devel.redhat.com> (raw)
In-Reply-To: <4629524C.5040302@redhat.com>

On Fri, Apr 20, 2007 at 07:52:44PM -0400, Rik van Riel wrote:
> It turns out that Nick's patch does not improve peak
> performance much, but it does prevent the decline when
> running with 16 threads on my quad core CPU!
> 
> We _definately_ want both patches, there's a huge benefit
> in having them both.
> 
> Here are the transactions/seconds for each combination:
> 
>    vanilla   new glibc  madv_free kernel   madv_free + mmap_sem
> threads
> 
> 1     610         609             596                545
> 2    1032        1136            1196               1200
> 4    1070        1128            2014               2024
> 8    1000        1088            1665               2087
> 16    779        1073            1310               1999

FYI, I have uploaded a testing glibc that uses MADV_FREE and falls back
to MADV_DONTUSE if MADV_FREE is not available, to
http://people.redhat.com/jakub/glibc/2.5.90-21.1/
and I'm also attaching the glibc patch for those who want to build it
themselves:

2007-04-19  Ulrich Drepper  <drepper@redhat.com>
	    Jakub Jelinek  <jakub@redhat.com>

	* malloc/arena.c (heap_info): Add mprotect_size field, adjust pad.
	(new_heap): Initialize mprotect_size.
	(no_madv_free): New variable.
	(grow_heap): When growing, only mprotect from mprotect_size till
	new_size if mprotect_size is smaller.  When shrinking, use PROT_NONE
	MMAP for __libc_enable_secure only, otherwise if MADV_FREE is
	available use it and fall back to MADV_DONTNEED.
	* sysdeps/unix/sysv/linux/alpha/bits/mman.h (MADV_FREE): Define.
	* sysdeps/unix/sysv/linux/ia64/bits/mman.h (MADV_FREE): Likewise.
	* sysdeps/unix/sysv/linux/i386/bits/mman.h (MADV_FREE): Likewise.
	* sysdeps/unix/sysv/linux/s390/bits/mman.h (MADV_FREE): Likewise.
	* sysdeps/unix/sysv/linux/powerpc/bits/mman.h (MADV_FREE): Likewise.
	* sysdeps/unix/sysv/linux/x86_64/bits/mman.h (MADV_FREE): Likewise.
	* sysdeps/unix/sysv/linux/sparc/bits/mman.h (MADV_FREE): Likewise.
	* sysdeps/unix/sysv/linux/sh/bits/mman.h (MADV_FREE): Likewise.

--- libc/malloc/arena.c.jj	2006-10-31 23:05:31.000000000 +0100
+++ libc/malloc/arena.c	2007-04-19 18:54:20.000000000 +0200
@@ -1,5 +1,6 @@
 /* Malloc implementation for multiple threads without lock contention.
-   Copyright (C) 2001,2002,2003,2004,2005,2006 Free Software Foundation, Inc.
+   Copyright (C) 2001,2002,2003,2004,2005,2006,2007
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Wolfram Gloger <wg@malloc.de>, 2001.
 
@@ -59,10 +60,12 @@ typedef struct _heap_info {
   mstate ar_ptr; /* Arena for this heap. */
   struct _heap_info *prev; /* Previous heap. */
   size_t size;   /* Current size in bytes. */
+  size_t mprotect_size;	/* Size in bytes that has been mprotected
+			   PROT_READ|PROT_WRITE.  */
   /* Make sure the following data is properly aligned, particularly
      that sizeof (heap_info) + 2 * SIZE_SZ is a multiple of
-     MALLOG_ALIGNMENT. */
-  char pad[-5 * SIZE_SZ & MALLOC_ALIGN_MASK];
+     MALLOC_ALIGNMENT. */
+  char pad[-6 * SIZE_SZ & MALLOC_ALIGN_MASK];
 } heap_info;
 
 /* Get a compile-time error if the heap_info padding is not correct
@@ -692,10 +695,15 @@ new_heap(size, top_pad) size_t size, top
   }
   h = (heap_info *)p2;
   h->size = size;
+  h->mprotect_size = size;
   THREAD_STAT(stat_n_heaps++);
   return h;
 }
 
+#if defined _LIBC && defined MADV_FREE
+static int no_madv_free;
+#endif
+
 /* Grow or shrink a heap.  size is automatically rounded up to a
    multiple of the page size if it is positive. */
 
@@ -714,17 +722,49 @@ grow_heap(h, diff) heap_info *h; long di
     new_size = (long)h->size + diff;
     if((unsigned long) new_size > (unsigned long) HEAP_MAX_SIZE)
       return -1;
-    if(mprotect((char *)h + h->size, diff, PROT_READ|PROT_WRITE) != 0)
-      return -2;
+    if((unsigned long) new_size > h->mprotect_size) {
+      if (mprotect((char *)h + h->mprotect_size,
+		   (unsigned long) new_size - h->mprotect_size,
+		   PROT_READ|PROT_WRITE) != 0)
+	return -2;
+      h->mprotect_size = new_size;
+    }
   } else {
     new_size = (long)h->size + diff;
     if(new_size < (long)sizeof(*h))
       return -1;
     /* Try to re-map the extra heap space freshly to save memory, and
        make it inaccessible. */
-    if((char *)MMAP((char *)h + new_size, -diff, PROT_NONE,
-                    MAP_PRIVATE|MAP_FIXED) == (char *) MAP_FAILED)
-      return -2;
+#ifdef _LIBC
+    if (__builtin_expect (__libc_enable_secure, 0))
+#else
+    if (1)
+#endif
+      {
+	if((char *)MMAP((char *)h + new_size, -diff, PROT_NONE,
+			MAP_PRIVATE|MAP_FIXED) == (char *) MAP_FAILED)
+	  return -2;
+	h->mprotect_size = new_size;
+      }
+#ifdef _LIBC
+    else
+      {
+# ifdef MADV_FREE
+	if (!__builtin_expect (no_madv_free, 0))
+	  {
+	    if (__builtin_expect (madvise ((char *)h + new_size,
+					   -diff, MADV_FREE), 0) == -1
+		&& errno == EINVAL)
+	      {
+		no_madv_free = 1;
+		madvise ((char *)h + new_size, -diff, MADV_DONTNEED);
+	      }
+	  }
+	else
+# endif
+	  madvise ((char *)h + new_size, -diff, MADV_DONTNEED);
+      }
+#endif
     /*fprintf(stderr, "shrink %p %08lx\n", h, new_size);*/
   }
   h->size = new_size;
--- libc/sysdeps/unix/sysv/linux/alpha/bits/mman.h.jj	2006-05-02 16:33:44.000000000 +0200
+++ libc/sysdeps/unix/sysv/linux/alpha/bits/mman.h	2007-04-19 18:37:43.000000000 +0200
@@ -1,5 +1,6 @@
 /* Definitions for POSIX memory map interface.  Linux/Alpha version.
-   Copyright (C) 1997, 1998, 2000, 2003, 2006 Free Software Foundation, Inc.
+   Copyright (C) 1997, 1998, 2000, 2003, 2006, 2007
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -96,6 +97,7 @@
 # define MADV_SEQUENTIAL 2	/* Expect sequential page references.  */
 # define MADV_WILLNEED   3	/* Will need these pages.  */
 # define MADV_DONTNEED   6	/* Don't need these pages.  */
+# define MADV_FREE	 7	/* Content can be freed.  */
 # define MADV_REMOVE	 9	/* Remove these pages and resources.  */
 # define MADV_DONTFORK	 10	/* Do not inherit across fork.  */
 # define MADV_DOFORK	 11	/* Do inherit across fork.  */
--- libc/sysdeps/unix/sysv/linux/ia64/bits/mman.h.jj	2006-05-02 16:33:44.000000000 +0200
+++ libc/sysdeps/unix/sysv/linux/ia64/bits/mman.h	2007-04-19 18:37:43.000000000 +0200
@@ -1,5 +1,6 @@
 /* Definitions for POSIX memory map interface.  Linux/ia64 version.
-   Copyright (C) 1997,1998,2000,2003,2005,2006 Free Software Foundation, Inc.
+   Copyright (C) 1997,1998,2000,2003,2005,2006,2007
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -89,6 +90,7 @@
 # define MADV_SEQUENTIAL 2	/* Expect sequential page references.  */
 # define MADV_WILLNEED	 3	/* Will need these pages.  */
 # define MADV_DONTNEED	 4	/* Don't need these pages.  */
+# define MADV_FREE	 5	/* Content can be freed.  */
 # define MADV_REMOVE	 9	/* Remove these pages and resources.  */
 # define MADV_DONTFORK	 10	/* Do not inherit across fork.  */
 # define MADV_DOFORK	 11	/* Do inherit across fork.  */
--- libc/sysdeps/unix/sysv/linux/i386/bits/mman.h.jj	2006-05-02 16:33:44.000000000 +0200
+++ libc/sysdeps/unix/sysv/linux/i386/bits/mman.h	2007-04-19 18:37:43.000000000 +0200
@@ -1,5 +1,6 @@
 /* Definitions for POSIX memory map interface.  Linux/i386 version.
-   Copyright (C) 1997, 2000, 2003, 2005, 2006 Free Software Foundation, Inc.
+   Copyright (C) 1997, 2000, 2003, 2005, 2006, 2007
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -88,6 +89,7 @@
 # define MADV_SEQUENTIAL 2	/* Expect sequential page references.  */
 # define MADV_WILLNEED	 3	/* Will need these pages.  */
 # define MADV_DONTNEED	 4	/* Don't need these pages.  */
+# define MADV_FREE	 5	/* Content can be freed.  */
 # define MADV_REMOVE	 9	/* Remove these pages and resources.  */
 # define MADV_DONTFORK	 10	/* Do not inherit across fork.  */
 # define MADV_DOFORK	 11	/* Do inherit across fork.  */
--- libc/sysdeps/unix/sysv/linux/s390/bits/mman.h.jj	2006-05-02 16:33:44.000000000 +0200
+++ libc/sysdeps/unix/sysv/linux/s390/bits/mman.h	2007-04-19 18:37:43.000000000 +0200
@@ -1,5 +1,6 @@
 /* Definitions for POSIX memory map interface.  Linux/s390 version.
-   Copyright (C) 2000,2001,2002,2003,2005,2006 Free Software Foundation, Inc.
+   Copyright (C) 2000,2001,2002,2003,2005,2006,2007
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -89,6 +90,7 @@
 # define MADV_SEQUENTIAL 2	/* Expect sequential page references.  */
 # define MADV_WILLNEED	 3	/* Will need these pages.  */
 # define MADV_DONTNEED	 4	/* Don't need these pages.  */
+# define MADV_FREE	 5	/* Content can be freed.  */
 # define MADV_REMOVE	 9	/* Remove these pages and resources.  */
 # define MADV_DONTFORK	 10	/* Do not inherit across fork.  */
 # define MADV_DOFORK	 11	/* Do inherit across fork.  */
--- libc/sysdeps/unix/sysv/linux/powerpc/bits/mman.h.jj	2006-05-02 16:33:44.000000000 +0200
+++ libc/sysdeps/unix/sysv/linux/powerpc/bits/mman.h	2007-04-19 18:37:43.000000000 +0200
@@ -1,5 +1,6 @@
 /* Definitions for POSIX memory map interface.  Linux/PowerPC version.
-   Copyright (C) 1997, 2000, 2003, 2005, 2006 Free Software Foundation, Inc.
+   Copyright (C) 1997, 2000, 2003, 2005, 2006, 2007
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -89,6 +90,7 @@
 # define MADV_SEQUENTIAL 2	/* Expect sequential page references.  */
 # define MADV_WILLNEED	 3	/* Will need these pages.  */
 # define MADV_DONTNEED	 4	/* Don't need these pages.  */
+# define MADV_FREE	 5	/* Content can be freed.  */
 # define MADV_REMOVE	 9	/* Remove these pages and resources.  */
 # define MADV_DONTFORK	 10	/* Do not inherit across fork.  */
 # define MADV_DOFORK	 11	/* Do inherit across fork.  */
--- libc/sysdeps/unix/sysv/linux/x86_64/bits/mman.h.jj	2006-05-02 16:33:46.000000000 +0200
+++ libc/sysdeps/unix/sysv/linux/x86_64/bits/mman.h	2007-04-19 18:37:43.000000000 +0200
@@ -1,5 +1,5 @@
 /* Definitions for POSIX memory map interface.  Linux/x86_64 version.
-   Copyright (C) 2001, 2003, 2005, 2006 Free Software Foundation, Inc.
+   Copyright (C) 2001, 2003, 2005, 2006, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -89,6 +89,7 @@
 # define MADV_SEQUENTIAL 2	/* Expect sequential page references.  */
 # define MADV_WILLNEED	 3	/* Will need these pages.  */
 # define MADV_DONTNEED	 4	/* Don't need these pages.  */
+# define MADV_FREE	 5	/* Content can be freed.  */
 # define MADV_REMOVE	 9	/* Remove these pages and resources.  */
 # define MADV_DONTFORK	 10	/* Do not inherit across fork.  */
 # define MADV_DOFORK	 11	/* Do inherit across fork.  */
--- libc/sysdeps/unix/sysv/linux/sparc/bits/mman.h.jj	2006-05-02 16:33:44.000000000 +0200
+++ libc/sysdeps/unix/sysv/linux/sparc/bits/mman.h	2007-04-19 18:37:43.000000000 +0200
@@ -1,5 +1,6 @@
 /* Definitions for POSIX memory map interface.  Linux/SPARC version.
-   Copyright (C) 1997,1999,2000,2003,2005,2006 Free Software Foundation, Inc.
+   Copyright (C) 1997,1999,2000,2003,2005,2006,2007
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -90,7 +91,7 @@
 # define MADV_SEQUENTIAL 2	/* Expect sequential page references.  */
 # define MADV_WILLNEED	 3	/* Will need these pages.  */
 # define MADV_DONTNEED	 4	/* Don't need these pages.  */
-# define MADV_FREE	 5	/* Content can be freed (Solaris).  */
+# define MADV_FREE	 5	/* Content can be freed.  */
 # define MADV_REMOVE	 9	/* Remove these pages and resources.  */
 # define MADV_DONTFORK	 10	/* Do not inherit across fork.  */
 # define MADV_DOFORK	 11	/* Do inherit across fork.  */
--- libc/sysdeps/unix/sysv/linux/sh/bits/mman.h.jj	2006-05-02 16:33:44.000000000 +0200
+++ libc/sysdeps/unix/sysv/linux/sh/bits/mman.h	2007-04-19 18:37:43.000000000 +0200
@@ -1,5 +1,6 @@
 /* Definitions for POSIX memory map interface.  Linux/SH version.
-   Copyright (C) 1997,1999,2000,2003,2005,2006 Free Software Foundation, Inc.
+   Copyright (C) 1997,1999,2000,2003,2005,2006,2007
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -88,6 +89,7 @@
 # define MADV_SEQUENTIAL 2	/* Expect sequential page references.  */
 # define MADV_WILLNEED	 3	/* Will need these pages.  */
 # define MADV_DONTNEED	 4	/* Don't need these pages.  */
+# define MADV_FREE	 5	/* Content can be freed.  */
 # define MADV_REMOVE	 9	/* Remove these pages and resources.  */
 # define MADV_DONTFORK	 10	/* Do not inherit across fork.  */
 # define MADV_DOFORK	 11	/* Do inherit across fork.  */


	Jakub

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2007-04-21  7:12 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-04-17  7:15 Rik van Riel
2007-04-19 21:15 ` [PATCH] lazy freeing of memory through MADV_FREE 2/2 Rik van Riel
2007-04-20 21:03   ` Andrew Morton
2007-04-20 21:24     ` Ulrich Drepper
2007-04-21  7:37       ` Hugh Dickins
2007-04-21 16:32         ` Ulrich Drepper
2007-04-20 20:57 ` [PATCH] lazy freeing of memory through MADV_FREE Andrew Morton
2007-04-20 21:38   ` Rik van Riel
2007-04-20 22:06     ` Andrew Morton
2007-04-20 23:52       ` Rik van Riel
2007-04-21  0:48         ` Eric Dumazet
2007-04-21  3:58           ` Rik van Riel
2007-04-21  7:12         ` Jakub Jelinek [this message]
2007-04-23  4:36           ` Nick Piggin
2007-04-22  2:36         ` Nick Piggin
2007-04-22  2:50           ` Nick Piggin
2007-04-22  6:31           ` Rik van Riel
2007-04-23  0:16             ` Nick Piggin
2007-04-23  3:53               ` Rik van Riel
2007-04-23  3:58                 ` Nick Piggin
2007-04-23 10:07                   ` Nick Piggin
2007-04-23 10:12                     ` Rik van Riel
2007-04-23  3:59                 ` Rik van Riel
2007-04-23  9:20                   ` Rik van Riel
2007-04-23 10:21                     ` Nick Piggin
2007-04-23 10:31                       ` Rik van Riel
2007-04-23 10:35                         ` Nick Piggin
2007-04-23 10:44                           ` Rik van Riel
2007-04-24  1:15                             ` Nick Piggin
2007-04-24  1:58                               ` Rik van Riel
2007-04-24  2:16                                 ` Nick Piggin
2007-04-24  4:42                                 ` Paul Mackerras
2007-04-24  5:13                                   ` Rik van Riel
2007-04-24  2:53                           ` Rik van Riel
2007-04-24  3:08                             ` Andrew Morton
2007-04-23 10:44                       ` Jakub Jelinek
2007-04-23 11:45                     ` Rik van Riel
2007-04-23  4:28           ` Rik van Riel
2007-04-21  7:24     ` Hugh Dickins
2007-04-21 18:06       ` Rik van Riel
2007-04-22  8:18 ` Andrew Morton
2007-04-22  9:16   ` Christoph Hellwig
2007-04-22 16:55     ` Ulrich Drepper

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070421071202.GA355@devserv.devel.redhat.com \
    --to=jakub@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=dshaks@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=riel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox