From: clameter@sgi.com
To: akpm@linux-foundation.org
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	Pekka Enberg <penberg@cs.helsinki.fi>,
	suresh.b.siddha@intel.com
Subject: [patch 22/26] SLUB: kmem_cache_vacate to support page allocator memory defragmentation
Date: Mon, 18 Jun 2007 02:59:00 -0700
Message-ID: <20070618095918.636873785@sgi.com>
In-Reply-To: <20070618095838.238615343@sgi.com>

[-- Attachment #1: slab_defrag_kmem_cache_vacate --]
[-- Type: text/plain, Size: 7202 bytes --]

Add a special function kmem_cache_vacate() to push out the objects in a
specified slab. In order to make that work we have to handle slab page
allocation in such a way that we can determine whether a slab is valid
whenever we access it, regardless of where it is in its lifetime.

A valid slab whose objects can be freed has PageSlab(page) set and
page->inuse > 0. So we need to make sure in allocate_slab() that page->inuse
is zero before PageSlab is set, otherwise kmem_cache_vacate() may operate on
a slab that has not been properly set up yet.
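
To make the ordering concrete, here is a minimal sketch condensed from
the new_slab() and kmem_cache_vacate() hunks below (illustration only,
not part of the patch):

	/* Writer (new_slab): publish the slab only after inuse is stable. */
	page->inuse = 0;
	page->lockless_freelist = NULL;
	...
	smp_wmb();		/* order the stores above before PageSlab */
	__SetPageSlab(page);

	/* Reader (kmem_cache_vacate): revalidate under the slab lock. */
	if (!get_page_unless_zero(page))	/* page may vanish otherwise */
		return 0;
	if (!PageSlab(page))
		goto out;
	slab_lock(page);	/* implied barrier: inuse loaded after PageSlab */
	if (!PageSlab(page) || SlabFrozen(page) || !page->inuse)
		goto out_locked;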

There is currently no in-kernel user. The hope is that Mel's defragmentation
method can at some point use this functionality to make slab pages movable,
so that the reclaimable page type may no longer be necessary.
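
As a purely hypothetical usage sketch (there is no such caller in this
series; try_to_vacate_slab() is an invented name, only kmem_cache_vacate()
comes from this patch), a defragmentation pass could drive the new entry
point like this:

	static int try_to_vacate_slab(struct page *page)
	{
		/*
		 * kmem_cache_vacate() takes its own reference via
		 * get_page_unless_zero() and revalidates PageSlab() and
		 * page->inuse under the slab lock, so it can be handed
		 * any candidate page. It returns 1 on success.
		 */
		return kmem_cache_vacate(page);
	}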

Signed-off-by: Christoph Lameter <clameter@sgi.com>

---
 include/linux/slab.h |    1 
 mm/slab.c            |    9 ++++
 mm/slob.c            |    9 ++++
 mm/slub.c            |  109 ++++++++++++++++++++++++++++++++++++++++++++++-----
 4 files changed, 119 insertions(+), 9 deletions(-)

Index: linux-2.6.22-rc4-mm2/include/linux/slab.h
===================================================================
--- linux-2.6.22-rc4-mm2.orig/include/linux/slab.h	2007-06-17 18:12:29.000000000 -0700
+++ linux-2.6.22-rc4-mm2/include/linux/slab.h	2007-06-17 18:12:37.000000000 -0700
@@ -98,6 +98,7 @@ unsigned int kmem_cache_size(struct kmem
 const char *kmem_cache_name(struct kmem_cache *);
 int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr);
 int kmem_cache_defrag(int percentage, int node);
+int kmem_cache_vacate(struct page *);
 
 /*
  * Please use this macro to create slab caches. Simply specify the
Index: linux-2.6.22-rc4-mm2/mm/slab.c
===================================================================
--- linux-2.6.22-rc4-mm2.orig/mm/slab.c	2007-06-17 18:12:22.000000000 -0700
+++ linux-2.6.22-rc4-mm2/mm/slab.c	2007-06-17 18:12:37.000000000 -0700
@@ -2523,6 +2523,15 @@ int kmem_cache_defrag(int percent, int n
 	return 0;
 }
 
+/*
+ * SLAB does not support slab defragmentation
+ */
+int kmem_cache_vacate(struct page *page)
+{
+	return 0;
+}
+EXPORT_SYMBOL(kmem_cache_vacate);
+
 /**
  * kmem_cache_destroy - delete a cache
  * @cachep: the cache to destroy
Index: linux-2.6.22-rc4-mm2/mm/slob.c
===================================================================
--- linux-2.6.22-rc4-mm2.orig/mm/slob.c	2007-06-17 18:12:22.000000000 -0700
+++ linux-2.6.22-rc4-mm2/mm/slob.c	2007-06-17 18:12:37.000000000 -0700
@@ -558,6 +558,15 @@ int kmem_cache_defrag(int percentage, in
 	return 0;
 }
 
+/*
+ * SLOB does not support slab defragmentation
+ */
+int kmem_cache_vacate(struct page *page)
+{
+	return 0;
+}
+EXPORT_SYMBOL(kmem_cache_vacate);
+
 int kmem_ptr_validate(struct kmem_cache *a, const void *b)
 {
 	return 0;
Index: linux-2.6.22-rc4-mm2/mm/slub.c
===================================================================
--- linux-2.6.22-rc4-mm2.orig/mm/slub.c	2007-06-17 18:12:22.000000000 -0700
+++ linux-2.6.22-rc4-mm2/mm/slub.c	2007-06-17 18:12:37.000000000 -0700
@@ -1038,6 +1038,7 @@ static inline int slab_pad_check(struct 
 static inline int check_object(struct kmem_cache *s, struct page *page,
 			void *object, int active) { return 1; }
 static inline void add_full(struct kmem_cache_node *n, struct page *page) {}
+static inline void remove_full(struct kmem_cache *s, struct page *page) {}
 static inline void kmem_cache_open_debug_check(struct kmem_cache *s) {}
 #define slub_debug 0
 #endif
@@ -1103,12 +1104,11 @@ static struct page *new_slab(struct kmem
 	n = get_node(s, page_to_nid(page));
 	if (n)
 		atomic_long_inc(&n->nr_slabs);
+
+	page->inuse = 0;
+	page->lockless_freelist = NULL;
 	page->offset = s->offset / sizeof(void *);
 	page->slab = s;
-	page->flags |= 1 << PG_slab;
-	if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
-			SLAB_STORE_USER | SLAB_TRACE))
-		SetSlabDebug(page);
 
 	start = page_address(page);
 	end = start + s->objects * s->size;
@@ -1126,11 +1126,20 @@ static struct page *new_slab(struct kmem
 	set_freepointer(s, last, NULL);
 
 	page->freelist = start;
-	page->lockless_freelist = NULL;
-	page->inuse = 0;
-out:
-	if (flags & __GFP_WAIT)
-		local_irq_disable();
+
+	/*
+	 * page->inuse must be 0 when PageSlab(page) becomes
+	 * true so that defrag knows that this slab is not in use.
+	 */
+	smp_wmb();
+	__SetPageSlab(page);
+	if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
+			SLAB_STORE_USER | SLAB_TRACE))
+		SetSlabDebug(page);
+
+ out:
+	if (flags & __GFP_WAIT)
+		local_irq_disable();
 	return page;
 }
 
@@ -2654,6 +2663,88 @@ static unsigned long __kmem_cache_shrink
 }
 
 /*
+ * Get a page off a list and freeze it. Must be holding slab lock.
+ */
+static void freeze_from_list(struct kmem_cache *s, struct page *page)
+{
+	if (page->inuse < s->objects)
+		remove_partial(s, page);
+	else if (s->flags & SLAB_STORE_USER)
+		remove_full(s, page);
+	SetSlabFrozen(page);
+}
+
+/*
+ * Attempt to free objects in a page. Return 1 if successful.
+ */
+int kmem_cache_vacate(struct page *page)
+{
+	unsigned long flags;
+	struct kmem_cache *s;
+	int vacated = 0;
+	void **vector = NULL;
+
+	/*
+	 * Get a reference to the page. Return if it is freed or being freed.
+	 * This is necessary to make sure that the page does not vanish
+	 * from under us before we are able to check the result.
+	 */
+	if (!get_page_unless_zero(page))
+		return 0;
+
+	if (!PageSlab(page))
+		goto out;
+
+	s = page->slab;
+	if (!s)
+		goto out;
+
+	vector = kmalloc(s->objects * sizeof(void *), GFP_KERNEL);
+	if (!vector)
+		goto out2;
+
+	local_irq_save(flags);
+	/*
+	 * The implicit memory barrier in slab_lock guarantees that page->inuse
+	 * is loaded after PageSlab(page) has been established to be true.
+	 * Only relevant for a newly created slab.
+	 */
+	slab_lock(page);
+
+	/*
+	 * We may now have locked a page that may be in various stages of
+	 * being freed. If the PageSlab bit is off then we have already
+	 * reached the page allocator. If page->inuse is zero then we are
+	 * in SLUB but freeing or allocating the page.
+	 * page->inuse is never modified without the slab lock held.
+	 *
+	 * Also abort if the page happens to be already frozen. If it is
+	 * frozen then a concurrent vacate may be in progress.
+	 */
+	if (!PageSlab(page) || SlabFrozen(page) || !page->inuse)
+		goto out_locked;
+
+	/*
+	 * We are holding a lock on a slab page and all operations on the
+	 * slab are blocking.
+	 */
+	if (!s->ops->get || !s->ops->kick)
+		goto out_locked;
+	freeze_from_list(s, page);
+	vacated = __kmem_cache_vacate(s, page, flags, vector);
+out:
+	kfree(vector);
+out2:
+	put_page(page);
+	return vacated == 0;
+out_locked:
+	slab_unlock(page);
+	local_irq_restore(flags);
+	goto out;
+
+}
+
+/*
  * kmem_cache_shrink removes empty slabs from the partial lists and sorts
  * the remaining slabs by the number of items in use. The slabs with the
  * most items in use come first. New allocations will then fill those up


Thread overview: 73+ messages
2007-06-18  9:58 [patch 00/26] Current slab allocator / SLUB patch queue clameter
2007-06-18  9:58 ` [patch 01/26] SLUB Debug: Fix initial object debug state of NUMA bootstrap objects clameter
2007-06-18  9:58 ` [patch 02/26] Slab allocators: Consolidate code for krealloc in mm/util.c clameter
2007-06-18 20:03   ` Pekka Enberg
2007-06-18  9:58 ` [patch 03/26] Slab allocators: Consistent ZERO_SIZE_PTR support and NULL result semantics clameter
2007-06-18 20:08   ` Pekka Enberg
2007-06-18  9:58 ` [patch 04/26] Slab allocators: Support __GFP_ZERO in all allocators clameter
2007-06-18 10:09   ` Paul Mundt
2007-06-18 16:17     ` Christoph Lameter
2007-06-18 20:11   ` Pekka Enberg
2007-06-18  9:58 ` [patch 05/26] Slab allocators: Cleanup zeroing allocations clameter
2007-06-18 20:16   ` Pekka Enberg
2007-06-18 20:26     ` Pekka Enberg
2007-06-18 22:34       ` Christoph Lameter
2007-06-19  5:48         ` Pekka Enberg
2007-06-18 21:55     ` Christoph Lameter
2007-06-19 21:00   ` Matt Mackall
2007-06-19 22:33     ` Christoph Lameter
2007-06-20  6:14       ` Pekka J Enberg
2007-06-18  9:58 ` [patch 06/26] Slab allocators: Replace explicit zeroing with __GFP_ZERO clameter
2007-06-19 20:55   ` Pekka Enberg
2007-06-28  6:09   ` Andrew Morton
2007-06-18  9:58 ` [patch 07/26] SLUB: Add some more inlines and #ifdef CONFIG_SLUB_DEBUG clameter
2007-06-18  9:58 ` [patch 08/26] SLUB: Extract dma_kmalloc_cache from get_cache clameter
2007-06-18  9:58 ` [patch 09/26] SLUB: Do proper locking during dma slab creation clameter
2007-06-18  9:58 ` [patch 10/26] SLUB: Faster more efficient slab determination for __kmalloc clameter
2007-06-19 20:08   ` Andrew Morton
2007-06-19 22:22     ` Christoph Lameter
2007-06-19 22:29       ` Andrew Morton
2007-06-19 22:38         ` Christoph Lameter
2007-06-19 22:46           ` Andrew Morton
2007-06-25  6:41             ` Nick Piggin
2007-06-18  9:58 ` [patch 11/26] SLUB: Add support for kmem_cache_ops clameter
2007-06-19 20:58   ` Pekka Enberg
2007-06-19 22:32     ` Christoph Lameter
2007-06-18  9:58 ` [patch 12/26] SLUB: Slab defragmentation core clameter
2007-06-26  8:18   ` Andrew Morton
2007-06-26 18:19     ` Christoph Lameter
2007-06-26 18:38       ` Andrew Morton
2007-06-26 18:52         ` Christoph Lameter
2007-06-26 19:13   ` Nish Aravamudan
2007-06-26 19:19     ` Christoph Lameter
2007-06-18  9:58 ` [patch 13/26] SLUB: Extend slabinfo to support -D and -C options clameter
2007-06-18  9:58 ` [patch 14/26] SLUB: Logic to trigger slab defragmentation from memory reclaim clameter
2007-06-18  9:58 ` [patch 15/26] Slab defrag: Support generic defragmentation for inode slab caches clameter
2007-06-26  8:18   ` Andrew Morton
2007-06-26 18:21     ` Christoph Lameter
2007-06-26 19:28     ` Christoph Lameter
2007-06-26 19:37       ` Andrew Morton
2007-06-26 19:41         ` Christoph Lameter
2007-06-18  9:58 ` [patch 16/26] Slab defragmentation: Support defragmentation for extX filesystem inodes clameter
2007-06-18  9:58 ` [patch 17/26] Slab defragmentation: Support inode defragmentation for xfs clameter
2007-06-18  9:58 ` [patch 18/26] Slab defragmentation: Support procfs inode defragmentation clameter
2007-06-18  9:58 ` [patch 19/26] Slab defragmentation: Support reiserfs " clameter
2007-06-18  9:58 ` [patch 20/26] Slab defragmentation: Support inode defragmentation for sockets clameter
2007-06-18  9:58 ` [patch 21/26] Slab defragmentation: support dentry defragmentation clameter
2007-06-26  8:18   ` Andrew Morton
2007-06-26 18:23     ` Christoph Lameter
2007-06-18  9:59 ` clameter [this message]
2007-06-18  9:59 ` [patch 23/26] SLUB: Move sysfs operations outside of slub_lock clameter
2007-06-18  9:59 ` [patch 24/26] SLUB: Avoid page struct cacheline bouncing due to remote frees to cpu slab clameter
2007-06-18  9:59 ` [patch 25/26] SLUB: Add an object counter to the kmem_cache_cpu structure clameter
2007-06-18  9:59 ` [patch 26/26] SLUB: Place kmem_cache_cpu structures in a NUMA aware way clameter
2007-06-19 23:17   ` Christoph Lameter
2007-06-18 11:57 ` [patch 00/26] Current slab allocator / SLUB patch queue Michal Piotrowski
2007-06-18 16:46   ` Christoph Lameter
2007-06-18 17:38     ` Michal Piotrowski
2007-06-18 18:05       ` Christoph Lameter
2007-06-18 18:58         ` Michal Piotrowski
2007-06-18 19:00           ` Christoph Lameter
2007-06-18 19:09             ` Michal Piotrowski
2007-06-18 19:19               ` Christoph Lameter
2007-06-18 20:43                 ` Michal Piotrowski
