From: Christoph Lameter <cl@linux.com>
To: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: linux-mm@kvack.org
Subject: [RFC V2 SLEB 14/14] SLEB: Allocate off node objects from remote shared caches
Date: Fri, 21 May 2010 16:15:06 -0500	[thread overview]
Message-ID: <20100521211545.336946412@quilx.com> (raw)
In-Reply-To: <20100521211452.659982351@quilx.com>

[-- Attachment #1: sled_off_node_from_shared --]
[-- Type: text/plain, Size: 7316 bytes --]

This is in a draft state.

Leave the cpu queue alone for off-node accesses and go directly to the
remote node's shared cache for allocations.
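
A rough, user-space sketch of the intended fast-path policy follows. It is
not part of the patch; all names are simplified stand-ins for the kernel
structures, and the shared_lock taken in the real code is elided:

#include <stdio.h>

#define QUEUE_SIZE 8
#define NR_NODES   2

struct cpu_queue {			/* models kmem_cache_cpu */
	int objects;
	void *object[QUEUE_SIZE];
};

struct shared_cache {			/* models the per node shared cache */
	int objects;
	void *object[QUEUE_SIZE];
};

static void *model_alloc(struct cpu_queue *c, struct shared_cache *nodes,
			 int node, int local_node)
{
	if (node == -1 || node == local_node)
		/* Local or unspecified node: pop from the hot cpu queue */
		return c->objects ? c->object[--c->objects] : NULL;

	/* Off node: leave the cpu queue alone, take from the remote cache */
	return nodes[node].objects ?
		nodes[node].object[--nodes[node].objects] : NULL;
}

int main(void)
{
	static char a, b;
	struct cpu_queue c = { .objects = 1, .object = { &a } };
	struct shared_cache nodes[NR_NODES] = {
		[1] = { .objects = 1, .object = { &b } },
	};

	printf("local alloc:    %p\n", model_alloc(&c, nodes, -1, 0));
	printf("off-node alloc: %p\n", model_alloc(&c, nodes, 1, 0));
	return 0;
}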

Signed-off-by: Christoph Lameter <cl@linux-foundation.org>

---
 include/linux/slub_def.h |    1 
 mm/slub.c                |  184 ++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 142 insertions(+), 43 deletions(-)

Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h	2010-05-21 15:30:47.000000000 -0500
+++ linux-2.6/include/linux/slub_def.h	2010-05-21 15:34:45.000000000 -0500
@@ -42,7 +42,6 @@ struct kmem_cache_cpu {
 	unsigned stat[NR_SLUB_STAT_ITEMS];
 #endif
 	int objects;		/* Number of objects available */
-	int node;		/* The node of the page (or -1 for debug) */
 	void *object[BOOT_QUEUE_SIZE];		/* List of objects */
 };
 
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2010-05-21 15:30:47.000000000 -0500
+++ linux-2.6/mm/slub.c	2010-05-21 15:37:04.000000000 -0500
@@ -1616,19 +1616,6 @@ static void resize_cpu_queue(struct kmem
 	}
 }
 
-/*
- * Check if the objects in a per cpu structure fit numa
- * locality expectations.
- */
-static inline int node_match(struct kmem_cache_cpu *c, int node)
-{
-#ifdef CONFIG_NUMA
-	if (node != -1 && c->node != node)
-		return 0;
-#endif
-	return 1;
-}
-
 static unsigned long count_partial(struct kmem_cache_node *n,
 					int (*get_count)(struct page *))
 {
@@ -1718,9 +1705,9 @@ void retrieve_objects(struct kmem_cache 
 	}
 }
 
+#ifdef CONFIG_NUMA
 static inline int find_numa_node(struct kmem_cache *s, int selected_node)
 {
-#ifdef CONFIG_NUMA
 	if (s->flags & SLAB_MEM_SPREAD &&
 			!in_interrupt() &&
 			selected_node == SLAB_NODE_UNSPECIFIED) {
@@ -1731,10 +1718,113 @@ static inline int find_numa_node(struct 
 		if (current->mempolicy)
 			return slab_node(current->mempolicy);
 	}
-#endif
 	return selected_node;
 }
 
+/*
+ * Try to allocate a partial slab from a specific node.
+ */
+static struct page *__get_partial_node(struct kmem_cache_node *n)
+{
+	struct page *page;
+
+	if (!n->nr_partial)
+		return NULL;
+
+	list_for_each_entry(page, &n->partial, lru)
+		if (lock_and_freeze_slab(n, page))
+			goto out;
+	page = NULL;
+out:
+	return page;
+}
+
+static void *off_node_alloc(struct kmem_cache *s, int node, gfp_t gfpflags)
+{
+	void *object = NULL;
+	struct kmem_cache_node *n = get_node(s, node);
+
+	spin_lock(&n->shared_lock);
+
+	while (!object) {
+		/* Direct allocation from remote shared cache */
+		if (n->objects) {
+#if 0
+			/* Taking a hot object remotely  */
+			object = n->object[--n->objects];
+#else
+			/* Take a cold object from the remote shared cache */
+			object = n->object[0];
+			n->objects--;
+			memmove(n->object, n->object + 1, n->objects * sizeof(void *));
+#endif
+			break;
+		}
+
+		while (n->objects < s->batch) {
+			struct page *new;
+			int d;
+
+			/* Should be getting cold remote page !! This is hot */
+			new = __get_partial_node(n);
+			if (unlikely(!new)) {
+
+				spin_unlock(&n->shared_lock);
+
+				if (gfpflags & __GFP_WAIT)
+					local_irq_enable();
+
+				new = new_slab(s, gfpflags, node);
+
+				if (gfpflags & __GFP_WAIT)
+					local_irq_disable();
+
+				spin_lock(&n->shared_lock);
+
+ 				if (!new)
+					goto out;
+
+				stat(s, ALLOC_SLAB);
+				slab_lock(new);
+			} else
+				stat(s, ALLOC_FROM_PARTIAL);
+
+			d = min(s->batch - n->objects, available(new));
+			retrieve_objects(s, new, n->object + n->objects, d);
+			n->objects += d;
+
+			if (!all_objects_used(new))
+				add_partial(get_node(s, page_to_nid(new)), new, 1);
+			else
+				add_full(s, get_node(s, page_to_nid(new)), new);
+
+			slab_unlock(new);
+		}
+	}
+out:
+	spin_unlock(&n->shared_lock);
+	return object;
+}
+
+/*
+ * Check if the requested node is either unspecified or the local
+ * node, so that the per cpu queue satisfies numa locality
+ * expectations.
+ */
+static inline int node_local(int node)
+{
+	if (node != -1 && numa_node_id() != node)
+		return 0;
+	return 1;
+}
+
+#else
+static inline int find_numa_node(struct kmem_cache *s, int selected_node) { return selected_node; }
+static inline void *off_node_alloc(struct kmem_cache *s, int node, gfp_t gfpflags) { return NULL; }
+static inline int node_local(int node) { return 1; }
+#endif
 
 static void *slab_alloc(struct kmem_cache *s,
 		gfp_t gfpflags, int node, unsigned long addr)
@@ -1753,36 +1843,41 @@ redo:
 	node = find_numa_node(s, node);
 	local_irq_save(flags);
 	c = __this_cpu_ptr(s->cpu_slab);
-	if (unlikely(!c->objects || !node_match(c, node))) {
+	if (unlikely(!c->objects || !node_local(node))) {
+
+		struct kmem_cache_node *n;
 
 		gfpflags &= gfp_allowed_mask;
 
-		if (unlikely(!node_match(c, node))) {
-			flush_cpu_objects(s, c);
-			c->node = node;
-		} else {
-			struct kmem_cache_node *n = get_node(s, c->node);
+		if (unlikely(!node_local(node))) {
+			object = off_node_alloc(s, node, gfpflags);
+			if (!object)
+				goto oom;
+			else
+				goto got_object;
+		}
 
-			/*
-			 * Node specified is matching the stuff that we cache,
-			 * so we could retrieve objects from the shared cache
-			 * of the indicated node if there would be anything
-			 * there.
-			 */
-			if (n->objects) {
-				int d;
+		n = get_node(s, numa_node_id());
 
-				spin_lock(&n->shared_lock);
-				d = min(min(s->batch, s->shared), n->objects);
-				if (d > 0) {
-					memcpy(c->object + c->objects,
-						n->object + n->objects - d,
-						d * sizeof(void *));
-					n->objects -= d;
-					c->objects += d;
-				}
-				spin_unlock(&n->shared_lock);
+		/*
+		 * The requested node matches the node whose objects we
+		 * cache locally, so we can refill the cpu queue from the
+		 * shared cache of that node if it holds any objects.
+		 */
+		if (n->objects) {
+			int d;
+
+			spin_lock(&n->shared_lock);
+			d = min(min(s->batch, s->shared), n->objects);
+			if (d > 0) {
+				memcpy(c->object + c->objects,
+					n->object + n->objects - d,
+					d * sizeof(void *));
+				n->objects -= d;
+				c->objects += d;
 			}
+			spin_unlock(&n->shared_lock);
 		}
 
 		while (c->objects < s->batch) {
@@ -1833,6 +1928,8 @@ redo:
 
 	object = c->object[--c->objects];
 
+got_object:
+
 	if (unlikely(debug_on(s))) {
 		if (!alloc_debug_processing(s, object, addr))
 			goto redo;
@@ -1962,8 +2059,10 @@ static void slab_free(struct kmem_cache 
 	if (!(s->flags & SLAB_DEBUG_OBJECTS))
 		debug_check_no_obj_freed(object, s->objsize);
 
+#ifdef CONFIG_NUMA
 	if (numa_off_node_free(s, x))
 		goto out;
+#endif
 
 	if (unlikely(c->objects >= s->queue)) {
 
@@ -3941,8 +4040,9 @@ static ssize_t show_slab_objects(struct 
 
 		for_each_possible_cpu(cpu) {
 			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+			int node = cpu_to_node(cpu);
 
-			if (!c || c->node < 0)
+			if (!c)
 				continue;
 
 			if (c->objects) {
@@ -3954,9 +4054,9 @@ static ssize_t show_slab_objects(struct 
 					x = 1;
 
 				total += x;
-				nodes[c->node] += x;
+				nodes[node] += x;
 			}
-			per_cpu[c->node]++;
+			per_cpu[node]++;
 		}
 	}
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <dont@kvack.org>

Thread overview: 69+ messages
2010-05-21 21:14 [RFC V2 SLEB 00/14] The Enhanced(hopefully) Slab Allocator Christoph Lameter
2010-05-21 21:14 ` [RFC V2 SLEB 01/14] slab: Introduce a constant for a unspecified node Christoph Lameter
2010-06-07 21:44   ` David Rientjes
2010-06-07 22:30     ` Christoph Lameter
2010-06-08  5:41       ` Pekka Enberg
2010-06-08  6:20         ` David Rientjes
2010-06-08  6:34           ` Pekka Enberg
2010-06-08 23:35             ` David Rientjes
2010-06-09  5:55               ` Pekka Enberg
2010-06-09  6:20                 ` David Rientjes
2010-05-21 21:14 ` [RFC V2 SLEB 02/14] SLUB: Constants need UL Christoph Lameter
2010-05-21 21:14 ` [RFC V2 SLEB 03/14] SLUB: Use kmem_cache flags to detect if Slab is in debugging mode Christoph Lameter
2010-06-08  3:57   ` David Rientjes
2010-05-21 21:14 ` [RFC V2 SLEB 04/14] SLUB: discard_slab_unlock Christoph Lameter
2010-05-21 21:14 ` [RFC V2 SLEB 05/14] SLUB: is_kmalloc_cache Christoph Lameter
2010-06-08  8:54   ` David Rientjes
2010-05-21 21:14 ` [RFC V2 SLEB 06/14] SLUB: Get rid of the kmalloc_node slab Christoph Lameter
2010-06-09  6:14   ` David Rientjes
2010-06-09 16:14     ` Christoph Lameter
2010-06-09 16:26       ` Pekka Enberg
2010-06-10  6:07         ` Pekka Enberg
2010-05-21 21:14 ` [RFC V2 SLEB 07/14] SLEB: The Enhanced Slab Allocator Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 08/14] SLEB: Resize cpu queue Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 09/14] SLED: Get rid of useless function Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 10/14] SLEB: Remove MAX_OBJS limitation Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 11/14] SLEB: Add per node cache (with a fixed size for now) Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 12/14] SLEB: Make the size of the shared cache configurable Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 13/14] SLEB: Enhanced NUMA support Christoph Lameter
2010-05-21 21:15 ` Christoph Lameter [this message]
2010-05-22  8:37 ` [RFC V2 SLEB 00/14] The Enhanced(hopefully) Slab Allocator Pekka Enberg
2010-05-24  7:03 ` Nick Piggin
2010-05-24 15:06   ` Christoph Lameter
2010-05-25  2:06     ` Nick Piggin
2010-05-25  6:55       ` Pekka Enberg
2010-05-25  7:07         ` Nick Piggin
2010-05-25  8:03           ` Pekka Enberg
2010-05-25  8:16             ` Nick Piggin
2010-05-25  9:19               ` Pekka Enberg
2010-05-25  9:34                 ` Nick Piggin
2010-05-25  9:53                   ` Pekka Enberg
2010-05-25 10:19                     ` Nick Piggin
2010-05-25 10:45                       ` Pekka Enberg
2010-05-25 11:06                         ` Nick Piggin
2010-05-25 15:13                         ` Linus Torvalds
2010-05-25 15:43                           ` Nick Piggin
2010-05-25 17:02                             ` Pekka Enberg
2010-05-25 17:19                               ` Nick Piggin
2010-05-25 17:35                                 ` Pekka Enberg
2010-05-25 17:40                                   ` Nick Piggin
2010-05-25 10:07               ` David Rientjes
2010-05-25 10:02             ` David Rientjes
2010-05-25 10:47               ` Pekka Enberg
2010-05-25 19:57                 ` David Rientjes
2010-05-25 14:13       ` Christoph Lameter
2010-05-25 14:34         ` Nick Piggin
2010-05-25 14:43           ` Nick Piggin
2010-05-25 14:48           ` Christoph Lameter
2010-05-25 15:11             ` Nick Piggin
2010-05-25 15:28               ` Christoph Lameter
2010-05-25 15:37                 ` Nick Piggin
2010-05-27 14:24                   ` Christoph Lameter
2010-05-27 14:37                     ` Nick Piggin
2010-05-27 15:52                       ` Christoph Lameter
2010-05-27 16:07                         ` Nick Piggin
2010-05-27 16:57                           ` Christoph Lameter
2010-05-28  8:39                             ` Nick Piggin
2010-05-25 14:40         ` Nick Piggin
2010-05-25 14:48           ` Christoph Lameter
2010-05-25 15:12             ` Nick Piggin
