From: Christoph Lameter <cl@linux.com>
To: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: linux-mm@kvack.org
Subject: [RFC V2 SLEB 12/14] SLEB: Make the size of the shared cache configurable
Date: Fri, 21 May 2010 16:15:04 -0500
Message-ID: <20100521211544.174575855@quilx.com>
In-Reply-To: <20100521211452.659982351@quilx.com>
[-- Attachment #1: sled_shared_dynamic --]
[-- Type: text/plain, Size: 6393 bytes --]
This makes the size of the shared array configurable. Note that this is a bit
problematic and there are likely unresolved race conditions: the kmem_cache->node[x]
pointers become unstable if interrupts are allowed.
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
---
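For illustration only (not part of the patch): on a NUMA build the new tunable
shows up as a sysfs attribute of each cache under /sys/kernel/slab/. The cache
name below is just an example; per the store handler, the new size must be at
least the cache's batch size and at most 10000:

	cat /sys/kernel/slab/kmalloc-64/shared_queue_size
	echo 500 > /sys/kernel/slab/kmalloc-64/shared_queue_size
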
include/linux/slub_def.h | 3 +
mm/slub.c | 133 +++++++++++++++++++++++++++++++++++++++--------
2 files changed, 116 insertions(+), 20 deletions(-)
Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h 2010-05-21 13:17:14.000000000 -0500
+++ linux-2.6/include/linux/slub_def.h 2010-05-21 13:47:41.000000000 -0500
@@ -81,11 +81,14 @@ struct kmem_cache {
struct kmem_cache_order_objects oo;
int queue; /* per cpu queue size */
int batch; /* batch size */
+ int shared; /* Shared queue size */
+#ifndef CONFIG_NUMA
/*
* Avoid an extra cache line for UP, SMP and for the node local to
* struct kmem_cache.
*/
struct kmem_cache_node local_node;
+#endif
/* Allocation and freeing of slabs */
struct kmem_cache_order_objects max;
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2010-05-21 13:17:14.000000000 -0500
+++ linux-2.6/mm/slub.c 2010-05-21 13:48:01.000000000 -0500
@@ -1754,7 +1754,7 @@ redo:
int d;
spin_lock(&n->shared_lock);
- d = min(min(s->batch, BOOT_QUEUE_SIZE), n->objects);
+ d = min(min(s->batch, s->shared), n->objects);
if (d > 0) {
memcpy(c->object + c->objects,
n->object + n->objects - d,
@@ -1864,6 +1864,7 @@ void *kmem_cache_alloc_node(struct kmem_
return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
+
#endif
#ifdef CONFIG_TRACING
@@ -2176,10 +2177,7 @@ static void free_kmem_cache_nodes(struct
int node;
for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = s->node[node];
-
- if (n && n != &s->local_node)
- kfree(n);
+ kfree(s->node[node]);
s->node[node] = NULL;
}
}
@@ -2197,27 +2195,96 @@ static int init_kmem_cache_nodes(struct
for_each_node_state(node, N_NORMAL_MEMORY) {
struct kmem_cache_node *n;
- if (local_node == node)
- n = &s->local_node;
- else {
- if (slab_state == DOWN) {
- early_kmem_cache_node_alloc(gfpflags, node);
- continue;
- }
- n = kmalloc_node(sizeof(struct kmem_cache_node), gfpflags,
- node);
-
- if (!n) {
- free_kmem_cache_nodes(s);
- return 0;
- }
+ if (slab_state == DOWN) {
+ early_kmem_cache_node_alloc(gfpflags, node);
+ continue;
+ }
+ n = kmalloc_node(sizeof(struct kmem_cache_node), gfpflags,
+ node);
+ if (!n) {
+ free_kmem_cache_nodes(s);
+ return 0;
}
s->node[node] = n;
init_kmem_cache_node(n, s);
}
return 1;
}
+
+static void resize_shared_queue(struct kmem_cache *s, int shared)
+{
+
+ if (is_kmalloc_cache(s)) {
+ if (shared < BOOT_QUEUE_SIZE) {
+ s->shared = shared;
+ } else {
+ /* More than max. Go to max allowed */
+ s->queue = BOOT_QUEUE_SIZE;
+ s->batch = BOOT_BATCH_SIZE;
+ }
+ } else {
+ int node;
+
+ /* Create the new per node shared queues and then free the old ones */
+ down_write(&slub_lock);
+
+ /* We can only shrink the queue here since the new
+ * queue size may be smaller and there may be concurrent
+ * slab operations. The update of the queue size must be seen
+ * before the change of the location of the per node queue.
+ *
+ * Note that the queue may contain more objects than the
+ * queue size after this operation.
+ */
+ if (shared < s->shared) {
+ s->shared = shared;
+ barrier();
+ }
+
+
+ /* Serialization has not been worked out yet */
+ for_each_online_node(node) {
+ struct kmem_cache_node *n = get_node(s, node);
+ struct kmem_cache_node *nn =
+ kmalloc_node(sizeof(struct kmem_cache_node),
+ GFP_KERNEL, node);
+
+ init_kmem_cache_node(nn, s);
+ s->node[node] = nn;
+
+ spin_lock(&nn->list_lock);
+ list_move(&n->partial, &nn->partial);
+#ifdef CONFIG_SLUB_DEBUG
+ list_move(&n->full, &nn->full);
+#endif
+ spin_unlock(&nn->list_lock);
+
+ nn->nr_partial = n->nr_partial;
+#ifdef CONFIG_SLUB_DEBUG
+ nn->nr_slabs = n->nr_slabs;
+ nn->total_objects = n->total_objects;
+#endif
+
+ spin_lock(&nn->shared_lock);
+ nn->objects = n->objects;
+ memcpy(&nn->object, n->object, nn->objects * sizeof(void *));
+ spin_unlock(&nn->shared_lock);
+
+ kfree(n);
+ }
+ /*
+ * If the queue needs to be extended then the update is
+ * deferred until now, when the larger sized queue has
+ * been allocated and is in use.
+ */
+ if (shared > s->shared)
+ s->shared = shared;
+
+ up_write(&slub_lock);
+ }
+}
+
#else
static void free_kmem_cache_nodes(struct kmem_cache *s)
{
@@ -3989,6 +4056,31 @@ static ssize_t cpu_queue_size_store(stru
}
SLAB_ATTR(cpu_queue_size);
+#ifdef CONFIG_NUMA
+static ssize_t shared_queue_size_show(struct kmem_cache *s, char *buf)
+{
+ return sprintf(buf, "%u\n", s->shared);
+}
+
+static ssize_t shared_queue_size_store(struct kmem_cache *s,
+ const char *buf, size_t length)
+{
+ unsigned long queue;
+ int err;
+
+ err = strict_strtoul(buf, 10, &queue);
+ if (err)
+ return err;
+
+ if (queue > 10000 || queue < s->batch)
+ return -EINVAL;
+
+ resize_shared_queue(s, queue);
+ return length;
+}
+SLAB_ATTR(shared_queue_size);
+#endif
+
static ssize_t cpu_batch_size_show(struct kmem_cache *s, char *buf)
{
return sprintf(buf, "%u\n", s->batch);
@@ -4388,6 +4480,7 @@ static struct attribute *slab_attrs[] =
&cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
+ &shared_queue_size_attr.attr,
&remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
@@ -4720,7 +4813,7 @@ static int s_show(struct seq_file *m, vo
seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
nr_objs, s->size, oo_objects(s->oo),
(1 << oo_order(s->oo)));
- seq_printf(m, " : tunables %4u %4u %4u", s->queue, s->batch, 0);
+ seq_printf(m, " : tunables %4u %4u %4u", s->queue, s->batch, s->shared);
seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
0UL);
seq_putc(m, '\n');
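
For reference, with the last hunk /proc/slabinfo reports the shared queue size
as the third tunables field. An illustrative line (all counts made up), following
the format strings above:

	kmalloc-64        21504  21504     64   64    1 : tunables 1000  250  500 : slabdata    336    336      0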
--