From: Christoph Lameter <cl@linux.com>
To: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: linux-mm@kvack.org
Subject: [RFC V2 SLEB 12/14] SLEB: Make the size of the shared cache configurable
Date: Fri, 21 May 2010 16:15:04 -0500
Message-ID: <20100521211544.174575855@quilx.com>
In-Reply-To: <20100521211452.659982351@quilx.com>
[-- Attachment #1: sled_shared_dynamic --]
[-- Type: text/plain, Size: 6393 bytes --]
This makes the size of the shared array configurable. Note that this is a bit
problematic and there are likely unresolved race conditions: the kmem_cache->node[x]
pointers become unstable if interrupts are allowed.
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
---
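For illustration only (not part of the patch): on a NUMA build the new tunable
shows up as a sysfs attribute of each cache under /sys/kernel/slab/. The cache
name below is just an example; per the store handler, the new size must be at
least the cache's batch size and at most 10000:

	cat /sys/kernel/slab/kmalloc-64/shared_queue_size
	echo 500 > /sys/kernel/slab/kmalloc-64/shared_queue_size
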
include/linux/slub_def.h | 3 +
mm/slub.c | 133 +++++++++++++++++++++++++++++++++++++++--------
2 files changed, 116 insertions(+), 20 deletions(-)
Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h 2010-05-21 13:17:14.000000000 -0500
+++ linux-2.6/include/linux/slub_def.h 2010-05-21 13:47:41.000000000 -0500
@@ -81,11 +81,14 @@ struct kmem_cache {
struct kmem_cache_order_objects oo;
int queue; /* per cpu queue size */
int batch; /* batch size */
+ int shared; /* Shared queue size */
+#ifndef CONFIG_NUMA
/*
* Avoid an extra cache line for UP, SMP and for the node local to
* struct kmem_cache.
*/
struct kmem_cache_node local_node;
+#endif
/* Allocation and freeing of slabs */
struct kmem_cache_order_objects max;
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2010-05-21 13:17:14.000000000 -0500
+++ linux-2.6/mm/slub.c 2010-05-21 13:48:01.000000000 -0500
@@ -1754,7 +1754,7 @@ redo:
int d;
spin_lock(&n->shared_lock);
- d = min(min(s->batch, BOOT_QUEUE_SIZE), n->objects);
+ d = min(min(s->batch, s->shared), n->objects);
if (d > 0) {
memcpy(c->object + c->objects,
n->object + n->objects - d,
@@ -1864,6 +1864,7 @@ void *kmem_cache_alloc_node(struct kmem_
return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
+
#endif
#ifdef CONFIG_TRACING
@@ -2176,10 +2177,7 @@ static void free_kmem_cache_nodes(struct
int node;
for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = s->node[node];
-
- if (n && n != &s->local_node)
- kfree(n);
+ kfree(s->node[node]);
s->node[node] = NULL;
}
}
@@ -2197,27 +2195,96 @@ static int init_kmem_cache_nodes(struct
for_each_node_state(node, N_NORMAL_MEMORY) {
struct kmem_cache_node *n;
- if (local_node == node)
- n = &s->local_node;
- else {
- if (slab_state == DOWN) {
- early_kmem_cache_node_alloc(gfpflags, node);
- continue;
- }
- n = kmalloc_node(sizeof(struct kmem_cache_node), gfpflags,
- node);
-
- if (!n) {
- free_kmem_cache_nodes(s);
- return 0;
- }
+ if (slab_state == DOWN) {
+ early_kmem_cache_node_alloc(gfpflags, node);
+ continue;
+ }
+ n = kmalloc_node(sizeof(struct kmem_cache_node), gfpflags,
+ node);
+ if (!n) {
+ free_kmem_cache_nodes(s);
+ return 0;
}
s->node[node] = n;
init_kmem_cache_node(n, s);
}
return 1;
}
+
+static void resize_shared_queue(struct kmem_cache *s, int shared)
+{
+
+ if (is_kmalloc_cache(s)) {
+ if (shared < BOOT_QUEUE_SIZE) {
+ s->shared = shared;
+ } else {
+ /* More than max. Go to max allowed */
+ s->queue = BOOT_QUEUE_SIZE;
+ s->batch = BOOT_BATCH_SIZE;
+ }
+ } else {
+ int node;
+
+ /* Create the new per node shared queues and then free the old ones */
+ down_write(&slub_lock);
+
+ /* We can only shrink the queue here since the new
+ * queue size may be smaller and there may be concurrent
+ * slab operations. The update of the queue size must be seen
+ * before the change of the location of the per node queue.
+ *
+ * Note that the queue may contain more objects than the
+ * queue size after this operation.
+ */
+ if (shared < s->shared) {
+ s->shared = shared;
+ barrier();
+ }
+
+
+ /* Serialization has not been worked out yet */
+ for_each_online_node(node) {
+ struct kmem_cache_node *n = get_node(s, node);
+ struct kmem_cache_node *nn =
+ kmalloc_node(sizeof(struct kmem_cache_node),
+ GFP_KERNEL, node);
+
+ init_kmem_cache_node(nn, s);
+ s->node[node] = nn;
+
+ spin_lock(&nn->list_lock);
+ list_move(&n->partial, &nn->partial);
+#ifdef CONFIG_SLUB_DEBUG
+ list_move(&n->full, &nn->full);
+#endif
+ spin_unlock(&nn->list_lock);
+
+ nn->nr_partial = n->nr_partial;
+#ifdef CONFIG_SLUB_DEBUG
+ nn->nr_slabs = n->nr_slabs;
+ nn->total_objects = n->total_objects;
+#endif
+
+ spin_lock(&nn->shared_lock);
+ nn->objects = n->objects;
+ memcpy(&nn->object, n->object, nn->objects * sizeof(void *));
+ spin_unlock(&nn->shared_lock);
+
+ kfree(n);
+ }
+ /*
+ * If the queue needs to be extended then the update is
+ * deferred until now, when the larger sized queue has
+ * been allocated and is in use.
+ */
+ if (shared > s->shared)
+ s->shared = shared;
+
+ up_write(&slub_lock);
+ }
+}
+
#else
static void free_kmem_cache_nodes(struct kmem_cache *s)
{
@@ -3989,6 +4056,31 @@ static ssize_t cpu_queue_size_store(stru
}
SLAB_ATTR(cpu_queue_size);
+#ifdef CONFIG_NUMA
+static ssize_t shared_queue_size_show(struct kmem_cache *s, char *buf)
+{
+ return sprintf(buf, "%u\n", s->shared);
+}
+
+static ssize_t shared_queue_size_store(struct kmem_cache *s,
+ const char *buf, size_t length)
+{
+ unsigned long queue;
+ int err;
+
+ err = strict_strtoul(buf, 10, &queue);
+ if (err)
+ return err;
+
+ if (queue > 10000 || queue < s->batch)
+ return -EINVAL;
+
+ resize_shared_queue(s, queue);
+ return length;
+}
+SLAB_ATTR(shared_queue_size);
+#endif
+
static ssize_t cpu_batch_size_show(struct kmem_cache *s, char *buf)
{
return sprintf(buf, "%u\n", s->batch);
@@ -4388,6 +4480,7 @@ static struct attribute *slab_attrs[] =
&cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
+ &shared_queue_size_attr.attr,
&remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
@@ -4720,7 +4813,7 @@ static int s_show(struct seq_file *m, vo
seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
nr_objs, s->size, oo_objects(s->oo),
(1 << oo_order(s->oo)));
- seq_printf(m, " : tunables %4u %4u %4u", s->queue, s->batch, 0);
+ seq_printf(m, " : tunables %4u %4u %4u", s->queue, s->batch, s->shared);
seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
0UL);
seq_putc(m, '\n');
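
For reference, with the last hunk /proc/slabinfo reports the shared queue size
as the third tunables field. An illustrative line (all counts made up), following
the format strings above:

	kmalloc-64        21504  21504     64   64    1 : tunables 1000  250  500 : slabdata    336    336      0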
--