linux-mm.kvack.org archive mirror
From: Andrew Morton <akpm@osdl.org>
To: Christoph Lameter <clameter@engr.sgi.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	shai@scalex86.org, steiner@sgi.com
Subject: Re: NUMA aware slab allocator V3
Date: Sat, 14 May 2005 00:42:04 -0700
Message-ID: <20050514004204.2302dc52.akpm@osdl.org>
In-Reply-To: <Pine.LNX.4.62.0505131823210.12315@schroedinger.engr.sgi.com>

Christoph Lameter <clameter@engr.sgi.com> wrote:
>
> This patch allows kmalloc_node to be as fast as kmalloc by introducing
> node specific page lists for partial, free and full slabs.

Oh drat - what happened to all the coding-style fixups?  Redone patch
below.  Please merge - slab.c is already not a nice place to visit.
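
For anyone reading along: the per-node bookkeeping the quoted patch adds
looks roughly like this, reconstructed from the hunks below (field order
and exact types are approximate -- see the real struct kmem_list3 in the
patch itself):

	struct kmem_list3 {
		struct list_head	slabs_partial;	/* partial slabs for this node */
		struct list_head	slabs_full;
		struct list_head	slabs_free;
		unsigned long		free_objects;
		int			free_touched;
		unsigned long		next_reap;
		int			free_limit;
		spinlock_t		list_lock;	/* protects the three lists */
		struct array_cache	*shared;	/* cache shared within this node */
		struct array_cache	**alien;	/* caches for objects from other nodes */
	};

Each kmem_cache_t then points at one of these per node via
cachep->nodelists[node], which is what lets kmalloc_node() stay on its
own node's partial/free/full lists.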

> +#ifndef CONFIG_NUMA
> +#if MAX_NUMNODES != 1
> +#error "Broken Configuration: CONFIG_NUMA not set but MAX_NUMNODES !=1 !!"
> +#endif
> +#endif

Well that's going to make it fail to compile at all on ppc64.

>  {
>  #ifdef CONFIG_SMP
>  	check_irq_off();
> -	BUG_ON(spin_trylock(&cachep->spinlock));
> +	BUG_ON(spin_trylock(&list3_data(cachep)->list_lock));
> +#endif

We can use assert_spin_locked() here now, btw.
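
That is, the hunk above could presumably become something like (untested):

	#ifdef CONFIG_SMP
		check_irq_off();
		/* caller must already hold the per-node list_lock */
		assert_spin_locked(&list3_data(cachep)->list_lock);
	#endif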


I hacked things to compile by setting NODES_SHIFT to zero and the machine
boots.  I'll leave that hack in place for now, so -mm is busted on
ppc64 NUMA.  Please sort things out with the ppc64 guys?
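
(For reference, the hack works because MAX_NUMNODES is derived from
NODES_SHIFT, roughly:

	#define NODES_SHIFT	0			/* the temporary hack */
	#define MAX_NUMNODES	(1 << NODES_SHIFT)	/* == 1, so the #error above no longer fires */

i.e. a shift of zero forces a single node, which keeps the new
CONFIG_NUMA/MAX_NUMNODES sanity check quiet.)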


diff -puN mm/slab.c~numa-aware-slab-allocator-v2-tidy mm/slab.c
--- 25/mm/slab.c~numa-aware-slab-allocator-v2-tidy	2005-05-14 00:08:02.000000000 -0700
+++ 25-akpm/mm/slab.c	2005-05-14 00:16:41.000000000 -0700
@@ -356,7 +356,7 @@ static inline int index_of(const size_t 
 		(parent)->list_lock = SPIN_LOCK_UNLOCKED;	\
 		(parent)->free_objects = 0;	\
 		(parent)->free_touched = 0;	\
-	} while(0)
+	} while (0)
 #else
 
 #define LIST3_INIT(parent) \
@@ -368,21 +368,21 @@ static inline int index_of(const size_t 
 		(parent)->list_lock = SPIN_LOCK_UNLOCKED;	\
 		(parent)->free_objects = 0;	\
 		(parent)->free_touched = 0;	\
-	} while(0)
+	} while (0)
 #endif
 
 #define MAKE_LIST(cachep, listp, slab, nodeid)	\
 	do {	\
 		INIT_LIST_HEAD(listp);		\
 		list_splice(&(cachep->nodelists[nodeid]->slab), listp); \
-	}while(0)
+	} while (0)
 
 #define	MAKE_ALL_LISTS(cachep, ptr, nodeid)			\
 	do {					\
 	MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid);	\
-	MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid);	\
+	MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
 	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
-	}while(0)
+	} while (0)
 
 #define list3_data(cachep) \
 	((cachep->nodelists[numa_node_id()]))
@@ -807,15 +807,15 @@ static inline struct array_cache **alloc
 	if (limit > 1)
 		limit = 12;
 	ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node);
-	if(ac_ptr) {
+	if (ac_ptr) {
 		for (i = 0; i < MAX_NUMNODES; i++) {
 			if (i == node) {
 				ac_ptr[i] = NULL;
 				continue;
 			}
 			ac_ptr[i] = alloc_arraycache(cpu, limit, 0xbaadf00d);
-			if(!ac_ptr[i]) {
-				for(i--; i <=0; i--)
+			if (!ac_ptr[i]) {
+				for (i--; i <=0; i--)
 					kfree(ac_ptr[i]);
 				kfree(ac_ptr);
 				return NULL;
@@ -829,7 +829,7 @@ static inline void free_alien_cache(stru
 {
 	int i;
 
-	if(!ac_ptr)
+	if (!ac_ptr)
 		return;
 	for (i = 0; i < MAX_NUMNODES; i++)
 		kfree(ac_ptr[i]);
@@ -841,7 +841,7 @@ static inline void __drain_alien_cache(k
 {
 	struct kmem_list3 *rl3 = cachep->nodelists[node];
 
-	if(ac->avail) {
+	if (ac->avail) {
 		spin_lock(&rl3->list_lock);
 		free_block(cachep, ac->entry, ac->avail);
 		ac->avail = 0;
@@ -857,7 +857,7 @@ static void drain_alien_cache(kmem_cache
 
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		ac = l3->alien[i];
-		if(ac) {
+		if (ac) {
 			spin_lock_irqsave(&ac->lock, flags);
 			__drain_alien_cache(cachep, ac, i);
 			spin_unlock_irqrestore(&ac->lock, flags);
@@ -891,12 +891,12 @@ static int __devinit cpuup_callback(stru
 			 * node has not already allocated this
 			 */
 			if (!cachep->nodelists[node]) {
-				if(!(l3 = kmalloc_node(memsize,
+				if (!(l3 = kmalloc_node(memsize,
 						GFP_KERNEL, node)))
 					goto bad;
 				LIST3_INIT(l3);
 				l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
-					((unsigned long)cachep)%REAPTIMEOUT_LIST3;
+				  ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
 
 				cachep->nodelists[node] = l3;
 			}
@@ -919,8 +919,8 @@ static int __devinit cpuup_callback(stru
 
 			l3 = cachep->nodelists[node];
 			BUG_ON(!l3);
-			if(!l3->shared) {
-				if(!(nc = alloc_arraycache(cpu,
+			if (!l3->shared) {
+				if (!(nc = alloc_arraycache(cpu,
 					cachep->shared*cachep->batchcount,
 					0xbaadf00d)))
 					goto  bad;
@@ -952,29 +952,29 @@ static int __devinit cpuup_callback(stru
 			cachep->array[cpu] = NULL;
 			l3 = cachep->nodelists[node];
 
-			if(!l3)
+			if (!l3)
 				goto unlock_cache;
 
 			spin_lock(&l3->list_lock);
 
 			/* Free limit for this kmem_list3 */
 			l3->free_limit -= cachep->batchcount;
-			if(nc)
+			if (nc)
 				free_block(cachep, nc->entry, nc->avail);
 
-			if(!cpus_empty(mask)) {
+			if (!cpus_empty(mask)) {
                                 spin_unlock(&l3->list_lock);
                                 goto unlock_cache;
                         }
 
-			if(l3->shared) {
+			if (l3->shared) {
 				free_block(cachep, l3->shared->entry,
 						l3->shared->avail);
 				kfree(l3->shared);
 				l3->shared = NULL;
 			}
 #ifdef CONFIG_NUMA
-			if(l3->alien) {
+			if (l3->alien) {
 				drain_alien_cache(cachep, l3);
 				free_alien_cache(l3->alien);
 				l3->alien = NULL;
@@ -982,13 +982,13 @@ static int __devinit cpuup_callback(stru
 #endif
 
 			/* free slabs belonging to this node */
-			if(__node_shrink(cachep, node)) {
+			if (__node_shrink(cachep, node)) {
 				cachep->nodelists[node] = NULL;
 				spin_unlock(&l3->list_lock);
 				kfree(l3);
-			}
-			else
+			} else {
 				spin_unlock(&l3->list_lock);
+			}
 unlock_cache:
 			spin_unlock_irq(&cachep->spinlock);
 			kfree(nc);
@@ -1034,7 +1034,7 @@ void __init kmem_cache_init(void)
 	struct cache_names *names;
 	int i;
 
-	for(i = 0; i < NUM_INIT_LISTS; i++) {
+	for (i = 0; i < NUM_INIT_LISTS; i++) {
 		LIST3_INIT(&initkmem_list3[i]);
 		if (i < MAX_NUMNODES)
 			cache_cache.nodelists[i] = NULL;
@@ -1101,16 +1101,19 @@ void __init kmem_cache_init(void)
 				(ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
 
 	if (INDEX_AC != INDEX_L3)
-		sizes[INDEX_L3].cs_cachep = kmem_cache_create(names[INDEX_L3].name,
-					 sizes[INDEX_L3].cs_size, ARCH_KMALLOC_MINALIGN,
-					 (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
+		sizes[INDEX_L3].cs_cachep =
+			kmem_cache_create(names[INDEX_L3].name,
+				sizes[INDEX_L3].cs_size, ARCH_KMALLOC_MINALIGN,
+				(ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
 
 	while (sizes->cs_size != ULONG_MAX) {
-		/* For performance, all the general caches are L1 aligned.
+		/*
+		 * For performance, all the general caches are L1 aligned.
 		 * This should be particularly beneficial on SMP boxes, as it
 		 * eliminates "false sharing".
 		 * Note for systems short on memory removing the alignment will
-		 * allow tighter packing of the smaller caches. */
+		 * allow tighter packing of the smaller caches.
+		 */
 		if(!sizes->cs_cachep)
 			sizes->cs_cachep = kmem_cache_create(names->name,
 				sizes->cs_size, ARCH_KMALLOC_MINALIGN,
@@ -1150,7 +1153,8 @@ void __init kmem_cache_init(void)
 				!= &initarray_generic.cache);
 		memcpy(ptr, ac_data(malloc_sizes[INDEX_AC].cs_cachep),
 				sizeof(struct arraycache_init));
-		malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = ptr;
+		malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
+						ptr;
 		local_irq_enable();
 	}
 	/* 5) Replace the bootstrap kmem_list3's */
@@ -1160,8 +1164,8 @@ void __init kmem_cache_init(void)
 		init_list(&cache_cache, &initkmem_list3[CACHE_CACHE],
 				numa_node_id());
 
-		for (j=0; j < MAX_NUMNODES; j++) {
-			if(is_node_online(j))
+		for (j = 0; j < MAX_NUMNODES; j++) {
+			if (is_node_online(j))
 				init_list(malloc_sizes[INDEX_L3].cs_cachep,
 						&initkmem_list3[SIZE_L3+j], j);
 		}
@@ -1489,8 +1493,9 @@ static void slab_destroy (kmem_cache_t *
 static inline void set_up_list3s(kmem_cache_t *cachep)
 {
 	int i;
-	for(i = 0; i < MAX_NUMNODES; i++) {
-		if(is_node_online(i)) {
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		if (is_node_online(i)) {
 			cachep->nodelists[i] = &initkmem_list3[SIZE_L3+i];
 			cachep->nodelists[i]->next_reap = jiffies +
 				REAPTIMEOUT_LIST3 +
@@ -1939,14 +1944,14 @@ static void drain_cpu_caches(kmem_cache_
 	smp_call_function_all_cpus(do_drain, cachep);
 	check_irq_on();
 	spin_lock_irq(&cachep->spinlock);
-	for(i = 0; i < MAX_NUMNODES; i++)  {
+	for (i = 0; i < MAX_NUMNODES; i++)  {
 		l3 = cachep->nodelists[i];
 		if (l3) {
 			spin_lock(&l3->list_lock);
 			drain_array_locked(cachep, l3->shared, 1, i);
 			spin_unlock(&l3->list_lock);
 #ifdef CONFIG_NUMA
-			if(l3->alien)
+			if (l3->alien)
 				drain_alien_cache(cachep, l3);
 #endif
 		}
@@ -2074,7 +2079,7 @@ int kmem_cache_destroy(kmem_cache_t * ca
 		kfree(cachep->array[i]);
 
 	/* NUMA: free the list3 structures */
-	for(i = 0; i < MAX_NUMNODES; i++) {
+	for (i = 0; i < MAX_NUMNODES; i++) {
 		if ((l3 = cachep->nodelists[i])) {
 			kfree(l3->shared);
 #ifdef CONFIG_NUMA
@@ -2092,8 +2097,8 @@ int kmem_cache_destroy(kmem_cache_t * ca
 EXPORT_SYMBOL(kmem_cache_destroy);
 
 /* Get the memory for a slab management obj. */
-static struct slab* alloc_slabmgmt(kmem_cache_t *cachep,
-			void *objp, int colour_off, unsigned int __nocast local_flags)
+static struct slab* alloc_slabmgmt(kmem_cache_t *cachep, void *objp,
+			int colour_off, unsigned int __nocast local_flags)
 {
 	struct slab *slabp;
 	
@@ -2124,7 +2129,7 @@ static void cache_init_objs(kmem_cache_t
 	int i;
 
 	for (i = 0; i < cachep->num; i++) {
-		void* objp = slabp->s_mem+cachep->objsize*i;
+		void *objp = slabp->s_mem+cachep->objsize*i;
 #if DEBUG
 		/* need to poison the objs? */
 		if (cachep->flags & SLAB_POISON)
@@ -2806,8 +2811,7 @@ static inline void __cache_free(kmem_cac
 							alien, nodeid);
 				alien->entry[alien->avail++] = objp;
 				spin_unlock(&alien->lock);
-			}
-			else {
+			} else {
 				spin_lock(&(cachep->nodelists[nodeid])->
 						list_lock);
 				free_block(cachep, &objp, 1);
@@ -3196,7 +3200,7 @@ static int alloc_kmemlist(kmem_cache_t *
 	struct kmem_list3 *l3;
 	int err = 0;
 
-	for(i=0; i < NR_CPUS; i++) {
+	for (i = 0; i < NR_CPUS; i++) {
 		if (cpu_online(i)) {
 			struct array_cache *nc = NULL, *new;
 #ifdef CONFIG_NUMA
_

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: aart@kvack.org

Thread overview: 53+ messages
2005-05-11 15:17 NUMA aware slab allocator V2 Christoph Lameter
2005-05-11 15:46 ` Jack Steiner
2005-05-12  7:04 ` Andrew Morton
2005-05-12  9:39   ` Niraj kumar
2005-05-12 20:02   ` Christoph Lameter
2005-05-12 20:22     ` Andrew Morton
2005-05-13  7:06     ` Andrew Morton
2005-05-13 11:21       ` Christoph Lameter
2005-05-13 11:33         ` Andrew Morton
2005-05-13 11:37           ` Christoph Lameter
2005-05-13 13:56             ` Dave Hansen
2005-05-13 16:20               ` Christoph Lameter
2005-05-14  1:24           ` NUMA aware slab allocator V3 Christoph Lameter
2005-05-14  7:42             ` Andrew Morton [this message]
2005-05-14 16:24               ` Christoph Lameter
2005-05-16  5:00                 ` Andrew Morton
2005-05-16 13:52             ` Dave Hansen
2005-05-16 16:47               ` Christoph Lameter
2005-05-16 17:22                 ` Dave Hansen
2005-05-16 17:54                   ` Christoph Lameter
2005-05-16 18:08                     ` Martin J. Bligh
2005-05-16 21:10                       ` Jesse Barnes
2005-05-16 21:21                         ` Martin J. Bligh
2005-05-17  0:14                           ` Christoph Lameter
2005-05-17  0:26                             ` Dave Hansen
2005-05-17 23:36                               ` Matthew Dobson
2005-05-17 23:49                                 ` Christoph Lameter
2005-05-18 17:27                                   ` Matthew Dobson
2005-05-18 17:48                                     ` Christoph Lameter
2005-05-18 21:15                                       ` Matthew Dobson
2005-05-18 21:40                                         ` Christoph Lameter
2005-05-19  5:07                                           ` Christoph Lameter
2005-05-19 16:14                                             ` Jesse Barnes
2005-05-19 19:03                                             ` Matthew Dobson
2005-05-19 21:46                                             ` Matthew Dobson
2005-05-20 19:03                                             ` Matthew Dobson
2005-05-20 19:23                                               ` Christoph Lameter
2005-05-20 20:20                                                 ` Matthew Dobson
2005-05-20 21:30                                                 ` Matthew Dobson
2005-05-20 23:42                                                   ` Christoph Lameter
2005-05-24 21:37                                                   ` Christoph Lameter
2005-05-24 23:02                                                     ` Matthew Dobson
2005-05-25  5:21                                                       ` Christoph Lameter
2005-05-25 18:27                                                         ` Matthew Dobson
2005-05-25 21:03                                                           ` Christoph Lameter
2005-05-26  6:48                                                             ` Martin J. Bligh
2005-05-28  1:59                                                       ` NUMA aware slab allocator V4 Christoph Lameter
2005-05-16 21:54                         ` NUMA aware slab allocator V3 Dave Hansen
2005-05-16 18:12                     ` Dave Hansen
2005-05-13 13:46         ` NUMA aware slab allocator V2 Dave Hansen
2005-05-17 23:29       ` Matthew Dobson
2005-05-18  1:07         ` Christoph Lameter
2005-05-12 21:49 ` Robin Holt
