From: Christoph Lameter <christoph@lameter.com>
To: "Martin J. Bligh" <mbligh@mbligh.org>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>,
Christoph Lameter <clameter@engr.sgi.com>,
Dave Hansen <haveblue@us.ibm.com>,
Andy Whitcroft <apw@shadowen.org>, Andrew Morton <akpm@osdl.org>,
linux-mm <linux-mm@kvack.org>,
Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
shai@scalex86.org, steiner@sgi.com
Subject: Re: NUMA aware slab allocator V3
Date: Mon, 16 May 2005 17:14:53 -0700 (PDT)
Message-ID: <Pine.LNX.4.62.0505161713130.21512@graphe.net>
In-Reply-To: <740100000.1116278461@flay>
On Mon, 16 May 2005, Martin J. Bligh wrote:
> > Yeah, makes sense for the NUMA aware slab allocator to depend on
> > CONFIG_NUMA.
>
> Andy confirmed offline that this is really CONFIG_NEED_MULTIPLE_PGDATS,
> and is just named wrong.
Hmmm.. In that case the slab allocator may need to determine the proper
number of NUMA nodes on its own. I do not really like it, but it seems we
need the following patch to rectify the situation.
Index: linux-2.6.12-rc4/mm/slab.c
===================================================================
--- linux-2.6.12-rc4.orig/mm/slab.c 2005-05-16 16:58:44.000000000 -0700
+++ linux-2.6.12-rc4/mm/slab.c 2005-05-16 17:04:11.000000000 -0700
@@ -112,10 +112,12 @@
* there is only a single node if CONFIG_NUMA is not set. Remove this check
* after the situation has stabilized.
*/
-#ifndef CONFIG_NUMA
-#if MAX_NUMNODES != 1
-#error "Broken Configuration: CONFIG_NUMA not set but MAX_NUMNODES !=1 !!"
-#endif
+#ifdef CONFIG_NUMA
+#define NUMA_NODES MAX_NUMNODES
+#define NUMA_NODE_ID numa_node_id()
+#else
+#define NUMA_NODES 1
+#define NUMA_NODE_ID 0
#endif
/*
@@ -311,7 +313,7 @@
/*
* Need this for bootstrapping a per node allocator.
*/
-#define NUM_INIT_LISTS (2 + MAX_NUMNODES)
+#define NUM_INIT_LISTS (2 + NUMA_NODES)
struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
#define CACHE_CACHE 0
#define SIZE_AC 1
@@ -385,7 +387,7 @@
} while (0)
#define list3_data(cachep) \
- ((cachep->nodelists[numa_node_id()]))
+ ((cachep->nodelists[NUMA_NODE_ID]))
/* NUMA: per-node */
#define list3_data_ptr(cachep, ptr) \
@@ -405,7 +407,7 @@
unsigned int shared;
unsigned int objsize;
/* 2) touched by every alloc & free from the backend */
- struct kmem_list3 *nodelists[MAX_NUMNODES];
+ struct kmem_list3 *nodelists[NUMA_NODES];
unsigned int flags; /* constant flags */
unsigned int num; /* # of objs per slab */
spinlock_t spinlock;
@@ -792,7 +794,7 @@
static inline struct array_cache **alloc_alien_cache(int cpu, int limit)
{
struct array_cache **ac_ptr;
- int memsize = sizeof(void*)*MAX_NUMNODES;
+ int memsize = sizeof(void*)*NUMA_NODES;
int node = cpu_to_node(cpu);
int i;
@@ -800,7 +802,7 @@
limit = 12;
ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node);
if (ac_ptr) {
- for (i = 0; i < MAX_NUMNODES; i++) {
+ for (i = 0; i < NUMA_NODES; i++) {
if (i == node) {
ac_ptr[i] = NULL;
continue;
@@ -823,7 +825,7 @@
if (!ac_ptr)
return;
- for (i = 0; i < MAX_NUMNODES; i++)
+ for (i = 0; i < NUMA_NODES; i++)
kfree(ac_ptr[i]);
kfree(ac_ptr);
@@ -847,7 +849,7 @@
struct array_cache *ac;
unsigned long flags;
- for (i = 0; i < MAX_NUMNODES; i++) {
+ for (i = 0; i < NUMA_NODES; i++) {
ac = l3->alien[i];
if (ac) {
spin_lock_irqsave(&ac->lock, flags);
@@ -1028,7 +1030,7 @@
for (i = 0; i < NUM_INIT_LISTS; i++) {
LIST3_INIT(&initkmem_list3[i]);
- if (i < MAX_NUMNODES)
+ if (i < NUMA_NODES)
cache_cache.nodelists[i] = NULL;
}
@@ -1065,7 +1067,7 @@
list_add(&cache_cache.next, &cache_chain);
cache_cache.colour_off = cache_line_size();
cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
- cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE];
+ cache_cache.nodelists[NUMA_NODE_ID] = &initkmem_list3[CACHE_CACHE];
cache_cache.objsize = ALIGN(cache_cache.objsize, cache_line_size());
@@ -1154,7 +1156,7 @@
int node;
/* Replace the static kmem_list3 structures for the boot cpu */
init_list(&cache_cache, &initkmem_list3[CACHE_CACHE],
- numa_node_id());
+ NUMA_NODE_ID);
for_each_online_node(node) {
init_list(malloc_sizes[INDEX_L3].cs_cachep,
@@ -1163,7 +1165,7 @@
if (INDEX_AC != INDEX_L3) {
init_list(malloc_sizes[INDEX_AC].cs_cachep,
&initkmem_list3[SIZE_AC],
- numa_node_id());
+ NUMA_NODE_ID);
}
}
@@ -1778,7 +1780,7 @@
set_up_list3s(cachep);
g_cpucache_up = PARTIAL_L3;
} else {
- cachep->nodelists[numa_node_id()] =
+ cachep->nodelists[NUMA_NODE_ID] =
&initkmem_list3[SIZE_AC];
g_cpucache_up = PARTIAL_AC;
}
@@ -1791,18 +1793,18 @@
set_up_list3s(cachep);
g_cpucache_up = PARTIAL_L3;
} else {
- cachep->nodelists[numa_node_id()] =
+ cachep->nodelists[NUMA_NODE_ID] =
kmalloc(sizeof(struct kmem_list3),
GFP_KERNEL);
- LIST3_INIT(cachep->nodelists[numa_node_id()]);
+ LIST3_INIT(cachep->nodelists[NUMA_NODE_ID]);
}
}
- cachep->nodelists[numa_node_id()]->next_reap =
+ cachep->nodelists[NUMA_NODE_ID]->next_reap =
jiffies + REAPTIMEOUT_LIST3 +
((unsigned long)cachep)%REAPTIMEOUT_LIST3;
BUG_ON(!ac_data(cachep));
- BUG_ON(!cachep->nodelists[numa_node_id()]);
+ BUG_ON(!cachep->nodelists[NUMA_NODE_ID]);
ac_data(cachep)->avail = 0;
ac_data(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
ac_data(cachep)->batchcount = 1;
@@ -1986,7 +1988,7 @@
drain_cpu_caches(cachep);
check_irq_on();
- for (i = 0; i < MAX_NUMNODES; i++) {
+ for (i = 0; i < NUMA_NODES; i++) {
l3 = cachep->nodelists[i];
if (l3) {
spin_lock_irq(&l3->list_lock);
@@ -2068,7 +2070,7 @@
kfree(cachep->array[i]);
/* NUMA: free the list3 structures */
- for (i = 0; i < MAX_NUMNODES; i++) {
+ for (i = 0; i < NUMA_NODES; i++) {
if ((l3 = cachep->nodelists[i])) {
kfree(l3->shared);
#ifdef CONFIG_NUMA
@@ -2482,7 +2484,7 @@
if (unlikely(!ac->avail)) {
int x;
- x = cache_grow(cachep, flags, numa_node_id());
+ x = cache_grow(cachep, flags, NUMA_NODE_ID);
// cache_grow can reenable interrupts, then ac could change.
ac = ac_data(cachep);
@@ -2786,7 +2788,7 @@
{
struct slab *slabp;
slabp = GET_PAGE_SLAB(virt_to_page(objp));
- if (unlikely(slabp->nodeid != numa_node_id())) {
+ if (unlikely(slabp->nodeid != NUMA_NODE_ID)) {
struct array_cache *alien = NULL;
int nodeid = slabp->nodeid;
struct kmem_list3 *l3 = list3_data(cachep);
@@ -2896,7 +2898,7 @@
unsigned long save_flags;
void *ptr;
- if (nodeid == numa_node_id() || nodeid == -1)
+ if (nodeid == NUMA_NODE_ID || nodeid == -1)
return __cache_alloc(cachep, flags);
cache_alloc_debugcheck_before(cachep, flags);
@@ -3437,7 +3439,7 @@
spin_lock_irq(&l3->list_lock);
drain_array_locked(searchp, ac_data(searchp), 0,
- numa_node_id());
+ NUMA_NODE_ID);
#if DEBUG
if (time_before(searchp->redzonetest, jiffies)) {
@@ -3452,7 +3454,7 @@
if (l3->shared)
drain_array_locked(searchp, l3->shared, 0,
- numa_node_id());
+ NUMA_NODE_ID);
if (l3->free_touched) {
l3->free_touched = 0;
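For readers skimming the diff, the effect of the new macros can be summarized in a short standalone sketch. This is illustrative only and not part of the patch; struct example_cache and example_local_list are hypothetical names standing in for kmem_cache_t and list3_data():

#ifdef CONFIG_NUMA
#define NUMA_NODES	MAX_NUMNODES
#define NUMA_NODE_ID	numa_node_id()
#else
/* Non-NUMA build: a single node, so per-node arrays shrink to one slot. */
#define NUMA_NODES	1
#define NUMA_NODE_ID	0
#endif

struct example_cache {
	/* sized like cachep->nodelists[]: one entry per possible node,
	 * exactly one entry when CONFIG_NUMA is not set */
	struct kmem_list3 *nodelists[NUMA_NODES];
};

static struct kmem_list3 *example_local_list(struct example_cache *c)
{
	/* every lookup resolves to index 0 on !CONFIG_NUMA kernels */
	return c->nodelists[NUMA_NODE_ID];
}

The point of the wrappers is that slab.c no longer has to assume MAX_NUMNODES == 1 when CONFIG_NUMA is off; the non-NUMA case degenerates cleanly regardless of how CONFIG_NEED_MULTIPLE_PGDATS sizes MAX_NUMNODES.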