From: Christoph Lameter <clameter@sgi.com>
To: Paul Mundt <lethal@linux-sh.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
linux-mm@kvack.org, ak@suse.de, hugh@veritas.com,
lee.schermerhorn@hp.com, mpm@selenic.com
Subject: Re: [PATCH] numa: mempolicy: dynamic interleave map for system init.
Date: Thu, 7 Jun 2007 21:27:01 -0700 (PDT)
Message-ID: <Pine.LNX.4.64.0706072123560.27441@schroedinger.engr.sgi.com>
In-Reply-To: <20070608041303.GA13603@linux-sh.org>
On Fri, 8 Jun 2007, Paul Mundt wrote:
> Node 1 SUnreclaim: 8 kB
> So at least that gets back the couple of slab pages!
Hmmm... is that worth it? The patch is not right, btw. There is still the
case that new_slab() can acquire a page on the wrong node, and since SLUB
is not set up to allow that node, we will crash.
This now gets a bit ugly. In order to avoid that situation we first check
whether the node is allowed. If it is not, then we simply ask for an
allocation on the first allowed node.
But the page allocator may still fall back to another node. If that
happens, we redo the allocation with GFP_THISNODE to force an allocation
on the first allowed node, or fail.
I think we could do better by constructing a custom zonelist, but that
would be even more special-casing.
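In other words, the new_slab() flow becomes the following (just a
condensed sketch of the patch below, using the same names):

	/* If the requested node has no per-node structure, it is not
	 * in the allowed mask: redirect to the first allowed node. */
	if (slab_state >= UP && !get_node(s, node))
		node = first_node(slub_nodes);
redo:
	page = allocate_slab(s, flags & GFP_LEVEL_MASK, node);
	if (page && !get_node(s, page_to_nid(page))) {
		/* The page allocator fell back to a disallowed node
		 * anyway. Give the page back (undoing the slab stats)
		 * and retry with GFP_THISNODE, which either gets us a
		 * page on the allowed node or fails outright. */
		__free_pages(page, s->order);
		flags |= GFP_THISNODE;
		goto redo;
	}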
---
mm/slub.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 58 insertions(+), 5 deletions(-)
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2007-06-07 21:01:32.000000000 -0700
+++ linux-2.6/mm/slub.c	2007-06-07 21:23:04.000000000 -0700
@@ -215,6 +215,10 @@ static inline void ClearSlabDebug(struct
 static int kmem_size = sizeof(struct kmem_cache);
 
+#ifdef CONFIG_NUMA
+static nodemask_t slub_nodes = NODE_MASK_ALL;
+#endif
+
 #ifdef CONFIG_SMP
 static struct notifier_block slab_notifier;
 #endif
 
@@ -1023,6 +1027,11 @@ static struct page *new_slab(struct kmem
 	if (flags & __GFP_WAIT)
 		local_irq_enable();
 
+	/* Hack: Just get the first node if the node is not allowed */
+	if (slab_state >= UP && !get_node(s, node))
+		node = first_node(slub_nodes);
+
+redo:
 	page = allocate_slab(s, flags & GFP_LEVEL_MASK, node);
 	if (!page)
 		goto out;
@@ -1030,6 +1039,27 @@ static struct page *new_slab(struct kmem
 	n = get_node(s, page_to_nid(page));
 	if (n)
 		atomic_long_inc(&n->nr_slabs);
+#ifdef CONFIG_NUMA
+	else {
+		if (slab_state >= UP) {
+			/*
+			 * The baaad page allocator gave us a page on a
+			 * node that we should not use. Undo the stats,
+			 * then force a page on a legit node or fail.
+			 */
+			mod_zone_page_state(page_zone(page),
+				(s->flags & SLAB_RECLAIM_ACCOUNT) ?
+				NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
+				- (1 << s->order));
+			__free_pages(page, s->order);
+
+			flags |= GFP_THISNODE;
+
+			goto redo;
+		}
+	}
+#endif
+
 	page->offset = s->offset / sizeof(void *);
 	page->slab = s;
 	page->flags |= 1 << PG_slab;
@@ -1261,10 +1291,13 @@ static struct page *get_any_partial(stru
  */
 static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
 {
-	struct page *page;
+	struct page *page = NULL;
 	int searchnode = (node == -1) ? numa_node_id() : node;
+	struct kmem_cache_node *n = get_node(s, searchnode);
+
+	if (n)
+		page = get_partial_node(n);
 
-	page = get_partial_node(get_node(s, searchnode));
 	if (page || (flags & __GFP_THISNODE))
 		return page;
 
@@ -1820,12 +1853,22 @@ static void free_kmem_cache_nodes(struct
 
 	for_each_online_node(node) {
 		struct kmem_cache_node *n = s->node[node];
+
 		if (n && n != &s->local_node)
 			kmem_cache_free(kmalloc_caches, n);
 		s->node[node] = NULL;
 	}
 }
 
+static int __init setup_slub_nodes(char *str)
+{
+	if (*str == '=')
+		nodelist_parse(str + 1, slub_nodes);
+	return 1;
+}
+
+__setup("slub_nodes", setup_slub_nodes);
+
 static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
 {
 	int node;
@@ -1839,6 +1882,9 @@ static int init_kmem_cache_nodes(struct
 	for_each_online_node(node) {
 		struct kmem_cache_node *n;
 
+		if (!node_isset(node, slub_nodes))
+			continue;
+
 		if (local_node == node)
 			n = &s->local_node;
 		else {
@@ -2094,6 +2140,9 @@ static int kmem_cache_close(struct kmem_
 	for_each_online_node(node) {
 		struct kmem_cache_node *n = get_node(s, node);
 
+		if (!n)
+			continue;
+
 		n->nr_partial -= free_list(s, n, &n->partial);
 		if (atomic_long_read(&n->nr_slabs))
 			return 1;
@@ -2331,7 +2380,7 @@ int kmem_cache_shrink(struct kmem_cache
 	for_each_online_node(node) {
 		n = get_node(s, node);
 
-		if (!n->nr_partial)
+		if (!n || !n->nr_partial)
 			continue;
 
 		for (i = 0; i < s->objects; i++)
@@ -2757,7 +2806,8 @@ static unsigned long validate_slab_cache
 	for_each_online_node(node) {
 		struct kmem_cache_node *n = get_node(s, node);
 
-		count += validate_slab_node(s, n);
+		if (n)
+			count += validate_slab_node(s, n);
 	}
 	return count;
 }
@@ -2981,7 +3031,7 @@ static int list_locations(struct kmem_ca
 		unsigned long flags;
 		struct page *page;
 
-		if (!atomic_read(&n->nr_slabs))
+		if (!n || !atomic_read(&n->nr_slabs))
 			continue;
 
 		spin_lock_irqsave(&n->list_lock, flags);
@@ -3104,6 +3154,9 @@ static unsigned long slab_objects(struct
 	for_each_online_node(node) {
 		struct kmem_cache_node *n = get_node(s, node);
 
+		if (!n)
+			continue;
+
 		if (flags & SO_PARTIAL) {
 			if (flags & SO_OBJECTS)
 				x = count_partial(n);
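
For reference, slub_nodes= takes the usual nodelist syntax via
nodelist_parse(), so something like this on the kernel command line
would work (node numbers here are only examples):

	slub_nodes=0		per-node structures on node 0 only
	slub_nodes=0,2-3	nodes 0 plus 2 through 3

Any slab page that the page allocator still places outside the mask is
then bounced back onto an allowed node by the redo path above.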