From: Christoph Lameter <clameter@sgi.com>
To: Paul Mundt <lethal@linux-sh.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	linux-mm@kvack.org, ak@suse.de, hugh@veritas.com,
	lee.schermerhorn@hp.com, mpm@selenic.com
Subject: Re: [PATCH] numa: mempolicy: dynamic interleave map for system init.
Date: Thu, 7 Jun 2007 20:49:53 -0700 (PDT)
Message-ID: <Pine.LNX.4.64.0706072027300.27295@schroedinger.engr.sgi.com>
In-Reply-To: <20070608032505.GA13227@linux-sh.org>

On Fri, 8 Jun 2007, Paul Mundt wrote:

> obviously possible to try to work that into SLOB or something similar,
> if making SLUB or SLAB lighter weight and more tunable for these cases
> ends up being a real barrier.

It's obviously possible, and as far as I can tell the architecture you
have there requires it to operate. But the question is how much special
casing we will have to add to the core VM.

We would likely have to add a 

slub_nodes=

parameter that allows specifying the list of nodes that the slab
allocator may use. Then modify SLUB to use its own nodemask instead of
the node online map, and modify get_partial_node to skip any node not
in that nodemask and go to get_any_partial immediately. In addition to
checking cpuset_zone_allowed we would need to check the slab node list.

Hmm.... That would also help to create isolated nodes that have no memory 
on them.

See what evil things you drive me to...

Could you try this patch (untested)? Set the allowed nodes on boot
with

slub_nodes=0

if you have only node 0 for SLUB.
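
Since the handler feeds everything after the '=' to nodelist_parse(),
the usual comma-separated node ranges should work as well; e.g. (my
reading of the nodemask code, untested)

slub_nodes=0-1,3

would restrict SLUB to nodes 0, 1 and 3.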

---
 mm/slub.c |   69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 63 insertions(+), 6 deletions(-)

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2007-06-07 20:32:30.000000000 -0700
+++ linux-2.6/mm/slub.c	2007-06-07 20:48:19.000000000 -0700
@@ -270,6 +270,20 @@ static inline struct kmem_cache_node *ge
 #endif
 }
 
+#ifdef CONFIG_NUMA
+static nodemask_t slub_nodes = NODE_MASK_ALL;
+
+static inline int forbidden_node(int node)
+{
+	return !node_isset(node, slub_nodes);
+}
+#else
+static inline int forbidden_node(int node)
+{
+	return 0;
+}
+#endif
+
 static inline int check_valid_pointer(struct kmem_cache *s,
 				struct page *page, const void *object)
 {
@@ -1242,8 +1256,12 @@ static struct page *get_any_partial(stru
 					->node_zonelists[gfp_zone(flags)];
 	for (z = zonelist->zones; *z; z++) {
 		struct kmem_cache_node *n;
+		int node = zone_to_nid(*z);
 
-		n = get_node(s, zone_to_nid(*z));
+		if (forbidden_node(node))
+			continue;
+
+		n = get_node(s, node);
 
 		if (n && cpuset_zone_allowed_hardwall(*z, flags) &&
 				n->nr_partial > MIN_PARTIAL) {
@@ -1261,10 +1279,12 @@ static struct page *get_any_partial(stru
  */
 static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
 {
-	struct page *page;
+	struct page *page = NULL;
 	int searchnode = (node == -1) ? numa_node_id() : node;
 
-	page = get_partial_node(get_node(s, searchnode));
+	if (!forbidden_node(searchnode))
+		page = get_partial_node(get_node(s, searchnode));
+
 	if (page || (flags & __GFP_THISNODE))
 		return page;
 
@@ -1819,7 +1839,11 @@ static void free_kmem_cache_nodes(struct
 	int node;
 
 	for_each_online_node(node) {
-		struct kmem_cache_node *n = s->node[node];
+		struct kmem_cache_node *n;
+
+		if (forbidden_node(node))
+			continue;
+	n = s->node[node];
 		if (n && n != &s->local_node)
 			kmem_cache_free(kmalloc_caches, n);
 		s->node[node] = NULL;
@@ -1839,6 +1863,9 @@ static int init_kmem_cache_nodes(struct 
 	for_each_online_node(node) {
 		struct kmem_cache_node *n;
 
+		if (forbidden_node(node))
+			continue;
+
 		if (local_node == node)
 			n = &s->local_node;
 		else {
@@ -2092,7 +2119,12 @@ static int kmem_cache_close(struct kmem_
 
 	/* Attempt to free all objects */
 	for_each_online_node(node) {
-		struct kmem_cache_node *n = get_node(s, node);
+		struct kmem_cache_node *n;
+
+		if (forbidden_node(node))
+			continue;
+
+		n = get_node(s, node);
 
 		n->nr_partial -= free_list(s, n, &n->partial);
 		if (atomic_long_read(&n->nr_slabs))
@@ -2167,6 +2199,17 @@ static int __init setup_slub_nomerge(cha
 
 __setup("slub_nomerge", setup_slub_nomerge);
 
+#ifdef CONFIG_NUMA
+static int __init setup_slub_nodes(char *str)
+{
+	if (*str == '=')
+		nodelist_parse(str + 1, slub_nodes);
+	return 1;
+}
+
+__setup("slub_nodes", setup_slub_nodes);
+#endif
+
 static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
 		const char *name, int size, gfp_t gfp_flags)
 {
@@ -2329,6 +2372,9 @@ int kmem_cache_shrink(struct kmem_cache 
 
 	flush_all(s);
 	for_each_online_node(node) {
+		if (forbidden_node(node))
+			continue;
+
 		n = get_node(s, node);
 
 		if (!n->nr_partial)
@@ -2755,7 +2801,12 @@ static unsigned long validate_slab_cache
 
 	flush_all(s);
 	for_each_online_node(node) {
-		struct kmem_cache_node *n = get_node(s, node);
+		struct kmem_cache_node *n;
+
+		if (forbidden_node(node))
+			continue;
+
+		n = get_node(s, node);
 
 		count += validate_slab_node(s, n);
 	}
@@ -2981,6 +3032,9 @@ static int list_locations(struct kmem_ca
 		unsigned long flags;
 		struct page *page;
 
+		if (forbidden_node(node))
+			continue;
+
 		if (!atomic_read(&n->nr_slabs))
 			continue;
 
@@ -3104,6 +3158,9 @@ static unsigned long slab_objects(struct
 	for_each_online_node(node) {
 		struct kmem_cache_node *n = get_node(s, node);
 
+		if (forbidden_node(node))
+			continue;
+
 		if (flags & SO_PARTIAL) {
 			if (flags & SO_OBJECTS)
 				x = count_partial(n);


Thread overview: 21+ messages
2007-06-07  1:17 Paul Mundt
2007-06-08  1:01 ` Andrew Morton
2007-06-08  2:47   ` Christoph Lameter
2007-06-08  3:01     ` Andrew Morton
2007-06-08  3:11       ` Christoph Lameter
2007-06-08  3:25     ` Paul Mundt
2007-06-08  3:49       ` Christoph Lameter [this message]
2007-06-08  4:13         ` Paul Mundt
2007-06-08  4:27           ` Christoph Lameter
2007-06-08  6:05             ` Paul Mundt
2007-06-08  6:09               ` Christoph Lameter
2007-06-08  6:27                 ` Paul Mundt
2007-06-08  6:43                   ` Christoph Lameter
2007-06-08 14:50       ` Matt Mackall
2007-06-12  2:36         ` Nick Piggin
2007-06-12  9:43         ` Paul Mundt
2007-06-12 15:32           ` Matt Mackall
2007-06-13  2:10             ` Nick Piggin
2007-06-13  3:12               ` Matt Mackall
2007-06-13  2:53             ` Paul Mundt
2007-06-13  3:16               ` Matt Mackall
