From mboxrd@z Thu Jan 1 00:00:00 1970
Date: Thu, 7 Jun 2007 21:27:01 -0700 (PDT)
From: Christoph Lameter
Subject: Re: [PATCH] numa: mempolicy: dynamic interleave map for system init.
In-Reply-To: <20070608041303.GA13603@linux-sh.org>
Message-ID:
References: <20070607011701.GA14211@linux-sh.org>
 <20070607180108.0eeca877.akpm@linux-foundation.org>
 <20070608032505.GA13227@linux-sh.org>
 <20070608041303.GA13603@linux-sh.org>
MIME-Version: 1.0
Content-Type: TEXT/PLAIN; charset=US-ASCII
Sender: owner-linux-mm@kvack.org
Return-Path:
To: Paul Mundt
Cc: Andrew Morton, linux-mm@kvack.org, ak@suse.de, hugh@veritas.com,
 lee.schermerhorn@hp.com, mpm@selenic.com
List-ID:

On Fri, 8 Jun 2007, Paul Mundt wrote:

> Node 1 SUnreclaim: 8 kB
> So at least that gets back the couple of slab pages!

Hmmmm... is that worth it?

The patch is not right, btw. There is still the case where new_slab() can
acquire a page on the wrong node, and since we are not set up to allow that
node in SLUB, we will crash.

This now gets a bit ugly. To avoid that situation, we first check whether
the node is allowed. If not, we simply ask for an allocation on the first
node. But the page allocator may still fall back; if that happens, we redo
the allocation with GFP_THISNODE to force an allocation on the first node
or fail.

I think we could do better by constructing a custom zonelist, but that
would be even more special-casing.
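For readability, here is a condensed sketch of the control flow that
new_slab() ends up with. This is a simplification of the diff below, not a
literal excerpt: the irq enable/disable, the vmstat accounting on the redo
path and the rest of the slab setup are left out.

/* Condensed from the patch below -- simplified, not compilable as-is. */
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
{
        struct page *page;
        struct kmem_cache_node *n;

        /* Requested node not set up in SLUB? Retarget the allocation
         * to the first node in the slub_nodes mask. */
        if (slab_state >= UP && !get_node(s, node))
                node = first_node(slub_nodes);

redo:
        page = allocate_slab(s, flags & GFP_LEVEL_MASK, node);
        if (!page)
                return NULL;

        n = get_node(s, page_to_nid(page));
        if (!n && slab_state >= UP) {
                /* The page allocator still fell back to a node we do not
                 * manage: give the page back and retry with GFP_THISNODE,
                 * so the allocation comes from the requested node or fails. */
                __free_pages(page, s->order);
                flags |= GFP_THISNODE;
                goto redo;
        }

        /* ... normal slab setup continues as before ... */
        return page;
}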
---
 mm/slub.c |   63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 58 insertions(+), 5 deletions(-)

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2007-06-07 21:01:32.000000000 -0700
+++ linux-2.6/mm/slub.c	2007-06-07 21:23:04.000000000 -0700
@@ -215,6 +215,10 @@ static inline void ClearSlabDebug(struct
 
 static int kmem_size = sizeof(struct kmem_cache);
 
+#ifdef CONFIG_NUMA
+static nodemask_t slub_nodes = NODE_MASK_ALL;
+#endif
+
 #ifdef CONFIG_SMP
 static struct notifier_block slab_notifier;
 #endif
@@ -1023,6 +1027,11 @@ static struct page *new_slab(struct kmem
 	if (flags & __GFP_WAIT)
 		local_irq_enable();
 
+	/* Hack: Just get the first node if the node is not allowed */
+	if (slab_state >= UP && !get_node(s, node))
+		node = first_node(slub_nodes);
+
+redo:
 	page = allocate_slab(s, flags & GFP_LEVEL_MASK, node);
 	if (!page)
 		goto out;
@@ -1030,6 +1039,27 @@ static struct page *new_slab(struct kmem
 	n = get_node(s, page_to_nid(page));
 	if (n)
 		atomic_long_inc(&n->nr_slabs);
+#ifdef CONFIG_NUMA
+	else {
+		if (slab_state >= UP) {
+			/*
+			 * The baaad page allocator gave us a page on a
+			 * node that we should not use. Force a page on
+			 * a legit node or fail.
+			 */
+			__free_pages(page, s->order);
+			flags |= GFP_THISNODE;
+
+			mod_zone_page_state(page_zone(page),
+				(s->flags & SLAB_RECLAIM_ACCOUNT) ?
+				NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
+				- (1 << s->order));
+
+			goto redo;
+		}
+	}
+#endif
+
 	page->offset = s->offset / sizeof(void *);
 	page->slab = s;
 	page->flags |= 1 << PG_slab;
@@ -1261,10 +1291,13 @@ static struct page *get_any_partial(stru
  */
 static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
 {
-	struct page *page;
+	struct page *page = NULL;
 	int searchnode = (node == -1) ? numa_node_id() : node;
+	struct kmem_cache_node *n = get_node(s, searchnode);
+
+	if (n)
+		page = get_partial_node(n);
 
-	page = get_partial_node(get_node(s, searchnode));
 	if (page || (flags & __GFP_THISNODE))
 		return page;
 
@@ -1820,12 +1853,22 @@ static void free_kmem_cache_nodes(struct
 
 	for_each_online_node(node) {
 		struct kmem_cache_node *n = s->node[node];
+
 		if (n && n != &s->local_node)
 			kmem_cache_free(kmalloc_caches, n);
 		s->node[node] = NULL;
 	}
 }
 
+static int __init setup_slub_nodes(char *str)
+{
+	if (*str == '=')
+		nodelist_parse(str + 1, slub_nodes);
+	return 1;
+}
+
+__setup("slub_nodes", setup_slub_nodes);
+
 static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
 {
 	int node;
@@ -1839,6 +1882,9 @@ static int init_kmem_cache_nodes(struct
 	for_each_online_node(node) {
 		struct kmem_cache_node *n;
 
+		if (!node_isset(node, slub_nodes))
+			continue;
+
 		if (local_node == node)
 			n = &s->local_node;
 		else {
@@ -2094,6 +2140,9 @@ static int kmem_cache_close(struct kmem_
 	for_each_online_node(node) {
 		struct kmem_cache_node *n = get_node(s, node);
 
+		if (!n)
+			continue;
+
 		n->nr_partial -= free_list(s, n, &n->partial);
 		if (atomic_long_read(&n->nr_slabs))
 			return 1;
@@ -2331,7 +2380,7 @@ int kmem_cache_shrink(struct kmem_cache
 	for_each_online_node(node) {
 		n = get_node(s, node);
 
-		if (!n->nr_partial)
+		if (!n || !n->nr_partial)
 			continue;
 
 		for (i = 0; i < s->objects; i++)
@@ -2757,7 +2806,8 @@ static unsigned long validate_slab_cache
 	for_each_online_node(node) {
 		struct kmem_cache_node *n = get_node(s, node);
 
-		count += validate_slab_node(s, n);
+		if (n)
+			count += validate_slab_node(s, n);
 	}
 	return count;
 }
@@ -2981,7 +3031,7 @@ static int list_locations(struct kmem_ca
 		unsigned long flags;
 		struct page *page;
 
-		if (!atomic_read(&n->nr_slabs))
+		if (!n || !atomic_read(&n->nr_slabs))
 			continue;
 
 		spin_lock_irqsave(&n->list_lock, flags);
@@ -3104,6 +3154,9 @@ static unsigned long slab_objects(struct
 	for_each_online_node(node) {
 		struct kmem_cache_node *n = get_node(s, node);
 
+		if (!n)
+			continue;
+
 		if (flags & SO_PARTIAL) {
 			if (flags & SO_OBJECTS)
 				x = count_partial(n);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: email@kvack.org