From: David Rientjes <rientjes@google.com>
To: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>, linux-mm@kvack.org
Subject: Re: slub: remove dynamic dma slab allocation
Date: Fri, 18 Jun 2010 15:30:13 -0700 (PDT)
Message-ID: <alpine.DEB.2.00.1006181513060.20110@chino.kir.corp.google.com>
In-Reply-To: <alpine.DEB.2.00.1006151406120.10865@router.home>
On Tue, 15 Jun 2010, Christoph Lameter wrote:
> Index: linux-2.6/mm/slub.c
> ===================================================================
> --- linux-2.6.orig/mm/slub.c 2010-06-15 12:40:58.000000000 -0500
> +++ linux-2.6/mm/slub.c 2010-06-15 12:41:36.000000000 -0500
> @@ -2070,7 +2070,7 @@ init_kmem_cache_node(struct kmem_cache_n
>
> static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[KMALLOC_CACHES]);
>
> -static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
> +static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
> {
> if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches)
Looks like it'll conflict with "SLUB: is_kmalloc_cache" in slub/cleanups.
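(Sketch only, I haven't checked that patch, but I assume the helper it
adds looks roughly like

	static inline int is_kmalloc_cache(struct kmem_cache *s)
	{
		return s >= kmalloc_caches &&
			s < kmalloc_caches + KMALLOC_CACHES;
	}

so this hunk would just become a call to it after a rebase.)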
> /*
> @@ -2097,7 +2097,7 @@ static inline int alloc_kmem_cache_cpus(
> * when allocating for the kmalloc_node_cache. This is used for bootstrapping
> * memory on a fresh node that has no slab structures yet.
> */
> -static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node)
> +static void early_kmem_cache_node_alloc(int node)
> {
> struct page *page;
> struct kmem_cache_node *n;
> @@ -2105,7 +2105,7 @@ static void early_kmem_cache_node_alloc(
>
> BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
>
> - page = new_slab(kmalloc_caches, gfpflags, node);
> + page = new_slab(kmalloc_caches, GFP_KERNEL, node);
>
> BUG_ON(!page);
> if (page_to_nid(page) != node) {
Hmm, I'm not sure about this. We can't do GFP_KERNEL allocations in
kmem_cache_init(); they must be deferred to kmem_cache_init_late(). So
this will be allocating the kmem_cache_node cache while slab_state is
still DOWN, yet passing GFP_KERNEL via early_kmem_cache_node_alloc().
I think this has to be GFP_NOWAIT instead.
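i.e. (untested):

	page = new_slab(kmalloc_caches, GFP_NOWAIT, node);

Since early_kmem_cache_node_alloc() is only called when slab_state is
DOWN, GFP_NOWAIT is always the right mask here; there's no context in
which this path could legitimately sleep.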
> @@ -2149,7 +2149,7 @@ static void free_kmem_cache_nodes(struct
> }
> }
>
> -static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
> +static int init_kmem_cache_nodes(struct kmem_cache *s)
> {
> int node;
>
> @@ -2157,11 +2157,11 @@ static int init_kmem_cache_nodes(struct
> struct kmem_cache_node *n;
>
> if (slab_state == DOWN) {
> - early_kmem_cache_node_alloc(gfpflags, node);
> + early_kmem_cache_node_alloc(node);
> continue;
> }
> n = kmem_cache_alloc_node(kmalloc_caches,
> - gfpflags, node);
> + GFP_KERNEL, node);
>
> if (!n) {
> free_kmem_cache_nodes(s);
Same here: this can still lead to GFP_KERNEL allocations from
kmem_cache_init(), because slab_state is already PARTIAL or UP by then.
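Untested sketch of the minimal alternative:

	n = kmem_cache_alloc_node(kmalloc_caches,
					GFP_NOWAIT, node);

GFP_NOWAIT is legal in every slab_state; the cost is that caches
created after boot can no longer sleep for reclaim here, which may or
may not be acceptable to you.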
> @@ -2178,7 +2178,7 @@ static void free_kmem_cache_nodes(struct
> {
> }
>
> -static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
> +static int init_kmem_cache_nodes(struct kmem_cache *s)
> {
> init_kmem_cache_node(&s->local_node, s);
> return 1;
> @@ -2318,7 +2318,7 @@ static int calculate_sizes(struct kmem_c
>
> }
>
> -static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
> +static int kmem_cache_open(struct kmem_cache *s,
> const char *name, size_t size,
> size_t align, unsigned long flags,
> void (*ctor)(void *))
> @@ -2354,10 +2354,10 @@ static int kmem_cache_open(struct kmem_c
> #ifdef CONFIG_NUMA
> s->remote_node_defrag_ratio = 1000;
> #endif
> - if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
> + if (!init_kmem_cache_nodes(s))
> goto error;
>
> - if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA))
> + if (alloc_kmem_cache_cpus(s))
> return 1;
>
> free_kmem_cache_nodes(s);
> @@ -2517,6 +2517,10 @@ EXPORT_SYMBOL(kmem_cache_destroy);
> struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned;
> EXPORT_SYMBOL(kmalloc_caches);
>
> +#ifdef CONFIG_ZONE_DMA
> +static struct kmem_cache kmalloc_dma_caches[SLUB_PAGE_SHIFT];
> +#endif
> +
> static int __init setup_slub_min_order(char *str)
> {
> get_option(&str, &slub_min_order);
> @@ -2553,116 +2557,26 @@ static int __init setup_slub_nomerge(cha
>
> __setup("slub_nomerge", setup_slub_nomerge);
>
> -static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
> - const char *name, int size, gfp_t gfp_flags)
> +static void create_kmalloc_cache(struct kmem_cache *s,
> + const char *name, int size, unsigned int flags)
> {
> - unsigned int flags = 0;
> -
> - if (gfp_flags & SLUB_DMA)
> - flags = SLAB_CACHE_DMA;
> -
> /*
> * This function is called with IRQs disabled during early-boot on
> * single CPU so there's no need to take slub_lock here.
> */
> - if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
> + if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN,
> flags, NULL))
> goto panic;
>
> list_add(&s->list, &slab_caches);
>
> - if (sysfs_slab_add(s))
> - goto panic;
> - return s;
> + if (!sysfs_slab_add(s))
> + return;
>
> panic:
> panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
> }
>
> -#ifdef CONFIG_ZONE_DMA
> -static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT];
> -
> -static void sysfs_add_func(struct work_struct *w)
> -{
> - struct kmem_cache *s;
> -
> - down_write(&slub_lock);
> - list_for_each_entry(s, &slab_caches, list) {
> - if (s->flags & __SYSFS_ADD_DEFERRED) {
> - s->flags &= ~__SYSFS_ADD_DEFERRED;
> - sysfs_slab_add(s);
> - }
> - }
> - up_write(&slub_lock);
> -}
> -
> -static DECLARE_WORK(sysfs_add_work, sysfs_add_func);
> -
> -static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
> -{
> - struct kmem_cache *s;
> - char *text;
> - size_t realsize;
> - unsigned long slabflags;
> - int i;
> -
> - s = kmalloc_caches_dma[index];
> - if (s)
> - return s;
> -
> - /* Dynamically create dma cache */
> - if (flags & __GFP_WAIT)
> - down_write(&slub_lock);
> - else {
> - if (!down_write_trylock(&slub_lock))
> - goto out;
> - }
> -
> - if (kmalloc_caches_dma[index])
> - goto unlock_out;
> -
> - realsize = kmalloc_caches[index].objsize;
> - text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
> - (unsigned int)realsize);
> -
> - s = NULL;
> - for (i = 0; i < KMALLOC_CACHES; i++)
> - if (!kmalloc_caches[i].size)
> - break;
> -
> - BUG_ON(i >= KMALLOC_CACHES);
> - s = kmalloc_caches + i;
> -
> - /*
> - * Must defer sysfs creation to a workqueue because we don't know
> - * what context we are called from. Before sysfs comes up, we don't
> - * need to do anything because our sysfs initcall will start by
> - * adding all existing slabs to sysfs.
> - */
> - slabflags = SLAB_CACHE_DMA|SLAB_NOTRACK;
> - if (slab_state >= SYSFS)
> - slabflags |= __SYSFS_ADD_DEFERRED;
> -
> - if (!text || !kmem_cache_open(s, flags, text,
> - realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) {
> - s->size = 0;
> - kfree(text);
> - goto unlock_out;
> - }
> -
> - list_add(&s->list, &slab_caches);
> - kmalloc_caches_dma[index] = s;
> -
> - if (slab_state >= SYSFS)
> - schedule_work(&sysfs_add_work);
> -
> -unlock_out:
> - up_write(&slub_lock);
> -out:
> - return kmalloc_caches_dma[index];
> -}
> -#endif
> -
> /*
> * Conversion table for small slabs sizes / 8 to the index in the
> * kmalloc array. This is necessary for slabs < 192 since we have non power
> @@ -2715,7 +2629,7 @@ static struct kmem_cache *get_slab(size_
>
> #ifdef CONFIG_ZONE_DMA
> if (unlikely((flags & SLUB_DMA)))
> - return dma_kmalloc_cache(index, flags);
> + return &kmalloc_dma_caches[index];
>
> #endif
> return &kmalloc_caches[index];
> @@ -3053,7 +2967,7 @@ void __init kmem_cache_init(void)
> * kmem_cache_open for slab_state == DOWN.
> */
> create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node",
> - sizeof(struct kmem_cache_node), GFP_NOWAIT);
> + sizeof(struct kmem_cache_node), 0);
> kmalloc_caches[0].refcount = -1;
> caches++;
>
> @@ -3066,18 +2980,18 @@ void __init kmem_cache_init(void)
> /* Caches that are not of the two-to-the-power-of size */
> if (KMALLOC_MIN_SIZE <= 32) {
> create_kmalloc_cache(&kmalloc_caches[1],
> - "kmalloc-96", 96, GFP_NOWAIT);
> + "kmalloc-96", 96, 0);
> caches++;
> }
> if (KMALLOC_MIN_SIZE <= 64) {
> create_kmalloc_cache(&kmalloc_caches[2],
> - "kmalloc-192", 192, GFP_NOWAIT);
> + "kmalloc-192", 192, 0);
> caches++;
> }
>
> for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
> create_kmalloc_cache(&kmalloc_caches[i],
> - "kmalloc", 1 << i, GFP_NOWAIT);
> + "kmalloc", 1 << i, 0);
> caches++;
> }
>
> @@ -3124,7 +3038,7 @@ void __init kmem_cache_init(void)
>
> /* Provide the correct kmalloc names now that the caches are up */
> for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++)
> - kmalloc_caches[i]. name =
> + kmalloc_caches[i].name =
> kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
>
> #ifdef CONFIG_SMP
> @@ -3147,6 +3061,21 @@ void __init kmem_cache_init(void)
>
> void __init kmem_cache_init_late(void)
> {
> +#ifdef CONFIG_ZONE_DMA
> + int i;
> +
> + for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
> + struct kmem_cache *s = &kmalloc_caches[i];
> +
> + if (s && s->size) {
> + char *name = kasprintf(GFP_KERNEL,
> + "dma-kmalloc-%d", s->objsize);
kasprintf() can return NULL, which isn't caught by kmem_cache_open().
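Untested, but a

	BUG_ON(!name);

right after the kasprintf() would cover it; create_kmalloc_cache()
already panics on any other failure at this stage, so that seems
consistent.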
> +
> + create_kmalloc_cache(&kmalloc_dma_caches[i],
> + name, s->objsize, SLAB_CACHE_DMA);
> + }
> + }
> +#endif
> }
>
> /*
> @@ -3241,7 +3170,7 @@ struct kmem_cache *kmem_cache_create(con
>
> s = kmalloc(kmem_size, GFP_KERNEL);
> if (s) {
> - if (kmem_cache_open(s, GFP_KERNEL, name,
> + if (kmem_cache_open(s, name,
> size, align, flags, ctor)) {
> list_add(&s->list, &slab_caches);
> up_write(&slub_lock);
>