linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Matthew Dobson <colpatch@us.ibm.com>
To: linux-kernel@vger.kernel.org
Cc: Linux Memory Management <linux-mm@kvack.org>
Subject: [RFC][PATCH 8/8] Add critical pool support to the slab allocator
Date: Fri, 18 Nov 2005 11:47:05 -0800	[thread overview]
Message-ID: <437E2FB9.7050808@us.ibm.com> (raw)
In-Reply-To: <437E2C69.4000708@us.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 145 bytes --]

Finally, teach the slab allocator how to deal with critical pages and how
to keep them for use exclusively by __GFP_CRITICAL allocations.

-Matt

[-- Attachment #2: slab_support.patch --]
[-- Type: text/x-patch, Size: 6638 bytes --]

Modify the Slab Allocator to support the addition of a Critical Pool to the VM.
What we want is to ensure that if a cache is allocated a new slab page from the
Critical Pool during an Emergency situation, that only other __GFP_CRITICAL
allocations are satisfied from that slab.

Signed-off-by: Matthew Dobson <colpatch@us.ibm.com>

Index: linux-2.6.15-rc1+critical_pool/mm/slab.c
===================================================================
--- linux-2.6.15-rc1+critical_pool.orig/mm/slab.c	2005-11-17 16:51:22.965173864 -0800
+++ linux-2.6.15-rc1+critical_pool/mm/slab.c	2005-11-17 17:22:03.056437472 -0800
@@ -220,6 +220,7 @@ struct slab {
 	unsigned long		colouroff;
 	void			*s_mem;		/* including colour offset */
 	unsigned int		inuse;		/* # of objs active in slab */
+	unsigned short		critical;	/* is this a critical slab? */
 	kmem_bufctl_t		free;
 	unsigned short          nid;		/* node number slab is on */
 };
@@ -395,6 +396,9 @@ struct kmem_cache {
 	unsigned int		slab_size;
 	unsigned int		dflags;		/* dynamic flags */
 
+	/* list of critical slabs for this cache */
+	struct list_head	slabs_crit;
+
 	/* constructor func */
 	void (*ctor)(void *, kmem_cache_t *, unsigned long);
 
@@ -1770,6 +1774,7 @@ kmem_cache_t *kmem_cache_create(const ch
 		cachep->gfpflags |= GFP_DMA;
 	spin_lock_init(&cachep->spinlock);
 	cachep->objsize = size;
+	INIT_LIST_HEAD(&cachep->slabs_crit);
 
 	if (flags & CFLGS_OFF_SLAB)
 		cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
@@ -2090,6 +2095,7 @@ static struct slab *alloc_slabmgmt(kmem_
 	slabp->inuse = 0;
 	slabp->colouroff = colour_off;
 	slabp->s_mem = objp + colour_off;
+	slabp->critical = 0;
 
 	return slabp;
 }
@@ -2182,7 +2188,8 @@ static void return_object(kmem_cache_t *
 
 #if DEBUG
 	/* Verify that the slab belongs to the intended node */
-	WARN_ON(slabp->nid != nid);
+	if (nid >= 0)
+		WARN_ON(slabp->nid != nid);
 
 	if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
 		printk(KERN_ERR "slab: double free detected in cache "
@@ -2341,6 +2348,24 @@ bad:
 #define check_slabp(x,y)			do { } while(0)
 #endif
 
+static inline struct slab *get_critical_slab(kmem_cache_t *cachep, gfp_t flags)
+{
+	struct slab *slabp = NULL;
+
+	spin_lock(&cachep->spinlock);
+	/* search for any partially free critical slabs */
+	if (!list_empty(&cachep->slabs_crit)) {
+		list_for_each_entry(slabp, &cachep->slabs_crit, list)
+			if (slabp->free != BUFCTL_END)
+				goto found;
+		slabp = NULL;
+	}
+found:
+	spin_unlock(&cachep->spinlock);
+
+	return slabp;
+}
+
 /**
  * Helper function for cache_grow().  Handle cache coloring, allocating a
  * struct slab and initializing the slab.
@@ -2396,10 +2421,11 @@ out:
  * Grow (by 1) the number of slabs within a cache.  This is called by
  * kmem_cache_alloc() when there are no active objs left in a cache.
  */
-static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nid)
+static void *cache_grow(kmem_cache_t *cachep, gfp_t flags, int nid)
 {
 	struct slab *slabp = NULL;
 	void *objp = NULL;
+	int critical = is_emergency_alloc(flags);
 
 	/*
 	 * Be lazy and only check for valid flags here,
@@ -2411,6 +2437,13 @@ static int cache_grow(kmem_cache_t *cach
 		goto out;
 
 	/*
+	 * We are in an emergency situation and this is a 'critical' alloc,
+	 * so check if we've got an existing critical slab first
+	 */
+	if (critical && (slabp = get_critical_slab(cachep, flags)))
+		goto got_critical_slab;
+
+	/*
 	 * Ensure caller isn't asking for DMA memory if the slab wasn't created
 	 * with the SLAB_DMA flag.
 	 * Also ensure the caller *is* asking for DMA memory if the slab was
@@ -2431,13 +2464,34 @@ static int cache_grow(kmem_cache_t *cach
 
 		STATS_INC_GROWN(cachep);
 		/* Make slab active. */
-		spin_lock(&l3->list_lock);
-		list_add_tail(&slabp->list, &l3->slabs_free);
-		l3->free_objects += cachep->num;
-		spin_unlock(&l3->list_lock);
+		if (!critical) {
+			spin_lock(&l3->list_lock);
+			list_add_tail(&slabp->list, &l3->slabs_free);
+			l3->free_objects += cachep->num;
+			spin_unlock(&l3->list_lock);
+		} else {
+			spin_lock(&cachep->spinlock);
+			list_add_tail(&slabp->list, &cachep->slabs_crit);
+			slabp->critical = 1;
+			spin_unlock(&cachep->spinlock);
+got_critical_slab:
+			objp = get_object(cachep, slabp, nid);
+			check_slabp(cachep, slabp);
+		}
 	}
 out:
-	return objp != NULL;
+	return objp;
+}
+
+static inline int is_critical_object(void *obj)
+{
+	struct slab *slabp;
+
+	if (!obj)
+		return 0;
+
+	slabp = GET_PAGE_SLAB(virt_to_page(obj));
+	return slabp->critical;
 }
 
 static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags)
@@ -2516,12 +2570,15 @@ alloc_done:
 	spin_unlock(&l3->list_lock);
 
 	if (unlikely(!ac->avail)) {
-		int x;
-		x = cache_grow(cachep, flags, numa_node_id());
+		void *obj = cache_grow(cachep, flags, numa_node_id());
+
+		/* critical objects don't "grow" the slab, just return 'obj' */
+		if (is_critical_object(obj))
+			return obj;
 
 		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = ac_data(cachep);
-		if (!x && ac->avail == 0) /* no objects in sight? abort      */
+		if (!obj && ac->avail == 0) /* No objects in sight?  Abort.  */
 			return NULL;
 
 		if (!ac->avail)		  /* objects refilled by interrupt?  */
@@ -2633,7 +2690,6 @@ static void *__cache_alloc_node(kmem_cac
 	struct slab *slabp;
 	struct kmem_list3 *l3;
 	void *obj;
-	int x;
 
 	l3 = cachep->nodelists[nid];
 	BUG_ON(!l3);
@@ -2675,11 +2731,15 @@ retry:
 
 must_grow:
 	spin_unlock(&l3->list_lock);
-	x = cache_grow(cachep, flags, nid);
+	obj = cache_grow(cachep, flags, nid);
 
-	if (!x)
+	if (!obj)
 		return NULL;
 
+	/* critical objects don't "grow" the slab, just return 'obj' */
+	if (is_critical_object(obj))
+		goto done;
+
 	goto retry;
 done:
 	return obj;
@@ -2780,6 +2840,22 @@ free_done:
 		sizeof(void *) * ac->avail);
 }
 
+static inline void free_critical_object(kmem_cache_t *cachep, void *objp)
+{
+	struct slab *slabp = GET_PAGE_SLAB(virt_to_page(objp));
+
+	check_slabp(cachep, slabp);
+	return_object(cachep, slabp, objp, -1);
+	check_slabp(cachep, slabp);
+
+	if (slabp->inuse == 0) {
+		BUG_ON(cachep->flags & SLAB_DESTROY_BY_RCU);
+		BUG_ON(cachep->gfporder);
+
+		list_del(&slabp->list);
+		slab_destroy(cachep, slabp);
+	}
+}
 
 /**
  * __cache_free
@@ -2795,6 +2871,11 @@ static inline void __cache_free(kmem_cac
 	check_irq_off();
 	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
+	if (is_critical_object(objp)) {
+		free_critical_object(cachep, objp);
+		return;
+	}
+
 	/*
 	 * Make sure we are not freeing a object from another
 	 * node to the array cache on this cpu.

  parent reply	other threads:[~2005-11-18 19:47 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-11-18 19:32 [RFC][PATCH 0/8] Critical Page Pool Matthew Dobson
2005-11-18 19:36 ` [RFC][PATCH 1/8] Create " Matthew Dobson
2005-11-19  0:08   ` Paul Jackson
2005-11-21  5:50     ` Matthew Dobson
2005-11-21  5:54       ` Paul Jackson
2005-11-18 19:36 ` [RFC][PATCH 2/8] Create emergency trigger Matthew Dobson
2005-11-19  0:21   ` Paul Jackson
2005-11-21  5:51     ` Matthew Dobson
2005-11-18 19:40 ` [RFC][PATCH 3/8] Slab cleanup Matthew Dobson
2005-11-18 19:41 ` [RFC][PATCH 4/8] Fix a bug in scsi_get_command Matthew Dobson
2005-11-18 19:43 ` [RFC][PATCH 5/8] get_object/return_object Matthew Dobson
2005-11-18 19:44 ` [RFC][PATCH 6/8] slab_destruct Matthew Dobson
2005-11-18 19:44 ` [RFC][PATCH 0/8] Critical Page Pool Avi Kivity
2005-11-18 19:51   ` Matthew Dobson
2005-11-18 20:42     ` Avi Kivity
2005-11-19  0:10       ` Paul Jackson
2005-11-21  5:36       ` Matthew Dobson
2005-11-18 19:45 ` [RFC][PATCH 7/8] __cache_grow() Matthew Dobson
2005-11-18 19:47 ` Matthew Dobson [this message]
2005-11-18 19:56 ` [RFC][PATCH 0/8] Critical Page Pool Chris Wright
2005-11-21  5:47   ` Matthew Dobson
2005-11-21 13:29     ` Pavel Machek
2005-12-06 22:54       ` Matthew Dobson
2005-12-10  8:39         ` Pavel Machek
2005-11-20  7:45 ` Keith Owens
2005-11-21  5:53   ` Matthew Dobson
2005-11-20 23:04 ` Pavel Machek
2005-11-21  5:58   ` Matthew Dobson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=437E2FB9.7050808@us.ibm.com \
    --to=colpatch@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox