Modify the Slab Allocator to support the addition of a Critical Pool to the
VM.  We want to ensure that if a cache is allocated a new slab page from the
Critical Pool during an emergency situation, only other __GFP_CRITICAL
allocations are satisfied from that slab.

Signed-off-by: Matthew Dobson

Index: linux-2.6.15-rc5+critical_pool/mm/slab.c
===================================================================
--- linux-2.6.15-rc5+critical_pool.orig/mm/slab.c	2005-12-13 16:14:25.757617592 -0800
+++ linux-2.6.15-rc5+critical_pool/mm/slab.c	2005-12-13 16:32:08.300086584 -0800
@@ -221,8 +221,9 @@ struct slab {
 	unsigned long colouroff;
 	void *s_mem;			/* including colour offset */
 	unsigned int inuse;		/* num of objs active in slab */
-	kmem_bufctl_t free;
+	unsigned short critical;	/* is this a critical slab? */
 	unsigned short nodeid;
+	kmem_bufctl_t free;
 };
 
 /*
@@ -395,6 +396,9 @@ struct kmem_cache {
 	unsigned int slab_size;
 	unsigned int dflags;		/* dynamic flags */
 
+	/* list of critical slabs for this cache */
+	struct list_head slabs_crit;
+
 	/* constructor func */
 	void (*ctor)(void *, kmem_cache_t *, unsigned long);
@@ -1770,6 +1774,7 @@ next:
 		cachep->gfpflags |= GFP_DMA;
 	spin_lock_init(&cachep->spinlock);
 	cachep->objsize = size;
+	INIT_LIST_HEAD(&cachep->slabs_crit);
 
 	if (flags & CFLGS_OFF_SLAB)
 		cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
@@ -2086,6 +2091,7 @@ static struct slab* alloc_slabmgmt(kmem_
 	slabp->inuse = 0;
 	slabp->colouroff = colour_off;
 	slabp->s_mem = objp+colour_off;
+	slabp->critical = 0;
 
 	return slabp;
 }
@@ -2161,7 +2167,8 @@ static void *get_object(kmem_cache_t *ca
 	next = slab_bufctl(slabp)[slabp->free];
 #if DEBUG
 	slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
-	WARN_ON(slabp->nodeid != nodeid);
+	if (nodeid >= 0)
+		WARN_ON(slabp->nodeid != nodeid);
 #endif
 	slabp->free = next;
@@ -2175,7 +2182,8 @@ static void return_object(kmem_cache_t *
 #if DEBUG
 	/* Verify that the slab belongs to the intended node */
-	WARN_ON(slabp->nodeid != nodeid);
+	if (nodeid >= 0)
+		WARN_ON(slabp->nodeid != nodeid);
 
 	if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
 		printk(KERN_ERR "slab: double free detected in cache "
@@ -2324,18 +2332,64 @@ bad:
 #define check_slabp(x,y) do { } while(0)
 #endif
 
+static inline int is_critical_object(void *obj)
+{
+	struct slab *slabp;
+
+	if (!obj)
+		return 0;
+
+	slabp = page_get_slab(virt_to_page(obj));
+	return slabp->critical;
+}
+
+static inline void *get_critical_object(kmem_cache_t *cachep, gfp_t flags)
+{
+	struct slab *slabp;
+	void *objp = NULL;
+
+	spin_lock(&cachep->spinlock);
+	/* search for any partially free critical slabs */
+	if (!list_empty(&cachep->slabs_crit))
+		list_for_each_entry(slabp, &cachep->slabs_crit, list)
+			if (slabp->free != BUFCTL_END) {
+				objp = get_object(cachep, slabp, -1);
+				check_slabp(cachep, slabp);
+				break;
+			}
+	spin_unlock(&cachep->spinlock);
+
+	return objp;
+}
+
+static inline void free_critical_object(kmem_cache_t *cachep, void *objp)
+{
+	struct slab *slabp = page_get_slab(virt_to_page(objp));
+
+	check_slabp(cachep, slabp);
+	return_object(cachep, slabp, objp, -1);
+	check_slabp(cachep, slabp);
+
+	if (!slabp->inuse) {
+		BUG_ON(cachep->flags & SLAB_DESTROY_BY_RCU);
+
+		list_del(&slabp->list);
+		slab_destroy(cachep, slabp);
+	}
+}
+
 /**
  * Grow (by 1) the number of slabs within a cache.  This is called by
  * kmem_cache_alloc() when there are no active objs left in a cache.
  */
-static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid)
+static void *cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid)
 {
 	struct slab *slabp;
 	void *objp;
 	size_t offset;
 	gfp_t local_flags;
 	unsigned long ctor_flags;
-	struct kmem_list3 *l3;
+	int critical = is_emergency_alloc(flags) && !cachep->gfporder;
 
 	/*
 	 * Be lazy and only check for valid flags here,
@@ -2344,7 +2398,14 @@ static int cache_grow(kmem_cache_t *cach
 	if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW))
 		BUG();
 	if (flags & SLAB_NO_GROW)
-		return 0;
+		return NULL;
+
+	/*
+	 * If we are in an emergency situation and this is a 'critical' alloc,
+	 * see if we can get an object from an existing critical slab first.
+	 */
+	if (critical && (objp = get_critical_object(cachep, flags)))
+		return objp;
 
 	ctor_flags = SLAB_CTOR_CONSTRUCTOR;
 	local_flags = (flags & SLAB_LEVEL_MASK);
@@ -2391,21 +2452,30 @@ static int cache_grow(kmem_cache_t *cach
 	if (local_flags & __GFP_WAIT)
 		local_irq_disable();
 	check_irq_off();
-	l3 = cachep->nodelists[nodeid];
-	spin_lock(&l3->list_lock);
 
 	/* Make slab active. */
-	list_add_tail(&slabp->list, &(l3->slabs_free));
 	STATS_INC_GROWN(cachep);
-	l3->free_objects += cachep->num;
-	spin_unlock(&l3->list_lock);
-	return 1;
+	if (!critical) {
+		struct kmem_list3 *l3 = cachep->nodelists[nodeid];
+		spin_lock(&l3->list_lock);
+		list_add_tail(&slabp->list, &l3->slabs_free);
+		l3->free_objects += cachep->num;
+		spin_unlock(&l3->list_lock);
+	} else {
+		slabp->critical = 1;
+		spin_lock(&cachep->spinlock);
+		list_add_tail(&slabp->list, &cachep->slabs_crit);
+		spin_unlock(&cachep->spinlock);
+		objp = get_object(cachep, slabp, -1);
+		check_slabp(cachep, slabp);
+	}
+	return objp;
 failed_freepages:
 	kmem_freepages(cachep, objp);
 failed:
 	if (local_flags & __GFP_WAIT)
 		local_irq_disable();
-	return 0;
+	return NULL;
 }
 
 static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags)
@@ -2483,15 +2553,18 @@ alloc_done:
 	spin_unlock(&l3->list_lock);
 
 	if (unlikely(!ac->avail)) {
-		int x;
-		x = cache_grow(cachep, flags, numa_node_id());
+		void *obj = cache_grow(cachep, flags, numa_node_id());
+
+		/* critical objects don't "grow" the slab, just return 'obj' */
+		if (is_critical_object(obj))
+			return obj;
 
-		// cache_grow can reenable interrupts, then ac could change.
+		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = ac_data(cachep);
-		if (!x && ac->avail == 0)	// no objects in sight? abort
+		if (!obj && ac->avail == 0)	/* No objects in sight? Abort. */
 			return NULL;
 
-		if (!ac->avail)			// objects refilled by interrupt?
+		if (!ac->avail)			/* objects refilled by interrupt? */
 			goto retry;
 	}
 	ac->touched = 1;
@@ -2597,7 +2670,6 @@ static void *__cache_alloc_node(kmem_cac
 	struct slab *slabp;
 	struct kmem_list3 *l3;
 	void *obj;
-	int x;
 
 	l3 = cachep->nodelists[nodeid];
 	BUG_ON(!l3);
@@ -2639,11 +2711,15 @@ retry:
 must_grow:
 	spin_unlock(&l3->list_lock);
-	x = cache_grow(cachep, flags, nodeid);
+	obj = cache_grow(cachep, flags, nodeid);
 
-	if (!x)
+	if (!obj)
 		return NULL;
 
+	/* critical objects don't "grow" the slab, just return 'obj' */
+	if (is_critical_object(obj))
+		goto done;
+
 	goto retry;
 done:
 	return obj;
@@ -2758,6 +2834,11 @@ static inline void __cache_free(kmem_cac
 	check_irq_off();
 	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
+	if (is_critical_object(objp)) {
+		free_critical_object(cachep, objp);
+		return;
+	}
+
 	/* Make sure we are not freeing a object from another
 	 * node to the array cache on this cpu.
 	 */
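
For context, here is a minimal sketch of how a caller would use the slab side
of this change.  It assumes the __GFP_CRITICAL flag and the
is_emergency_alloc() test introduced earlier in this series (assumed here to
succeed only for __GFP_CRITICAL allocations while the system is in its
emergency state); the cache and function names below (nbd_ctx_cache,
nbd_alloc_ctx, nbd_free_ctx) are made up for illustration and are not part of
the patch.  Also note that only order-0 caches take the critical path in
cache_grow() above, because of the !cachep->gfporder check.

/* Illustration only -- not part of this patch. */
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/gfp.h>

struct nbd_request_ctx {		/* hypothetical per-request bookkeeping */
	struct list_head list;
	unsigned long sector;
};

static kmem_cache_t *nbd_ctx_cache;	/* created elsewhere with kmem_cache_create() */

/*
 * Writeout path that must make forward progress during an emergency.
 * Tagging the allocation with __GFP_CRITICAL lets cache_grow() either
 * reuse a partially free slab on cachep->slabs_crit or mark a freshly
 * grown slab as critical.
 */
static struct nbd_request_ctx *nbd_alloc_ctx(void)
{
	return kmem_cache_alloc(nbd_ctx_cache, GFP_ATOMIC | __GFP_CRITICAL);
}

static void nbd_free_ctx(struct nbd_request_ctx *ctx)
{
	/* __cache_free() spots critical objects and routes them to
	 * free_critical_object() instead of the per-cpu array cache. */
	kmem_cache_free(nbd_ctx_cache, ctx);
}

The design point to notice is that critical objects bypass the per-cpu array
caches and the per-node lists entirely: they are handed out from, and freed
back to, slabs on cachep->slabs_crit under cachep->spinlock, which is why
__cache_free() checks is_critical_object() before touching the array cache.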