linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	netdev@vger.kernel.org, trond.myklebust@fys.uio.no,
	neilb@suse.de, miklos@szeredi.hu, penberg@cs.helsinki.fi,
	a.p.zijlstra@chello.nl
Subject: [PATCH 05/30] mm: slb: add knowledge of reserve pages
Date: Thu, 20 Mar 2008 21:10:47 +0100	[thread overview]
Message-ID: <20080320202121.131372000@chello.nl> (raw)
In-Reply-To: <20080320201042.675090000@chello.nl>

[-- Attachment #1: reserve-slub.patch --]
[-- Type: text/plain, Size: 9772 bytes --]

Restrict objects from reserve slabs (ALLOC_NO_WATERMARKS) to allocation
contexts that are entitled to it. This is done to ensure reserve pages don't
leak out and get consumed.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/slub_def.h |    1 
 mm/slab.c                |   60 +++++++++++++++++++++++++++++++++++++++--------
 mm/slub.c                |   27 +++++++++++++++++----
 3 files changed, 74 insertions(+), 14 deletions(-)

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c
+++ linux-2.6/mm/slub.c
@@ -21,6 +21,7 @@
 #include <linux/ctype.h>
 #include <linux/kallsyms.h>
 #include <linux/memory.h>
+#include "internal.h"
 
 /*
  * Lock order:
@@ -1065,7 +1066,8 @@ static void setup_object(struct kmem_cac
 		s->ctor(s, object);
 }
 
-static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
+static
+struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node, int *reserve)
 {
 	struct page *page;
 	struct kmem_cache_node *n;
@@ -1091,6 +1093,7 @@ static struct page *new_slab(struct kmem
 	if (order)
 		page[1].inuse = objects;
 
+	*reserve = page->reserve;
 	n = get_node(s, page_to_nid(page));
 	if (n) {
 		atomic_long_inc(&n->nr_slabs);
@@ -1490,7 +1493,17 @@ static void *__slab_alloc(struct kmem_ca
 {
 	void **object;
 	struct page *new;
+	int reserve;
 
+	if (unlikely(c->reserve)) {
+		/*
+		 * If the current slab is a reserve slab and the current
+		 * allocation context does not allow access to the reserves we
+		 * must force an allocation to test the current levels.
+		 */
+		if (!(gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS))
+			goto grow_slab;
+	}
 	if (!c->page)
 		goto new_slab;
 
@@ -1504,7 +1517,7 @@ load_freelist:
 	object = c->page->freelist;
 	if (unlikely(!object))
 		goto another_slab;
-	if (unlikely(SlabDebug(c->page)))
+	if (unlikely(SlabDebug(c->page) || c->reserve))
 		goto debug;
 
 	c->freelist = object[c->offset];
@@ -1527,16 +1540,18 @@ new_slab:
 		goto load_freelist;
 	}
 
+grow_slab:
 	if (gfpflags & __GFP_WAIT)
 		local_irq_enable();
 
-	new = new_slab(s, gfpflags, node);
+	new = new_slab(s, gfpflags, node, &reserve);
 
 	if (gfpflags & __GFP_WAIT)
 		local_irq_disable();
 
 	if (new) {
 		c = get_cpu_slab(s, smp_processor_id());
+		c->reserve = reserve;
 		stat(c, ALLOC_SLAB);
 		if (c->page)
 			flush_slab(s, c);
@@ -1548,7 +1563,8 @@ new_slab:
 
 	return NULL;
 debug:
-	if (!alloc_debug_processing(s, c->page, object, addr))
+	if (SlabDebug(c->page) &&
+			!alloc_debug_processing(s, c->page, object, addr))
 		goto another_slab;
 
 	c->page->inuse++;
@@ -2038,10 +2054,11 @@ static struct kmem_cache_node *early_kme
 	struct page *page;
 	struct kmem_cache_node *n;
 	unsigned long flags;
+	int reserve;
 
 	BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
 
-	page = new_slab(kmalloc_caches, gfpflags, node);
+	page = new_slab(kmalloc_caches, gfpflags, node, &reserve);
 
 	BUG_ON(!page);
 	if (page_to_nid(page) != node) {
Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h
+++ linux-2.6/include/linux/slub_def.h
@@ -38,6 +38,7 @@ struct kmem_cache_cpu {
 	int node;		/* The node of the page (or -1 for debug) */
 	unsigned int offset;	/* Freepointer offset (in word units) */
 	unsigned int objsize;	/* Size of an object (from kmem_cache) */
+	int reserve;		/* Did the current page come from the reserve */
 #ifdef CONFIG_SLUB_STATS
 	unsigned stat[NR_SLUB_STAT_ITEMS];
 #endif
Index: linux-2.6/mm/slab.c
===================================================================
--- linux-2.6.orig/mm/slab.c
+++ linux-2.6/mm/slab.c
@@ -115,6 +115,8 @@
 #include	<asm/tlbflush.h>
 #include	<asm/page.h>
 
+#include 	"internal.h"
+
 /*
  * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
  *		  0 for faster, smaller code (especially in the critical paths).
@@ -261,7 +263,8 @@ struct array_cache {
 	unsigned int avail;
 	unsigned int limit;
 	unsigned int batchcount;
-	unsigned int touched;
+	unsigned int touched:1,
+		     reserve:1;
 	spinlock_t lock;
 	void *entry[];	/*
 			 * Must have this definition in here for the proper
@@ -757,6 +760,27 @@ static inline struct array_cache *cpu_ca
 	return cachep->array[smp_processor_id()];
 }
 
+/*
+ * If the last page came from the reserves, and the current allocation context
+ * does not have access to them, force an allocation to test the watermarks.
+ */
+static inline int slab_force_alloc(struct kmem_cache *cachep, gfp_t flags)
+{
+	if (unlikely(cpu_cache_get(cachep)->reserve) &&
+			!(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
+		return 1;
+
+	return 0;
+}
+
+static inline void slab_set_reserve(struct kmem_cache *cachep, int reserve)
+{
+	struct array_cache *ac = cpu_cache_get(cachep);
+
+	if (unlikely(ac->reserve != reserve))
+		ac->reserve = reserve;
+}
+
 static inline struct kmem_cache *__find_general_cachep(size_t size,
 							gfp_t gfpflags)
 {
@@ -956,6 +980,7 @@ static struct array_cache *alloc_arrayca
 		nc->limit = entries;
 		nc->batchcount = batchcount;
 		nc->touched = 0;
+		nc->reserve = 0;
 		spin_lock_init(&nc->lock);
 	}
 	return nc;
@@ -1659,7 +1684,8 @@ __initcall(cpucache_init);
  * did not request dmaable memory, we might get it, but that
  * would be relatively rare and ignorable.
  */
-static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid,
+		int *reserve)
 {
 	struct page *page;
 	int nr_pages;
@@ -1681,6 +1707,7 @@ static void *kmem_getpages(struct kmem_c
 	if (!page)
 		return NULL;
 
+	*reserve = page->reserve;
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		add_zone_page_state(page_zone(page),
@@ -2109,6 +2136,7 @@ static int __init_refok setup_cpu_cache(
 	cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
 	cpu_cache_get(cachep)->batchcount = 1;
 	cpu_cache_get(cachep)->touched = 0;
+	cpu_cache_get(cachep)->reserve = 0;
 	cachep->batchcount = 1;
 	cachep->limit = BOOT_CPUCACHE_ENTRIES;
 	return 0;
@@ -2764,6 +2792,7 @@ static int cache_grow(struct kmem_cache 
 	size_t offset;
 	gfp_t local_flags;
 	struct kmem_list3 *l3;
+	int reserve;
 
 	/*
 	 * Be lazy and only check for valid flags here,  keeping it out of the
@@ -2802,7 +2831,7 @@ static int cache_grow(struct kmem_cache 
 	 * 'nodeid'.
 	 */
 	if (!objp)
-		objp = kmem_getpages(cachep, local_flags, nodeid);
+		objp = kmem_getpages(cachep, local_flags, nodeid, &reserve);
 	if (!objp)
 		goto failed;
 
@@ -2819,6 +2848,7 @@ static int cache_grow(struct kmem_cache 
 	if (local_flags & __GFP_WAIT)
 		local_irq_disable();
 	check_irq_off();
+	slab_set_reserve(cachep, reserve);
 	spin_lock(&l3->list_lock);
 
 	/* Make slab active. */
@@ -2953,7 +2983,8 @@ bad:
 #define check_slabp(x,y) do { } while(0)
 #endif
 
-static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
+static void *cache_alloc_refill(struct kmem_cache *cachep,
+		gfp_t flags, int must_refill)
 {
 	int batchcount;
 	struct kmem_list3 *l3;
@@ -2963,6 +2994,8 @@ static void *cache_alloc_refill(struct k
 retry:
 	check_irq_off();
 	node = numa_node_id();
+	if (unlikely(must_refill))
+		goto force_grow;
 	ac = cpu_cache_get(cachep);
 	batchcount = ac->batchcount;
 	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -3030,11 +3063,14 @@ alloc_done:
 
 	if (unlikely(!ac->avail)) {
 		int x;
+force_grow:
 		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
 
 		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = cpu_cache_get(cachep);
-		if (!x && ac->avail == 0)	/* no objects in sight? abort */
+
+		/* no objects in sight? abort */
+		if (!x && (ac->avail == 0 || must_refill))
 			return NULL;
 
 		if (!ac->avail)		/* objects refilled by interrupt? */
@@ -3189,17 +3225,18 @@ static inline void *____cache_alloc(stru
 {
 	void *objp;
 	struct array_cache *ac;
+	int must_refill = slab_force_alloc(cachep, flags);
 
 	check_irq_off();
 
 	ac = cpu_cache_get(cachep);
-	if (likely(ac->avail)) {
+	if (likely(ac->avail && !must_refill)) {
 		STATS_INC_ALLOCHIT(cachep);
 		ac->touched = 1;
 		objp = ac->entry[--ac->avail];
 	} else {
 		STATS_INC_ALLOCMISS(cachep);
-		objp = cache_alloc_refill(cachep, flags);
+		objp = cache_alloc_refill(cachep, flags, must_refill);
 	}
 	return objp;
 }
@@ -3243,7 +3280,7 @@ static void *fallback_alloc(struct kmem_
 	struct zone *zone;
 	enum zone_type high_zoneidx = gfp_zone(flags);
 	void *obj = NULL;
-	int nid;
+	int nid, reserve;
 
 	if (flags & __GFP_THISNODE)
 		return NULL;
@@ -3276,10 +3313,11 @@ retry:
 		if (local_flags & __GFP_WAIT)
 			local_irq_enable();
 		kmem_flagcheck(cache, flags);
-		obj = kmem_getpages(cache, local_flags, -1);
+		obj = kmem_getpages(cache, local_flags, -1, &reserve);
 		if (local_flags & __GFP_WAIT)
 			local_irq_disable();
 		if (obj) {
+			slab_set_reserve(cache, reserve);
 			/*
 			 * Insert into the appropriate per node queues
 			 */
@@ -3318,6 +3356,9 @@ static void *____cache_alloc_node(struct
 	l3 = cachep->nodelists[nodeid];
 	BUG_ON(!l3);
 
+	if (unlikely(slab_force_alloc(cachep, flags)))
+		goto force_grow;
+
 retry:
 	check_irq_off();
 	spin_lock(&l3->list_lock);
@@ -3355,6 +3396,7 @@ retry:
 
 must_grow:
 	spin_unlock(&l3->list_lock);
+force_grow:
 	x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
 	if (x)
 		goto retry;

--

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2008-03-20 20:10 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-03-20 20:10 [PATCH 00/30] Swap over NFS -v17 Peter Zijlstra
2008-03-20 20:10 ` [PATCH 01/30] swap over network documentation Peter Zijlstra, Neil Brown
2008-03-20 21:20   ` Randy Dunlap
2008-03-20 20:10 ` [PATCH 02/30] mm: gfp_to_alloc_flags() Peter Zijlstra
2008-03-20 20:10 ` [PATCH 03/30] mm: tag reseve pages Peter Zijlstra
2008-03-20 20:10 ` [PATCH 04/30] mm: slub: trivial cleanups Peter Zijlstra
2008-03-20 20:10 ` Peter Zijlstra [this message]
2008-03-20 20:10 ` [PATCH 06/30] mm: kmem_alloc_estimate() Peter Zijlstra
2008-03-20 20:10 ` [PATCH 07/30] mm: allow PF_MEMALLOC from softirq context Peter Zijlstra
2008-03-20 20:10 ` [PATCH 08/30] mm: serialize access to min_free_kbytes Peter Zijlstra
2008-03-20 20:10 ` [PATCH 09/30] mm: emergency pool Peter Zijlstra
2008-03-20 20:10 ` [PATCH 10/30] mm: system wide ALLOC_NO_WATERMARK Peter Zijlstra
2008-03-20 20:10 ` [PATCH 11/30] mm: __GFP_MEMALLOC Peter Zijlstra
2008-03-20 20:10 ` [PATCH 12/30] mm: memory reserve management Peter Zijlstra
2008-03-20 20:10 ` [PATCH 13/30] selinux: tag avc cache alloc as non-critical Peter Zijlstra
2008-03-20 20:10 ` [PATCH 14/30] net: wrap sk->sk_backlog_rcv() Peter Zijlstra
2008-03-20 20:10 ` [PATCH 15/30] net: packet split receive api Peter Zijlstra
2008-03-20 20:10 ` [PATCH 16/30] net: sk_allocation() - concentrate socket related allocations Peter Zijlstra
2008-03-20 20:10 ` [PATCH 17/30] netvm: network reserve infrastructure Peter Zijlstra
2008-03-20 20:11 ` [PATCH 18/30] netvm: INET reserves Peter Zijlstra
2008-03-20 20:11 ` [PATCH 19/30] netvm: hook skb allocation to reserves Peter Zijlstra
2008-03-20 20:11 ` [PATCH 20/30] netvm: filter emergency skbs Peter Zijlstra
2008-03-20 20:11 ` [PATCH 21/30] netvm: prevent a stream specific deadlock Peter Zijlstra
2008-03-20 20:11 ` [PATCH 22/30] netfilter: NF_QUEUE vs emergency skbs Peter Zijlstra
2008-03-20 20:11 ` [PATCH 23/30] netvm: skb processing Peter Zijlstra
2008-03-20 20:11 ` [PATCH 24/30] mm: add support for non block device backed swap files Peter Zijlstra
2008-03-20 20:11 ` [PATCH 25/30] mm: methods for teaching filesystems about PG_swapcache pages Peter Zijlstra
2008-03-20 20:11 ` [PATCH 26/30] nfs: remove mempools Peter Zijlstra
2008-03-20 20:11 ` [PATCH 27/30] nfs: teach the NFS client how to treat PG_swapcache pages Peter Zijlstra
2008-03-20 20:11 ` [PATCH 28/30] nfs: disable data cache revalidation for swapfiles Peter Zijlstra
2008-03-20 20:11 ` [PATCH 29/30] nfs: enable swap on NFS Peter Zijlstra
2008-03-20 20:11 ` [PATCH 30/30] nfs: fix various memory recursions possible with swap over NFS Peter Zijlstra
2008-07-24 14:00 [PATCH 00/30] Swap over NFS -v18 Peter Zijlstra
2008-07-24 14:00 ` [PATCH 05/30] mm: slb: add knowledge of reserve pages Peter Zijlstra
2008-08-12  5:35   ` Neil Brown
2008-08-12  7:22     ` Peter Zijlstra
2008-08-12  9:35       ` Neil Brown
2008-08-12 10:23         ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080320202121.131372000@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=miklos@szeredi.hu \
    --cc=neilb@suse.de \
    --cc=netdev@vger.kernel.org \
    --cc=penberg@cs.helsinki.fi \
    --cc=torvalds@linux-foundation.org \
    --cc=trond.myklebust@fys.uio.no \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox