From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org, David Miller <davem@davemloft.net>,
Andrew Morton <akpm@linux-foundation.org>,
Daniel Phillips <phillips@google.com>,
Pekka Enberg <penberg@cs.helsinki.fi>,
Christoph Lameter <clameter@sgi.com>,
Matt Mackall <mpm@selenic.com>,
Lee Schermerhorn <Lee.Schermerhorn@hp.com>,
Steve Dickson <SteveD@redhat.com>
Subject: Re: [PATCH 04/10] mm: slub: add knowledge of reserve pages
Date: Mon, 20 Aug 2007 09:38:33 +0200
Message-ID: <1187595513.6114.176.camel@twins>
In-Reply-To: <20070806103658.603735000@chello.nl>
OK, so I got rid of the global stuff; this also obsoletes patch 3/10.
---
Subject: mm: slub: add knowledge of reserve pages
Restrict objects from reserve slabs (slabs allocated with
ALLOC_NO_WATERMARKS) to allocation contexts that are themselves
entitled to the reserves.

Care is taken to touch only the SLUB slow path.
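As a side note, the entitlement rule this enforces can be modelled in
a few lines of plain C. The sketch below is a userspace illustration
only: the flag value and gfp_to_alloc_flags() are stand-ins for the
kernel's (the real helper comes from patch 01/10), with all the flag
plumbing trimmed away.

#include <stdbool.h>
#include <stdio.h>

#define ALLOC_NO_WATERMARKS 0x04	/* stand-in for the kernel's flag */

/* stand-in: the real helper derives alloc flags from the gfp mask */
static unsigned int gfp_to_alloc_flags(unsigned int gfpflags)
{
	return gfpflags;
}

/*
 * The rule: an object may be handed out from a reserve slab only if
 * the current allocation context may itself ignore the watermarks,
 * i.e. is entitled to the reserves.
 */
static bool may_use_reserve_slab(unsigned int gfpflags)
{
	return gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS;
}

int main(void)
{
	printf("entitled context: %d\n", may_use_reserve_slab(ALLOC_NO_WATERMARKS));
	printf("ordinary context: %d\n", may_use_reserve_slab(0));
	return 0;
}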
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Christoph Lameter <clameter@sgi.com>
---
mm/slub.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 69 insertions(+), 18 deletions(-)
Index: linux-2.6-2/mm/slub.c
===================================================================
--- linux-2.6-2.orig/mm/slub.c
+++ linux-2.6-2/mm/slub.c
@@ -20,11 +20,12 @@
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/kallsyms.h>
+#include "internal.h"
/*
* Lock order:
* 1. slab_lock(page)
- * 2. slab->list_lock
+ * 2. node->list_lock
*
* The slab_lock protects operations on the object of a particular
* slab and its metadata in the page struct. If the slab lock
@@ -1069,7 +1070,7 @@ static void setup_object(struct kmem_cac
s->ctor(object, s, 0);
}
-static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
+static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node, int *reserve)
{
struct page *page;
struct kmem_cache_node *n;
@@ -1087,6 +1088,7 @@ static struct page *new_slab(struct kmem
if (!page)
goto out;
+ *reserve = page->reserve;
n = get_node(s, page_to_nid(page));
if (n)
atomic_long_inc(&n->nr_slabs);
@@ -1403,12 +1405,36 @@ static inline void flush_slab(struct kme
}
/*
+ * cpu slab reserve tracking
+ *
+ * We mark the reserve status in the LSB of the ->cpu_slab[] pointer.
+ */
+static inline unsigned long cpu_slab_reserve(struct kmem_cache *s, int cpu)
+{
+ return unlikely((unsigned long)s->cpu_slab[cpu] & 1);
+}
+
+static inline void
+cpu_slab_set(struct kmem_cache *s, int cpu, struct page *page, int reserve)
+{
+ if (unlikely(reserve))
+ page = (struct page *)((unsigned long)page | 1);
+
+ s->cpu_slab[cpu] = page;
+}
+
+static inline struct page *cpu_slab(struct kmem_cache *s, int cpu)
+{
+ return (struct page *)((unsigned long)s->cpu_slab[cpu] & ~1UL);
+}
+
+/*
* Flush cpu slab.
* Called from IPI handler with interrupts disabled.
*/
static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
{
- struct page *page = s->cpu_slab[cpu];
+ struct page *page = cpu_slab(s, cpu);
if (likely(page))
flush_slab(s, page, cpu);
@@ -1457,10 +1483,22 @@ static void *__slab_alloc(struct kmem_ca
{
void **object;
int cpu = smp_processor_id();
+ int reserve = 0;
if (!page)
goto new_slab;
+ if (cpu_slab_reserve(s, cpu)) {
+ /*
+ * If the current slab is a reserve slab and the current
+ * allocation context does not allow access to the reserves,
+ * we must force an allocation to test the current levels.
+ */
+ if (!(gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS))
+ goto alloc_slab;
+ reserve = 1;
+ }
+
slab_lock(page);
if (unlikely(node != -1 && page_to_nid(page) != node))
goto another_slab;
@@ -1468,10 +1506,9 @@ load_freelist:
object = page->freelist;
if (unlikely(!object))
goto another_slab;
- if (unlikely(SlabDebug(page)))
+ if (unlikely(SlabDebug(page) || reserve))
goto debug;
- object = page->freelist;
page->lockless_freelist = object[page->offset];
page->inuse = s->objects;
page->freelist = NULL;
@@ -1484,14 +1521,28 @@ another_slab:
new_slab:
page = get_partial(s, gfpflags, node);
if (page) {
- s->cpu_slab[cpu] = page;
+ cpu_slab_set(s, cpu, page, reserve);
goto load_freelist;
}
- page = new_slab(s, gfpflags, node);
+alloc_slab:
+ page = new_slab(s, gfpflags, node, &reserve);
if (page) {
+ struct page *slab;
+
cpu = smp_processor_id();
- if (s->cpu_slab[cpu]) {
+ slab = cpu_slab(s, cpu);
+
+ if (cpu_slab_reserve(s, cpu) && !reserve) {
+ /*
+ * If the current cpu_slab is a reserve slab but we
+ * managed to allocate a new slab, the pressure is
+ * lifted and we can unmark the current one.
+ */
+ cpu_slab_set(s, cpu, slab, 0);
+ }
+
+ if (slab) {
/*
* Someone else populated the cpu_slab while we
* enabled interrupts, or we have gotten scheduled
@@ -1499,29 +1550,28 @@ new_slab:
* requested node even if __GFP_THISNODE was
* specified. So we need to recheck.
*/
- if (node == -1 ||
- page_to_nid(s->cpu_slab[cpu]) == node) {
+ if (node == -1 || page_to_nid(slab) == node) {
/*
* Current cpuslab is acceptable and we
* want the current one since its cache hot
*/
discard_slab(s, page);
- page = s->cpu_slab[cpu];
+ page = slab;
slab_lock(page);
goto load_freelist;
}
/* New slab does not fit our expectations */
- flush_slab(s, s->cpu_slab[cpu], cpu);
+ flush_slab(s, slab, cpu);
}
slab_lock(page);
SetSlabFrozen(page);
- s->cpu_slab[cpu] = page;
+ cpu_slab_set(s, cpu, page, reserve);
goto load_freelist;
}
return NULL;
debug:
- object = page->freelist;
- if (!alloc_debug_processing(s, page, object, addr))
+ if (SlabDebug(page) &&
+ !alloc_debug_processing(s, page, object, addr))
goto another_slab;
page->inuse++;
@@ -1548,7 +1598,7 @@ static void __always_inline *slab_alloc(
unsigned long flags;
local_irq_save(flags);
- page = s->cpu_slab[smp_processor_id()];
+ page = cpu_slab(s, smp_processor_id());
if (unlikely(!page || !page->lockless_freelist ||
(node != -1 && page_to_nid(page) != node)))
@@ -1873,10 +1923,11 @@ static struct kmem_cache_node * __init e
{
struct page *page;
struct kmem_cache_node *n;
+ int reserve;
BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
- page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node);
+ page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node, &reserve);
BUG_ON(!page);
n = page->freelist;
@@ -3189,7 +3240,7 @@ static unsigned long slab_objects(struct
per_cpu = nodes + nr_node_ids;
for_each_possible_cpu(cpu) {
- struct page *page = s->cpu_slab[cpu];
+ struct page *page = cpu_slab(s, cpu);
int node;
if (page) {
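(An aside for readers new to the pointer-tagging trick the cpu_slab
helpers above rely on: struct page pointers are always word-aligned,
so bit 0 is free to carry the reserve flag. The standalone userspace
sketch below round-trips the same encoding; the struct, variable and
function names here are illustrative stand-ins, not the kernel's.)

#include <assert.h>
#include <stdio.h>

struct page { int dummy; };		/* alignment keeps bit 0 clear */

static struct page *cpu_slab_ptr;	/* models s->cpu_slab[cpu] */

/* store the pointer, marking reserve status in its LSB */
static void set_slab(struct page *page, int reserve)
{
	if (reserve)
		page = (struct page *)((unsigned long)page | 1);
	cpu_slab_ptr = page;
}

/* test the tag bit */
static int slab_is_reserve(void)
{
	return (unsigned long)cpu_slab_ptr & 1;
}

/* strip the tag bit to recover the usable pointer */
static struct page *get_slab(void)
{
	return (struct page *)((unsigned long)cpu_slab_ptr & ~1UL);
}

int main(void)
{
	struct page pg;

	set_slab(&pg, 1);
	assert(slab_is_reserve() && get_slab() == &pg);

	set_slab(&pg, 0);
	assert(!slab_is_reserve() && get_slab() == &pg);

	puts("LSB tagging round-trips");
	return 0;
}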