From: Joel Schopp <jschopp@austin.ibm.com>
To: Joel Schopp <jschopp@austin.ibm.com>
Cc: Andrew Morton <akpm@osdl.org>,
lhms <lhms-devel@lists.sourceforge.net>,
Linux Memory Management List <linux-mm@kvack.org>,
linux-kernel@vger.kernel.org, Mel Gorman <mel@csn.ul.ie>,
Mike Kravetz <kravetz@us.ibm.com>
Subject: [PATCH 10/9] percpu splitout
Date: Mon, 26 Sep 2005 15:19:29 -0500 [thread overview]
Message-ID: <433857D1.3050903@austin.ibm.com> (raw)
In-Reply-To: <4338537E.8070603@austin.ibm.com>
[-- Attachment #1: Type: text/plain, Size: 554 bytes --]
NOT READY FOR MERGING!
Only works with NUMA off on 2.6.13. On 2.6.13 with NUMA on,
free_hot_cold_page calls __free_pages_bulk, which then trips
BUG_ON(bad_range(zone, page)). This does not happen on 2.6.13-rc1 kernels.
Released under the release-early, release-often doctrine.
This patch splits the per-cpu page lists into two types. Kernel-reclaimable
and kernel non-reclaimable allocations are grouped together as a single
PCPU_KERN type, while user allocations use the PCPU_USER type.
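As an illustration, here is a minimal user-space sketch of the list-selection
logic. The kernel machinery is mocked: MOCK_GFP_USER and pick_list() are
illustrative stand-ins for __GFP_USER and the open-coded selection in the
patch, not part of it.

	#include <stdio.h>

	/* Indices into pcpu_list, mirroring the patch. */
	#define PCPU_KERN	0	/* kernel (non-)reclaimable pages */
	#define PCPU_USER	1	/* user-reclaimable pages */
	#define PCPU_LIST_SIZE	2

	struct pcpu_list {
		int count;		/* pages currently on this list */
	};

	struct per_cpu_pages {
		struct pcpu_list pcpu_list[PCPU_LIST_SIZE];
		int low;		/* low watermark, refill needed */
		int high;		/* high watermark, emptying needed */
		int batch;		/* chunk size for buddy add/remove */
	};

	/* Mock flag standing in for __GFP_USER. */
	#define MOCK_GFP_USER	0x1

	/* Pick the per-cpu list an allocation of 'alloctype' should use. */
	static struct pcpu_list *pick_list(struct per_cpu_pages *pcp,
					   int alloctype)
	{
		if (alloctype & MOCK_GFP_USER)
			return &pcp->pcpu_list[PCPU_USER];
		return &pcp->pcpu_list[PCPU_KERN];
	}

	int main(void)
	{
		struct per_cpu_pages pcp = { .low = 2, .high = 6, .batch = 1 };

		pick_list(&pcp, MOCK_GFP_USER)->count++;	/* user page */
		pick_list(&pcp, 0)->count++;			/* kernel page */

		printf("kern=%d user=%d\n",
		       pcp.pcpu_list[PCPU_KERN].count,
		       pcp.pcpu_list[PCPU_USER].count);
		return 0;
	}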
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Joel Schopp <jschopp@austin.ibm.com>
[-- Attachment #2: 10_percpu_splitout --]
[-- Type: text/plain, Size: 5504 bytes --]
Index: 2.6.13-joel2/include/linux/mmzone.h
===================================================================
--- 2.6.13-joel2.orig/include/linux/mmzone.h 2005-09-26 13:58:59.%N -0500
+++ 2.6.13-joel2/include/linux/mmzone.h 2005-09-26 13:59:38.%N -0500
@@ -57,13 +57,28 @@ struct zone_padding {
#else
#define ZONE_PADDING(name)
#endif
+/*
+ * The pcpu_list is to keep kernel and userrclm allocations
+ * apart while still allowing all allocation types to have
+ * per-cpu lists
+ */
+struct pcpu_list {
+ int count;
+ struct list_head list;
+} ____cacheline_aligned_in_smp;
+
+
+/* Indices into pcpu_list */
+#define PCPU_KERN 0
+#define PCPU_USER 1
+#define PCPU_LIST_SIZE 2
struct per_cpu_pages {
- int count; /* number of pages in the list */
- int low; /* low watermark, refill needed */
- int high; /* high watermark, emptying needed */
- int batch; /* chunk size for buddy add/remove */
- struct list_head list; /* the list of pages */
+ int count; /* number of pages in the list */
+ struct pcpu_list pcpu_list[PCPU_LIST_SIZE];
+ int low; /* low watermark, refill needed */
+ int high; /* high watermark, emptying needed */
+ int batch; /* chunk size for buddy add/remove */
};
struct per_cpu_pageset {
Index: 2.6.13-joel2/mm/page_alloc.c
===================================================================
--- 2.6.13-joel2.orig/mm/page_alloc.c 2005-09-26 13:59:27.%N -0500
+++ 2.6.13-joel2/mm/page_alloc.c 2005-09-26 13:59:38.%N -0500
@@ -775,9 +775,18 @@ void drain_remote_pages(void)
struct per_cpu_pages *pcp;
pcp = &pset->pcp[i];
- if (pcp->count)
- pcp->count -= free_pages_bulk(zone, pcp->count,
- &pcp->list, 0);
+ if (pcp->pcpu_list[PCPU_KERN].count)
+ pcp->pcpu_list[PCPU_KERN].count -=
+ free_pages_bulk(zone,
+ pcp->pcpu_list[PCPU_KERN].count,
+ &pcp->pcpu_list[PCPU_KERN].list,
+ 0);
+ if (pcp->pcpu_list[PCPU_USER].count)
+ pcp->pcpu_list[PCPU_USER].count -=
+ free_pages_bulk(zone,
+ pcp->pcpu_list[PCPU_USER].count,
+ &pcp->pcpu_list[PCPU_USER].list,
+ 0);
}
}
local_irq_restore(flags);
@@ -798,8 +807,18 @@ static void __drain_pages(unsigned int c
struct per_cpu_pages *pcp;
pcp = &pset->pcp[i];
- pcp->count -= free_pages_bulk(zone, pcp->count,
- &pcp->list, 0);
+ pcp->pcpu_list[PCPU_KERN].count -=
+ free_pages_bulk(zone,
+ pcp->pcpu_list[PCPU_KERN].count,
+ &pcp->pcpu_list[PCPU_KERN].list,
+ 0);
+
+ pcp->pcpu_list[PCPU_USER].count -=
+ free_pages_bulk(zone,
+ pcp->pcpu_list[PCPU_USER].count,
+ &pcp->pcpu_list[PCPU_USER].list,
+ 0);
+
}
}
}
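Note: both drain sites above open-code the KERN and USER lists. An
equivalent loop form, sketched here with the patch's own symbols (not part
of the diff), would be:

	int i;

	/* Drain every per-cpu list type back to the buddy allocator. */
	for (i = 0; i < PCPU_LIST_SIZE; i++)
		pcp->pcpu_list[i].count -=
			free_pages_bulk(zone, pcp->pcpu_list[i].count,
					&pcp->pcpu_list[i].list, 0);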
@@ -881,6 +900,7 @@ static void fastcall free_hot_cold_page(
struct zone *zone = page_zone(page);
struct per_cpu_pages *pcp;
unsigned long flags;
+ struct pcpu_list *plist;
arch_free_page(page, 0);
@@ -890,11 +910,24 @@ static void fastcall free_hot_cold_page(
page->mapping = NULL;
free_pages_check(__FUNCTION__, page);
pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+
+ /*
+ * Strictly speaking, we should not be accessing the zone information
+ * here without the zone lock. In this case, it does not matter if
+ * the read is incorrect.
+ */
+ if (get_pageblock_type(zone, page) == RCLM_USER)
+ plist = &pcp->pcpu_list[PCPU_USER];
+ else
+ plist = &pcp->pcpu_list[PCPU_KERN];
+
local_irq_save(flags);
- list_add(&page->lru, &pcp->list);
- pcp->count++;
- if (pcp->count >= pcp->high)
- pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+ list_add(&page->lru, &plist->list);
+ plist->count++;
+ if (plist->count >= pcp->high)
+ plist->count -= free_pages_bulk(zone, pcp->batch,
+ &plist->list, 0);
local_irq_restore(flags);
put_cpu();
}
@@ -930,19 +963,28 @@ buffered_rmqueue(struct zone *zone, int
unsigned long flags;
struct page *page = NULL;
int cold = !!(gfp_flags & __GFP_COLD);
+ struct pcpu_list *plist;
if (order == 0) {
struct per_cpu_pages *pcp;
pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
local_irq_save(flags);
- if (pcp->count <= pcp->low)
- pcp->count += rmqueue_bulk(zone, pcp->batch,
- &pcp->list, alloctype);
- if (pcp->count) {
- page = list_entry(pcp->list.next, struct page, lru);
+
+ if (alloctype == __GFP_USER)
+ plist = &pcp->pcpu_list[PCPU_USER];
+ else
+ plist = &pcp->pcpu_list[PCPU_KERN];
+
+ if (plist->count <= pcp->low)
+ plist->count += rmqueue_bulk(zone,
+ pcp->batch,
+ &plist->list,
+ alloctype);
+ if (plist->count) {
+ page = list_entry(plist->list.next, struct page, lru);
list_del(&page->lru);
- pcp->count--;
+ plist->count--;
}
local_irq_restore(flags);
put_cpu();
@@ -2001,18 +2043,23 @@ inline void setup_pageset(struct per_cpu
struct per_cpu_pages *pcp;
pcp = &p->pcp[0]; /* hot */
- pcp->count = 0;
+ pcp->pcpu_list[PCPU_KERN].count = 0;
+ pcp->pcpu_list[PCPU_USER].count = 0;
pcp->low = 2 * batch;
pcp->high = 6 * batch;
pcp->batch = max(1UL, 1 * batch);
- INIT_LIST_HEAD(&pcp->list);
+ INIT_LIST_HEAD(&pcp->pcpu_list[PCPU_KERN].list);
+ INIT_LIST_HEAD(&pcp->pcpu_list[PCPU_USER].list);
pcp = &p->pcp[1]; /* cold*/
- pcp->count = 0;
+ pcp->pcpu_list[PCPU_KERN].count = 0;
+ pcp->pcpu_list[PCPU_USER].count = 0;
pcp->low = 0;
pcp->high = 2 * batch;
pcp->batch = max(1UL, 1 * batch);
- INIT_LIST_HEAD(&pcp->list);
+ INIT_LIST_HEAD(&pcp->pcpu_list[PCPU_KERN].list);
+ INIT_LIST_HEAD(&pcp->pcpu_list[PCPU_USER].list);
+
}
#ifdef CONFIG_NUMA
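The open-coded initialisation in setup_pageset() above is equivalent to
this loop over the list types (a sketch, not part of the diff):

	int i;

	/* Reset and initialise every per-cpu list type. */
	for (i = 0; i < PCPU_LIST_SIZE; i++) {
		pcp->pcpu_list[i].count = 0;
		INIT_LIST_HEAD(&pcp->pcpu_list[i].list);
	}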