* [QUICKLIST 2/4] Quicklist support for IA64
2007-04-09 18:25 [QUICKLIST 1/4] Quicklists for page table pages V5 Christoph Lameter
@ 2007-04-09 18:25 ` Christoph Lameter
2007-04-09 18:25 ` [QUICKLIST 3/4] Quicklist support for x86_64 Christoph Lameter
` (5 subsequent siblings)
6 siblings, 0 replies; 24+ messages in thread
From: Christoph Lameter @ 2007-04-09 18:25 UTC (permalink / raw)
To: akpm; +Cc: linux-mm, ak, linux-kernel, Christoph Lameter
Quicklist for IA64
IA64 is the origin of the quicklist implementation. So cut out the pieces
that are now in core code and modify the functions called.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Index: linux-2.6.21-rc5-mm4/arch/ia64/mm/init.c
===================================================================
--- linux-2.6.21-rc5-mm4.orig/arch/ia64/mm/init.c 2007-04-07 16:20:16.000000000 -0700
+++ linux-2.6.21-rc5-mm4/arch/ia64/mm/init.c 2007-04-07 18:02:51.000000000 -0700
@@ -39,9 +39,6 @@
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist);
-DEFINE_PER_CPU(long, __pgtable_quicklist_size);
-
extern void ia64_tlb_init (void);
unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
@@ -56,54 +53,6 @@
struct page *zero_page_memmap_ptr; /* map entry for zero page */
EXPORT_SYMBOL(zero_page_memmap_ptr);
-#define MIN_PGT_PAGES 25UL
-#define MAX_PGT_FREES_PER_PASS 16L
-#define PGT_FRACTION_OF_NODE_MEM 16
-
-static inline long
-max_pgt_pages(void)
-{
- u64 node_free_pages, max_pgt_pages;
-
-#ifndef CONFIG_NUMA
- node_free_pages = nr_free_pages();
-#else
- node_free_pages = node_page_state(numa_node_id(), NR_FREE_PAGES);
-#endif
- max_pgt_pages = node_free_pages / PGT_FRACTION_OF_NODE_MEM;
- max_pgt_pages = max(max_pgt_pages, MIN_PGT_PAGES);
- return max_pgt_pages;
-}
-
-static inline long
-min_pages_to_free(void)
-{
- long pages_to_free;
-
- pages_to_free = pgtable_quicklist_size - max_pgt_pages();
- pages_to_free = min(pages_to_free, MAX_PGT_FREES_PER_PASS);
- return pages_to_free;
-}
-
-void
-check_pgt_cache(void)
-{
- long pages_to_free;
-
- if (unlikely(pgtable_quicklist_size <= MIN_PGT_PAGES))
- return;
-
- preempt_disable();
- while (unlikely((pages_to_free = min_pages_to_free()) > 0)) {
- while (pages_to_free--) {
- free_page((unsigned long)pgtable_quicklist_alloc());
- }
- preempt_enable();
- preempt_disable();
- }
- preempt_enable();
-}
-
void
lazy_mmu_prot_update (pte_t pte)
{
Index: linux-2.6.21-rc5-mm4/include/asm-ia64/pgalloc.h
===================================================================
--- linux-2.6.21-rc5-mm4.orig/include/asm-ia64/pgalloc.h 2007-03-25 15:56:23.000000000 -0700
+++ linux-2.6.21-rc5-mm4/include/asm-ia64/pgalloc.h 2007-04-07 18:02:51.000000000 -0700
@@ -18,71 +18,18 @@
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/threads.h>
+#include <linux/quicklist.h>
#include <asm/mmu_context.h>
-DECLARE_PER_CPU(unsigned long *, __pgtable_quicklist);
-#define pgtable_quicklist __ia64_per_cpu_var(__pgtable_quicklist)
-DECLARE_PER_CPU(long, __pgtable_quicklist_size);
-#define pgtable_quicklist_size __ia64_per_cpu_var(__pgtable_quicklist_size)
-
-static inline long pgtable_quicklist_total_size(void)
-{
- long ql_size = 0;
- int cpuid;
-
- for_each_online_cpu(cpuid) {
- ql_size += per_cpu(__pgtable_quicklist_size, cpuid);
- }
- return ql_size;
-}
-
-static inline void *pgtable_quicklist_alloc(void)
-{
- unsigned long *ret = NULL;
-
- preempt_disable();
-
- ret = pgtable_quicklist;
- if (likely(ret != NULL)) {
- pgtable_quicklist = (unsigned long *)(*ret);
- ret[0] = 0;
- --pgtable_quicklist_size;
- preempt_enable();
- } else {
- preempt_enable();
- ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
- }
-
- return ret;
-}
-
-static inline void pgtable_quicklist_free(void *pgtable_entry)
-{
-#ifdef CONFIG_NUMA
- int nid = page_to_nid(virt_to_page(pgtable_entry));
-
- if (unlikely(nid != numa_node_id())) {
- free_page((unsigned long)pgtable_entry);
- return;
- }
-#endif
-
- preempt_disable();
- *(unsigned long *)pgtable_entry = (unsigned long)pgtable_quicklist;
- pgtable_quicklist = (unsigned long *)pgtable_entry;
- ++pgtable_quicklist_size;
- preempt_enable();
-}
-
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- return pgtable_quicklist_alloc();
+ return quicklist_alloc(0, GFP_KERNEL, NULL);
}
static inline void pgd_free(pgd_t * pgd)
{
- pgtable_quicklist_free(pgd);
+ quicklist_free(0, NULL, pgd);
}
#ifdef CONFIG_PGTABLE_4
@@ -94,12 +41,12 @@
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return pgtable_quicklist_alloc();
+ return quicklist_alloc(0, GFP_KERNEL, NULL);
}
static inline void pud_free(pud_t * pud)
{
- pgtable_quicklist_free(pud);
+ quicklist_free(0, NULL, pud);
}
#define __pud_free_tlb(tlb, pud) pud_free(pud)
#endif /* CONFIG_PGTABLE_4 */
@@ -112,12 +59,12 @@
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return pgtable_quicklist_alloc();
+ return quicklist_alloc(0, GFP_KERNEL, NULL);
}
static inline void pmd_free(pmd_t * pmd)
{
- pgtable_quicklist_free(pmd);
+ quicklist_free(0, NULL, pmd);
}
#define __pmd_free_tlb(tlb, pmd) pmd_free(pmd)
@@ -137,28 +84,31 @@
static inline struct page *pte_alloc_one(struct mm_struct *mm,
unsigned long addr)
{
- void *pg = pgtable_quicklist_alloc();
+ void *pg = quicklist_alloc(0, GFP_KERNEL, NULL);
return pg ? virt_to_page(pg) : NULL;
}
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long addr)
{
- return pgtable_quicklist_alloc();
+ return quicklist_alloc(0, GFP_KERNEL, NULL);
}
static inline void pte_free(struct page *pte)
{
- pgtable_quicklist_free(page_address(pte));
+ quicklist_free_page(0, NULL, pte);
}
static inline void pte_free_kernel(pte_t * pte)
{
- pgtable_quicklist_free(pte);
+ quicklist_free(0, NULL, pte);
}
-#define __pte_free_tlb(tlb, pte) pte_free(pte)
+static inline void check_pgt_cache(void)
+{
+ quicklist_trim(0, NULL, 25, 16);
+}
-extern void check_pgt_cache(void);
+#define __pte_free_tlb(tlb, pte) pte_free(pte)
#endif /* _ASM_IA64_PGALLOC_H */
Index: linux-2.6.21-rc5-mm4/arch/ia64/mm/contig.c
===================================================================
--- linux-2.6.21-rc5-mm4.orig/arch/ia64/mm/contig.c 2007-04-07 16:20:07.000000000 -0700
+++ linux-2.6.21-rc5-mm4/arch/ia64/mm/contig.c 2007-04-07 18:02:51.000000000 -0700
@@ -88,7 +88,7 @@
printk(KERN_INFO "%d pages shared\n", total_shared);
printk(KERN_INFO "%d pages swap cached\n", total_cached);
printk(KERN_INFO "Total of %ld pages in page table cache\n",
- pgtable_quicklist_total_size());
+ quicklist_total_size());
printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages());
}
Index: linux-2.6.21-rc5-mm4/arch/ia64/mm/discontig.c
===================================================================
--- linux-2.6.21-rc5-mm4.orig/arch/ia64/mm/discontig.c 2007-04-07 17:59:49.000000000 -0700
+++ linux-2.6.21-rc5-mm4/arch/ia64/mm/discontig.c 2007-04-07 18:02:51.000000000 -0700
@@ -636,7 +636,7 @@
printk(KERN_INFO "%d pages shared\n", total_shared);
printk(KERN_INFO "%d pages swap cached\n", total_cached);
printk(KERN_INFO "Total of %ld pages in page table cache\n",
- pgtable_quicklist_total_size());
+ quicklist_total_size());
printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages());
}
Index: linux-2.6.21-rc5-mm4/arch/ia64/Kconfig
===================================================================
--- linux-2.6.21-rc5-mm4.orig/arch/ia64/Kconfig 2007-04-07 17:59:49.000000000 -0700
+++ linux-2.6.21-rc5-mm4/arch/ia64/Kconfig 2007-04-07 18:02:51.000000000 -0700
@@ -30,6 +30,10 @@
def_bool y
depends on !IA64_SGI_SN2
+config QUICKLIST
+ bool
+ default y
+
config MMU
bool
default y
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* [QUICKLIST 3/4] Quicklist support for x86_64
2007-04-09 18:25 [QUICKLIST 1/4] Quicklists for page table pages V5 Christoph Lameter
2007-04-09 18:25 ` [QUICKLIST 2/4] Quicklist support for IA64 Christoph Lameter
@ 2007-04-09 18:25 ` Christoph Lameter
2007-04-09 18:43 ` Andi Kleen
2007-04-09 21:28 ` Andrew Morton
2007-04-09 18:25 ` [QUICKLIST 4/4] Quicklist support for sparc64 Christoph Lameter, David Miller
` (4 subsequent siblings)
6 siblings, 2 replies; 24+ messages in thread
From: Christoph Lameter @ 2007-04-09 18:25 UTC (permalink / raw)
To: akpm; +Cc: linux-mm, ak, Christoph Lameter, linux-kernel
Convert x86_64 to use quicklists
This adds caching of pgds, puds, pmds and ptes. That way we can
avoid costly zeroing and initialization of special mappings in the
pgd.
A second quicklist is useful to separate out PGD handling. We can carry
the initialized pgds over to the next process needing them.
Also clean up the pgd_list handling to use regular list macros.
There is no need anymore to avoid the lru field.
Move the add/removal of the pgds to the pgdlist into the
constructor / destructor. That way the implementation is
congruent with i386.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
arch/x86_64/Kconfig | 4 ++
arch/x86_64/kernel/process.c | 1
arch/x86_64/kernel/smp.c | 2 -
arch/x86_64/mm/fault.c | 5 +-
include/asm-x86_64/pgalloc.h | 76 +++++++++++++++++++++----------------------
include/asm-x86_64/pgtable.h | 3 -
mm/Kconfig | 5 ++
7 files changed, 52 insertions(+), 44 deletions(-)
Index: linux-2.6.21-rc5-mm4/arch/x86_64/Kconfig
===================================================================
--- linux-2.6.21-rc5-mm4.orig/arch/x86_64/Kconfig 2007-04-07 18:09:17.000000000 -0700
+++ linux-2.6.21-rc5-mm4/arch/x86_64/Kconfig 2007-04-07 18:09:30.000000000 -0700
@@ -56,6 +56,14 @@
bool
default y
+config QUICKLIST
+ bool
+ default y
+
+config NR_QUICK
+ int
+ default 2
+
config ISA
bool
Index: linux-2.6.21-rc5-mm4/include/asm-x86_64/pgalloc.h
===================================================================
--- linux-2.6.21-rc5-mm4.orig/include/asm-x86_64/pgalloc.h 2007-04-07 18:07:47.000000000 -0700
+++ linux-2.6.21-rc5-mm4/include/asm-x86_64/pgalloc.h 2007-04-07 18:47:03.000000000 -0700
@@ -4,6 +4,10 @@
#include <asm/pda.h>
#include <linux/threads.h>
#include <linux/mm.h>
+#include <linux/quicklist.h>
+
+#define QUICK_PGD 0 /* We preserve special mappings over free */
+#define QUICK_PT 1 /* Other page table pages that are zero on free */
#define pmd_populate_kernel(mm, pmd, pte) \
set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
@@ -20,23 +24,23 @@
static inline void pmd_free(pmd_t *pmd)
{
BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
- free_page((unsigned long)pmd);
+ quicklist_free(QUICK_PT, NULL, pmd);
}
static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
{
- return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+ return (pmd_t *)quicklist_alloc(QUICK_PT, GFP_KERNEL|__GFP_REPEAT, NULL);
}
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+ return (pud_t *)quicklist_alloc(QUICK_PT, GFP_KERNEL|__GFP_REPEAT, NULL);
}
static inline void pud_free (pud_t *pud)
{
BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
- free_page((unsigned long)pud);
+ quicklist_free(QUICK_PT, NULL, pud);
}
static inline void pgd_list_add(pgd_t *pgd)
@@ -57,41 +61,57 @@
spin_unlock(&pgd_lock);
}
-static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+static inline void pgd_ctor(void *x)
{
unsigned boundary;
- pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
- if (!pgd)
- return NULL;
- pgd_list_add(pgd);
+ pgd_t *pgd = x;
+ struct page *page = virt_to_page(pgd);
+
/*
* Copy kernel pointers in from init.
- * Could keep a freelist or slab cache of those because the kernel
- * part never changes.
*/
boundary = pgd_index(__PAGE_OFFSET);
- memset(pgd, 0, boundary * sizeof(pgd_t));
memcpy(pgd + boundary,
- init_level4_pgt + boundary,
- (PTRS_PER_PGD - boundary) * sizeof(pgd_t));
+ init_level4_pgt + boundary,
+ (PTRS_PER_PGD - boundary) * sizeof(pgd_t));
+
+ spin_lock(&pgd_lock);
+ list_add(&page->lru, &pgd_list);
+ spin_unlock(&pgd_lock);
+}
+
+static inline void pgd_dtor(void *x)
+{
+ pgd_t *pgd = x;
+ struct page *page = virt_to_page(pgd);
+
+ spin_lock(&pgd_lock);
+ list_del(&page->lru);
+ spin_unlock(&pgd_lock);
+}
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ pgd_t *pgd = (pgd_t *)quicklist_alloc(QUICK_PGD,
+ GFP_KERNEL|__GFP_REPEAT, pgd_ctor);
return pgd;
}
static inline void pgd_free(pgd_t *pgd)
{
BUG_ON((unsigned long)pgd & (PAGE_SIZE-1));
- pgd_list_del(pgd);
- free_page((unsigned long)pgd);
+ quicklist_free(QUICK_PGD, pgd_dtor, pgd);
}
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
- return (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+ return (pte_t *)quicklist_alloc(QUICK_PT, GFP_KERNEL|__GFP_REPEAT, NULL);
}
static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+ void *p = (void *)quicklist_alloc(QUICK_PT, GFP_KERNEL|__GFP_REPEAT, NULL);
+
if (!p)
return NULL;
return virt_to_page(p);
@@ -103,17 +123,22 @@
static inline void pte_free_kernel(pte_t *pte)
{
BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
- free_page((unsigned long)pte);
+ quicklist_free(QUICK_PT, NULL, pte);
}
static inline void pte_free(struct page *pte)
{
- __free_page(pte);
-}
+ quicklist_free_page(QUICK_PT, NULL, pte);
+}
-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) quicklist_free_page(QUICK_PT, NULL,(pte))
-#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
-#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
+#define __pmd_free_tlb(tlb,x) quicklist_free(QUICK_PT, NULL, (x))
+#define __pud_free_tlb(tlb,x) quicklist_free(QUICK_PT, NULL, (x))
+static inline void check_pgt_cache(void)
+{
+ quicklist_trim(QUICK_PGD, pgd_dtor, 25, 16);
+ quicklist_trim(QUICK_PT, NULL, 25, 16);
+}
#endif /* _X86_64_PGALLOC_H */
Index: linux-2.6.21-rc5-mm4/arch/x86_64/kernel/process.c
===================================================================
--- linux-2.6.21-rc5-mm4.orig/arch/x86_64/kernel/process.c 2007-04-07 18:07:47.000000000 -0700
+++ linux-2.6.21-rc5-mm4/arch/x86_64/kernel/process.c 2007-04-07 18:09:30.000000000 -0700
@@ -207,6 +207,7 @@
if (__get_cpu_var(cpu_idle_state))
__get_cpu_var(cpu_idle_state) = 0;
+ check_pgt_cache();
rmb();
idle = pm_idle;
if (!idle)
Index: linux-2.6.21-rc5-mm4/arch/x86_64/kernel/smp.c
===================================================================
--- linux-2.6.21-rc5-mm4.orig/arch/x86_64/kernel/smp.c 2007-04-07 18:07:47.000000000 -0700
+++ linux-2.6.21-rc5-mm4/arch/x86_64/kernel/smp.c 2007-04-07 18:09:30.000000000 -0700
@@ -241,7 +241,7 @@
}
if (!cpus_empty(cpu_mask))
flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
-
+ check_pgt_cache();
preempt_enable();
}
EXPORT_SYMBOL(flush_tlb_mm);
Index: linux-2.6.21-rc5-mm4/include/asm-x86_64/pgtable.h
===================================================================
--- linux-2.6.21-rc5-mm4.orig/include/asm-x86_64/pgtable.h 2007-04-07 18:07:47.000000000 -0700
+++ linux-2.6.21-rc5-mm4/include/asm-x86_64/pgtable.h 2007-04-07 18:09:30.000000000 -0700
@@ -424,7 +424,6 @@
#define HAVE_ARCH_UNMAPPED_AREA
#define pgtable_cache_init() do { } while (0)
-#define check_pgt_cache() do { } while (0)
#define PAGE_AGP PAGE_KERNEL_NOCACHE
#define HAVE_PAGE_AGP 1
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 3/4] Quicklist support for x86_64
2007-04-09 18:25 ` [QUICKLIST 3/4] Quicklist support for x86_64 Christoph Lameter
@ 2007-04-09 18:43 ` Andi Kleen
2007-04-09 18:46 ` Christoph Lameter
2007-04-09 21:28 ` Andrew Morton
1 sibling, 1 reply; 24+ messages in thread
From: Andi Kleen @ 2007-04-09 18:43 UTC (permalink / raw)
To: Christoph Lameter; +Cc: akpm, linux-mm, linux-kernel
On Monday 09 April 2007 20:25:20 Christoph Lameter wrote:
> #endif /* _X86_64_PGALLOC_H */
> Index: linux-2.6.21-rc5-mm4/arch/x86_64/kernel/process.c
> ===================================================================
> --- linux-2.6.21-rc5-mm4.orig/arch/x86_64/kernel/process.c 2007-04-07 18:07:47.000000000 -0700
> +++ linux-2.6.21-rc5-mm4/arch/x86_64/kernel/process.c 2007-04-07 18:09:30.000000000 -0700
> @@ -207,6 +207,7 @@
> if (__get_cpu_var(cpu_idle_state))
> __get_cpu_var(cpu_idle_state) = 0;
>
> + check_pgt_cache();
Wouldn't it be better to do that on memory pressure only (register
it as a shrinker)?
> rmb();
> idle = pm_idle;
> if (!idle)
> Index: linux-2.6.21-rc5-mm4/arch/x86_64/kernel/smp.c
> ===================================================================
> --- linux-2.6.21-rc5-mm4.orig/arch/x86_64/kernel/smp.c 2007-04-07 18:07:47.000000000 -0700
> +++ linux-2.6.21-rc5-mm4/arch/x86_64/kernel/smp.c 2007-04-07 18:09:30.000000000 -0700
> @@ -241,7 +241,7 @@
> }
> if (!cpus_empty(cpu_mask))
> flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
> -
> + check_pgt_cache();
Why is that here?
> preempt_enable();
> }
-Andi
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 3/4] Quicklist support for x86_64
2007-04-09 18:43 ` Andi Kleen
@ 2007-04-09 18:46 ` Christoph Lameter
2007-04-09 18:49 ` Andi Kleen
0 siblings, 1 reply; 24+ messages in thread
From: Christoph Lameter @ 2007-04-09 18:46 UTC (permalink / raw)
To: Andi Kleen; +Cc: akpm, linux-mm, linux-kernel
On Mon, 9 Apr 2007, Andi Kleen wrote:
> On Monday 09 April 2007 20:25:20 Christoph Lameter wrote:
>
> > #endif /* _X86_64_PGALLOC_H */
> > Index: linux-2.6.21-rc5-mm4/arch/x86_64/kernel/process.c
> > ===================================================================
> > --- linux-2.6.21-rc5-mm4.orig/arch/x86_64/kernel/process.c 2007-04-07 18:07:47.000000000 -0700
> > +++ linux-2.6.21-rc5-mm4/arch/x86_64/kernel/process.c 2007-04-07 18:09:30.000000000 -0700
> > @@ -207,6 +207,7 @@
> > if (__get_cpu_var(cpu_idle_state))
> > __get_cpu_var(cpu_idle_state) = 0;
> >
> > + check_pgt_cache();
>
> Wouldn't it be better to do that on memory pressure only (register
> it as a shrinker)?
It has to be done in sync with tlb flushing. Doing that on memory pressure
would complicate things significantly. Also idling means that the cache
grows cold.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 3/4] Quicklist support for x86_64
2007-04-09 18:46 ` Christoph Lameter
@ 2007-04-09 18:49 ` Andi Kleen
2007-04-09 18:51 ` Christoph Lameter
0 siblings, 1 reply; 24+ messages in thread
From: Andi Kleen @ 2007-04-09 18:49 UTC (permalink / raw)
To: Christoph Lameter; +Cc: akpm, linux-mm, linux-kernel
>
> It has to be done in sync with tlb flushing.
Why?
> Doing that on memory pressure
> would complicate things significantly.
Again why?
> Also idling means that the cache
> grows cold.
Does it? Unless you worry about interrupts nothing in idle
is going to thrash caches.
-Andi
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 3/4] Quicklist support for x86_64
2007-04-09 18:49 ` Andi Kleen
@ 2007-04-09 18:51 ` Christoph Lameter
2007-04-09 18:53 ` Andi Kleen
0 siblings, 1 reply; 24+ messages in thread
From: Christoph Lameter @ 2007-04-09 18:51 UTC (permalink / raw)
To: Andi Kleen; +Cc: akpm, linux-mm, linux-kernel
On Mon, 9 Apr 2007, Andi Kleen wrote:
> > It has to be done in sync with tlb flushing.
>
> Why?
Otherwise you will leak pages to the page allocator before the tlb flush
occurred.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 3/4] Quicklist support for x86_64
2007-04-09 18:51 ` Christoph Lameter
@ 2007-04-09 18:53 ` Andi Kleen
2007-04-09 18:56 ` Christoph Lameter
0 siblings, 1 reply; 24+ messages in thread
From: Andi Kleen @ 2007-04-09 18:53 UTC (permalink / raw)
To: Christoph Lameter; +Cc: akpm, linux-mm, linux-kernel
On Monday 09 April 2007 20:51:00 Christoph Lameter wrote:
> On Mon, 9 Apr 2007, Andi Kleen wrote:
>
> > > It has to be done in sync with tlb flushing.
> >
> > Why?
>
> Otherwise you will leak pages to the page allocator before the tlb flush
> occurred.
I don't get it sorry. Can you please explain in more detail?
-Andi
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 3/4] Quicklist support for x86_64
2007-04-09 18:53 ` Andi Kleen
@ 2007-04-09 18:56 ` Christoph Lameter
0 siblings, 0 replies; 24+ messages in thread
From: Christoph Lameter @ 2007-04-09 18:56 UTC (permalink / raw)
To: Andi Kleen; +Cc: akpm, linux-mm, linux-kernel
On Mon, 9 Apr 2007, Andi Kleen wrote:
> > Otherwise you will leak pages to the page allocator before the tlb flush
> > occurred.
>
> I don't get it sorry. Can you please explain in more detail?
On process teardown pages are freed via the tlb mechanism. That mechanism
guarantees that TLBs for pages are flushed before they can be reused. We
tie into that and put pages on quicklists. The quicklists are trimmed
after the TLB flush.
If a shrinker would independently free pages from the quicklists then this
mechanism would no longer work and pages that still have a valid TLB for
one process may be reused by other processes.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 3/4] Quicklist support for x86_64
2007-04-09 18:25 ` [QUICKLIST 3/4] Quicklist support for x86_64 Christoph Lameter
2007-04-09 18:43 ` Andi Kleen
@ 2007-04-09 21:28 ` Andrew Morton
2007-04-09 22:01 ` Christoph Lameter
1 sibling, 1 reply; 24+ messages in thread
From: Andrew Morton @ 2007-04-09 21:28 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-mm, ak, linux-kernel
On Mon, 9 Apr 2007 11:25:20 -0700 (PDT)
Christoph Lameter <clameter@sgi.com> wrote:
> -static inline pgd_t *pgd_alloc(struct mm_struct *mm)
> +static inline void pgd_ctor(void *x)
> +static inline void pgd_dtor(void *x)
Seems dumb to inline these - they're only ever called indirectly, aren't
they?
This means (I think) that the compiler will need to generate an out-of-line
copy of these within each compilation unit which passes the address of these
functions into some other function.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 3/4] Quicklist support for x86_64
2007-04-09 21:28 ` Andrew Morton
@ 2007-04-09 22:01 ` Christoph Lameter
0 siblings, 0 replies; 24+ messages in thread
From: Christoph Lameter @ 2007-04-09 22:01 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-mm, ak, linux-kernel
On Mon, 9 Apr 2007, Andrew Morton wrote:
> On Mon, 9 Apr 2007 11:25:20 -0700 (PDT)
> Christoph Lameter <clameter@sgi.com> wrote:
>
> > -static inline pgd_t *pgd_alloc(struct mm_struct *mm)
> > +static inline void pgd_ctor(void *x)
> > +static inline void pgd_dtor(void *x)
>
> Seems dumb to inline these - they're only ever called indirectly, aren't
> they?
Yes.. In most cases they are not called at all because NULL is passed.
Then the function call can be removed by the compiler from the inline
functions.
> This means (I think) that the compiler will need to generate an out-of-line
> copy of these within each compilation unit which passes the address of these
> functions into some other function.
The function is constant. Constant propagation will lead to the function
being included in the inline function.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* [QUICKLIST 4/4] Quicklist support for sparc64
2007-04-09 18:25 [QUICKLIST 1/4] Quicklists for page table pages V5 Christoph Lameter
2007-04-09 18:25 ` [QUICKLIST 2/4] Quicklist support for IA64 Christoph Lameter
2007-04-09 18:25 ` [QUICKLIST 3/4] Quicklist support for x86_64 Christoph Lameter
@ 2007-04-09 18:25 ` Christoph Lameter, David Miller
2007-04-09 18:48 ` [QUICKLIST 1/4] Quicklists for page table pages V5 Andrew Morton
` (3 subsequent siblings)
6 siblings, 0 replies; 24+ messages in thread
From: Christoph Lameter, David Miller @ 2007-04-09 18:25 UTC (permalink / raw)
To: akpm; +Cc: linux-mm, ak, linux-kernel, Christoph Lameter
[QUICKLIST]: Add sparc64 quicklist support.
I ported this to sparc64 as per the patch below, tested on
UP SunBlade1500 and 24 cpu Niagara T1000.
Signed-off-by: David S. Miller <davem@davemloft.net>
Index: linux-2.6.21-rc5-mm4/arch/sparc64/Kconfig
===================================================================
--- linux-2.6.21-rc5-mm4.orig/arch/sparc64/Kconfig 2007-04-07 16:20:07.000000000 -0700
+++ linux-2.6.21-rc5-mm4/arch/sparc64/Kconfig 2007-04-07 18:03:06.000000000 -0700
@@ -26,6 +26,10 @@
bool
default y
+config QUICKLIST
+ bool
+ default y
+
config STACKTRACE_SUPPORT
bool
default y
Index: linux-2.6.21-rc5-mm4/arch/sparc64/mm/init.c
===================================================================
--- linux-2.6.21-rc5-mm4.orig/arch/sparc64/mm/init.c 2007-03-25 15:56:23.000000000 -0700
+++ linux-2.6.21-rc5-mm4/arch/sparc64/mm/init.c 2007-04-07 18:03:06.000000000 -0700
@@ -178,30 +178,6 @@
int bigkernel = 0;
-struct kmem_cache *pgtable_cache __read_mostly;
-
-static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags)
-{
- clear_page(addr);
-}
-
-extern void tsb_cache_init(void);
-
-void pgtable_cache_init(void)
-{
- pgtable_cache = kmem_cache_create("pgtable_cache",
- PAGE_SIZE, PAGE_SIZE,
- SLAB_HWCACHE_ALIGN |
- SLAB_MUST_HWCACHE_ALIGN,
- zero_ctor,
- NULL);
- if (!pgtable_cache) {
- prom_printf("Could not create pgtable_cache\n");
- prom_halt();
- }
- tsb_cache_init();
-}
-
#ifdef CONFIG_DEBUG_DCFLUSH
atomic_t dcpage_flushes = ATOMIC_INIT(0);
#ifdef CONFIG_SMP
Index: linux-2.6.21-rc5-mm4/arch/sparc64/mm/tsb.c
===================================================================
--- linux-2.6.21-rc5-mm4.orig/arch/sparc64/mm/tsb.c 2007-03-25 15:56:23.000000000 -0700
+++ linux-2.6.21-rc5-mm4/arch/sparc64/mm/tsb.c 2007-04-07 18:03:06.000000000 -0700
@@ -252,7 +252,7 @@
"tsb_1MB",
};
-void __init tsb_cache_init(void)
+void __init pgtable_cache_init(void)
{
unsigned long i;
Index: linux-2.6.21-rc5-mm4/include/asm-sparc64/pgalloc.h
===================================================================
--- linux-2.6.21-rc5-mm4.orig/include/asm-sparc64/pgalloc.h 2007-03-25 15:56:23.000000000 -0700
+++ linux-2.6.21-rc5-mm4/include/asm-sparc64/pgalloc.h 2007-04-07 18:03:07.000000000 -0700
@@ -6,6 +6,7 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/quicklist.h>
#include <asm/spitfire.h>
#include <asm/cpudata.h>
@@ -13,52 +14,50 @@
#include <asm/page.h>
/* Page table allocation/freeing. */
-extern struct kmem_cache *pgtable_cache;
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- return kmem_cache_alloc(pgtable_cache, GFP_KERNEL);
+ return quicklist_alloc(0, GFP_KERNEL, NULL);
}
static inline void pgd_free(pgd_t *pgd)
{
- kmem_cache_free(pgtable_cache, pgd);
+ quicklist_free(0, NULL, pgd);
}
#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD)
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return kmem_cache_alloc(pgtable_cache,
- GFP_KERNEL|__GFP_REPEAT);
+ return quicklist_alloc(0, GFP_KERNEL, NULL);
}
static inline void pmd_free(pmd_t *pmd)
{
- kmem_cache_free(pgtable_cache, pmd);
+ quicklist_free(0, NULL, pmd);
}
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long address)
{
- return kmem_cache_alloc(pgtable_cache,
- GFP_KERNEL|__GFP_REPEAT);
+ return quicklist_alloc(0, GFP_KERNEL, NULL);
}
static inline struct page *pte_alloc_one(struct mm_struct *mm,
unsigned long address)
{
- return virt_to_page(pte_alloc_one_kernel(mm, address));
+ void *pg = quicklist_alloc(0, GFP_KERNEL, NULL);
+ return pg ? virt_to_page(pg) : NULL;
}
static inline void pte_free_kernel(pte_t *pte)
{
- kmem_cache_free(pgtable_cache, pte);
+ quicklist_free(0, NULL, pte);
}
static inline void pte_free(struct page *ptepage)
{
- pte_free_kernel(page_address(ptepage));
+ quicklist_free_page(0, NULL, ptepage);
}
@@ -66,6 +65,9 @@
#define pmd_populate(MM,PMD,PTE_PAGE) \
pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE))
-#define check_pgt_cache() do { } while (0)
+static inline void check_pgt_cache(void)
+{
+ quicklist_trim(0, NULL, 25, 16);
+}
#endif /* _SPARC64_PGALLOC_H */
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 1/4] Quicklists for page table pages V5
2007-04-09 18:25 [QUICKLIST 1/4] Quicklists for page table pages V5 Christoph Lameter
` (2 preceding siblings ...)
2007-04-09 18:25 ` [QUICKLIST 4/4] Quicklist support for sparc64 Christoph Lameter, David Miller
@ 2007-04-09 18:48 ` Andrew Morton
2007-04-09 18:50 ` Christoph Lameter
2007-04-09 21:27 ` Andrew Morton
` (2 subsequent siblings)
6 siblings, 1 reply; 24+ messages in thread
From: Andrew Morton @ 2007-04-09 18:48 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-mm, linux-kernel, ak
On Mon, 9 Apr 2007 11:25:09 -0700 (PDT)
Christoph Lameter <clameter@sgi.com> wrote:
> On x86_64 this cuts allocation overhead for page table pages down to
> a fraction (kernel compile / editing load. TSC based measurement
> of times spent in each function):
>
> no quicklist
>
> pte_alloc 1569048 4.3s(401ns/2.7us/179.7us)
> pmd_alloc 780988 2.1s(337ns/2.7us/86.1us)
> pud_alloc 780072 2.2s(424ns/2.8us/300.6us)
> pgd_alloc 260022 1s(920ns/4us/263.1us)
>
> quicklist:
>
> pte_alloc 452436 573.4ms(8ns/1.3us/121.1us)
> pmd_alloc 196204 174.5ms(7ns/889ns/46.1us)
> pud_alloc 195688 172.4ms(7ns/881ns/151.3us)
> pgd_alloc 65228 9.8ms(8ns/150ns/6.1us)
>
> pgd allocations are the most complex and there we see the most dramatic
> improvement (maybe we can cut down the amount of pgds cached somewhat?).
> But even the pte allocations still see a doubling of performance.
Was there any observable change in overall runtime?
What are the numbers in parentheses?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 1/4] Quicklists for page table pages V5
2007-04-09 18:48 ` [QUICKLIST 1/4] Quicklists for page table pages V5 Andrew Morton
@ 2007-04-09 18:50 ` Christoph Lameter
0 siblings, 0 replies; 24+ messages in thread
From: Christoph Lameter @ 2007-04-09 18:50 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-mm, linux-kernel, ak
On Mon, 9 Apr 2007, Andrew Morton wrote:
> On Mon, 9 Apr 2007 11:25:09 -0700 (PDT)
> Christoph Lameter <clameter@sgi.com> wrote:
>
> > On x86_64 this cuts allocation overhead for page table pages down to
> > a fraction (kernel compile / editing load. TSC based measurement
> > of times spend in each function):
> >
> > no quicklist
> >
> > pte_alloc 1569048 4.3s(401ns/2.7us/179.7us)
> > pmd_alloc 780988 2.1s(337ns/2.7us/86.1us)
> > pud_alloc 780072 2.2s(424ns/2.8us/300.6us)
> > pgd_alloc 260022 1s(920ns/4us/263.1us)
> >
> > quicklist:
> >
> > pte_alloc 452436 573.4ms(8ns/1.3us/121.1us)
> > pmd_alloc 196204 174.5ms(7ns/889ns/46.1us)
> > pud_alloc 195688 172.4ms(7ns/881ns/151.3us)
> > pgd_alloc 65228 9.8ms(8ns/150ns/6.1us)
> >
> > pgd allocations are the most complex and there we see the most dramatic
> > improvement (may be we can cut down the amount of pgds cached somewhat?).
> > But even the pte allocations still see a doubling of performance.
>
> Was there any observeable change in overall runtime?
The kernel compile times fluctuate as usual. There was a tendency for the
times to be lower, but no clear win, as discussed before.
> What are the numbers in parentheses?
(minimum/average/maximum)
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 1/4] Quicklists for page table pages V5
2007-04-09 18:25 [QUICKLIST 1/4] Quicklists for page table pages V5 Christoph Lameter
` (3 preceding siblings ...)
2007-04-09 18:48 ` [QUICKLIST 1/4] Quicklists for page table pages V5 Andrew Morton
@ 2007-04-09 21:27 ` Andrew Morton
2007-04-09 21:41 ` Andrew Morton
2007-04-10 4:45 ` Benjamin Herrenschmidt
6 siblings, 0 replies; 24+ messages in thread
From: Andrew Morton @ 2007-04-09 21:27 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-mm, linux-kernel, ak
On Mon, 9 Apr 2007 11:25:09 -0700 (PDT)
Christoph Lameter <clameter@sgi.com> wrote:
> Quicklists for page table pages V5
>
> ...
>
> +/*
> + * The two key functions quicklist_alloc and quicklist_free are inline so
> + * that they may be custom compiled for the platform.
> + * Specifying a NULL ctor can remove constructor support. Specifying
> + * a constant quicklist allows the determination of the exact address
> + * in the per cpu area.
> + *
> + * The fast path in quicklist_alloc touches only a per cpu cacheline and
> + * the first cacheline of the page itself. There is minimal overhead involved.
> + */
> +static inline void *quicklist_alloc(int nr, gfp_t flags, void (*ctor)(void *))
> +{
> + struct quicklist *q;
> + void **p = NULL;
> +
> + q =&get_cpu_var(quicklist)[nr];
> + p = q->page;
> + if (likely(p)) {
> + q->page = p[0];
> + p[0] = NULL;
> + q->nr_pages--;
> + }
> + put_cpu_var(quicklist);
> + if (likely(p))
> + return p;
> +
> + p = (void *)__get_free_page(flags | __GFP_ZERO);
> + if (ctor && p)
> + ctor(p);
> + return p;
> +}
> +
> +static inline void __quicklist_free(int nr, void (*dtor)(void *), void *p,
> + struct page *page)
> +{
> + struct quicklist *q;
> + int nid = page_to_nid(page);
> +
> + if (unlikely(nid != numa_node_id())) {
> + if (dtor)
> + dtor(p);
> + free_page((unsigned long)p);
free_page() has to run virt_to_page(), but we already have the page*.
> + return;
> + }
> +
> + q = &get_cpu_var(quicklist)[nr];
> + *(void **)p = q->page;
> + q->page = p;
> + q->nr_pages++;
> + put_cpu_var(quicklist);
> +}
> +
> +static inline void quicklist_free(int nr, void (*dtor)(void *), void *pp)
> +{
> + __quicklist_free(nr, dtor, pp, virt_to_page(pp));
> +}
> +
> +static inline void quicklist_free_page(int nr, void (*dtor)(void *),
> + struct page *page)
> +{
> + __quicklist_free(nr, dtor, page_address(page), page);
> +}
All this (still) seems way too big to be inlined. I'm showing a 20-odd
byte reduction in x86_64's memory.o text when it is uninlined. Pretty
modest I guess.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 1/4] Quicklists for page table pages V5
2007-04-09 18:25 [QUICKLIST 1/4] Quicklists for page table pages V5 Christoph Lameter
` (4 preceding siblings ...)
2007-04-09 21:27 ` Andrew Morton
@ 2007-04-09 21:41 ` Andrew Morton
2007-04-09 22:03 ` Christoph Lameter
2007-04-10 4:45 ` Benjamin Herrenschmidt
6 siblings, 1 reply; 24+ messages in thread
From: Andrew Morton @ 2007-04-09 21:41 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-mm, linux-kernel, ak
On Mon, 9 Apr 2007 11:25:09 -0700 (PDT)
Christoph Lameter <clameter@sgi.com> wrote:
> Quicklists for page table pages V5
So... we skipped i386 this time?
I'd have gone squeamish if it was included, due to the mystery crash when
we (effectively) set the list size to zero. Someone(tm) should look into
that - who knows, it might indicate a problem in generic code.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 1/4] Quicklists for page table pages V5
2007-04-09 21:41 ` Andrew Morton
@ 2007-04-09 22:03 ` Christoph Lameter
2007-04-10 0:26 ` William Lee Irwin III
0 siblings, 1 reply; 24+ messages in thread
From: Christoph Lameter @ 2007-04-09 22:03 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-mm, linux-kernel, ak, William Lee Irwin III
On Mon, 9 Apr 2007, Andrew Morton wrote:
> On Mon, 9 Apr 2007 11:25:09 -0700 (PDT)
> Christoph Lameter <clameter@sgi.com> wrote:
>
> > Quicklists for page table pages V5
>
> So... we skipped i386 this time?
>
> I'd have gone squeamish if it was included, due to the mystery crash when
> we (effectively) set the list size to zero. Someone(tm) should look into
> that - who knows, it might indicate a problem in generic code.
Yeah too many scary monsters in the i386 arch code. Maybe Bill Irwin can
take a look at how to make this work? He liked the benchmarking code that
I posted so he may have the tools to ensure that it works right. Maybe he
can figure out some additional tricks on how to make quicklists work
better?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 1/4] Quicklists for page table pages V5
2007-04-09 22:03 ` Christoph Lameter
@ 2007-04-10 0:26 ` William Lee Irwin III
2007-04-10 0:53 ` Christoph Lameter
0 siblings, 1 reply; 24+ messages in thread
From: William Lee Irwin III @ 2007-04-10 0:26 UTC (permalink / raw)
To: Christoph Lameter; +Cc: Andrew Morton, linux-mm, linux-kernel, ak
On Mon, 9 Apr 2007, Andrew Morton wrote:
>> So... we skipped i386 this time?
>> I'd have gone squeamish if it was included, due to the mystery crash when
>> we (effectively) set the list size to zero. Someone(tm) should look into
>> that - who knows, it might indicate a problem in generic code.
On Mon, Apr 09, 2007 at 03:03:19PM -0700, Christoph Lameter wrote:
> Yeah too many scary monsters in the i386 arch code. Maybe Bill Irwin can
> take a look at how to make this work? He liked the benchmarking code that
> I posted so he may have the tools to insure that it works right. Maybe he
> can figure out some additional tricks on how to make quicklists work
> better?
There shouldn't be anything all that interesting in the i386 code apart
from accommodations made for slab.c and pageattr.c. But yes, I can do
the grunt work there since I'm familiar enough with its history.
I used the i386 pagetable caching backout code to help verify that
nothing unusual was going on with generic code in this area. I can
debug the altered quicklist code in like fashion to what that was.
Basically, I'll help all this along.
-- wli
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 1/4] Quicklists for page table pages V5
2007-04-09 18:25 [QUICKLIST 1/4] Quicklists for page table pages V5 Christoph Lameter
` (5 preceding siblings ...)
2007-04-09 21:41 ` Andrew Morton
@ 2007-04-10 4:45 ` Benjamin Herrenschmidt
2007-04-11 4:04 ` Christoph Lameter
6 siblings, 1 reply; 24+ messages in thread
From: Benjamin Herrenschmidt @ 2007-04-10 4:45 UTC (permalink / raw)
To: Christoph Lameter; +Cc: akpm, linux-mm, linux-kernel, ak, Paul Mackerras
On Mon, 2007-04-09 at 11:25 -0700, Christoph Lameter wrote:
> Quicklists for page table pages V5
Looks interesting, but unfortunately not very useful at this point for
powerpc unless you remove the assumption that quicklists contain
pages...
On powerpc, we currently use kmem cache slabs (though that isn't
terribly node friendly) whose sizes depend on the page size.
For a 4K page size kernel, we have 4 level page tables and use 2 caches,
PTE and PGD pages are 4K (thus are PAGE_SIZE'd), and PMD & PUD are 1K.
For a 64K page size kernel, we have 3 level page tables and we use 3
caches: PGD pages are 128 bytes (yeah, not big heh...), our pmd
pages are 32K (half a page) and PTE pages are PAGE_SIZE (64K).
Cheers,
Ben.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 1/4] Quicklists for page table pages V5
2007-04-10 4:45 ` Benjamin Herrenschmidt
@ 2007-04-11 4:04 ` Christoph Lameter
2007-04-11 4:18 ` Benjamin Herrenschmidt
0 siblings, 1 reply; 24+ messages in thread
From: Christoph Lameter @ 2007-04-11 4:04 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: akpm, linux-mm, linux-kernel, ak, Paul Mackerras
On Tue, 10 Apr 2007, Benjamin Herrenschmidt wrote:
> On Mon, 2007-04-09 at 11:25 -0700, Christoph Lameter wrote:
>
> > Quicklists for page table pages V5
>
> Looks interesting, but unfortunately not very useful at this point for
> powerpc unless you remove the assumption that quicklists contain
> pages...
Then quicklists won't be as simple anymore.
> On powerpc, we currently use kmem cache slabs (though that isn't
> terribly node friendly) whose sizes depend on the page size.
>
> For a 4K page size kernel, we have 4 level page tables and use 2 caches,
> PTE and PGD pages are 4K (thus are PAGE_SIZE'd), and PMD & PUD are 1K.
PTE and PGD could be run via quicklists? With PTEs you cover the most
common case. Quicklists using PGDs will allow to optimize using
preconstructed pages.
It's probably best to keep the slabs for the 1K pages.
> For a 64K page size kernel, we have 3 level page tables and we use 3
> caches: a PGD pages are 128 bytes (yeah, not big heh...), our pmd
> pages are 32K (half a page) and PTE pages are PAGE_SIZE (64K).
Ok so use quicklists for the PTEs and slab for the rest? A PGD of only 128
bytes? Stuff one at the end of the mm_struct or the task struct? That way
you can avoid allocation overhead.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 1/4] Quicklists for page table pages V5
2007-04-11 4:04 ` Christoph Lameter
@ 2007-04-11 4:18 ` Benjamin Herrenschmidt
2007-04-11 5:41 ` Paul Mackerras
0 siblings, 1 reply; 24+ messages in thread
From: Benjamin Herrenschmidt @ 2007-04-11 4:18 UTC (permalink / raw)
To: Christoph Lameter; +Cc: akpm, linux-mm, linux-kernel, ak, Paul Mackerras
On Tue, 2007-04-10 at 21:04 -0700, Christoph Lameter wrote:
> On Tue, 10 Apr 2007, Benjamin Herrenschmidt wrote:
>
> > On Mon, 2007-04-09 at 11:25 -0700, Christoph Lameter wrote:
> >
> > > Quicklists for page table pages V5
> >
> > Looks interesting, but unfortunately not very useful at this point for
> > powerpc unless you remove the assumption that quicklists contain
> > pages...
>
> Then quicklists wont be as simple anymore.
>
> > On powerpc, we currently use kmem cache slabs (though that isn't
> > terribly node friendly) whose sizes depend on the page size.
> >
> > For a 4K page size kernel, we have 4 level page tables and use 2 caches,
> > PTE and PGD pages are 4K (thus are PAGE_SIZE'd), and PMD & PUD are 1K.
>
> PTE and PGD could be run via quicklists? With PTEs you cover the most
> common case. Quicklists using PGDs will allow to optimize using
> preconstructed pages.
>
> Its probably best to keep the slabs for the 1K pages.
>
> > For a 64K page size kernel, we have 3 level page tables and we use 3
> > caches: a PGD pages are 128 bytes (yeah, not big heh...), our pmd
> > pages are 32K (half a page) and PTE pages are PAGE_SIZE (64K).
>
> Ok so use quicklists for the PTEs and slab for the rest? A PGD of only 128
> bytes? Stuff one at the end of the mm_struct or the task struct? That way
> you can avoid allocation overhead.
Yeah, maybe... I need to think about it a bit more. I might be able to
make the PMD a full page too.
Ben.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 1/4] Quicklists for page table pages V5
2007-04-11 4:18 ` Benjamin Herrenschmidt
@ 2007-04-11 5:41 ` Paul Mackerras
2007-04-11 6:15 ` Benjamin Herrenschmidt
0 siblings, 1 reply; 24+ messages in thread
From: Paul Mackerras @ 2007-04-11 5:41 UTC (permalink / raw)
To: Benjamin Herrenschmidt
Cc: Christoph Lameter, akpm, linux-mm, linux-kernel, ak
Benjamin Herrenschmidt writes:
> > > For a 64K page size kernel, we have 3 level page tables and we use 3
> > > caches: a PGD pages are 128 bytes (yeah, not big heh...), our pmd
> > > pages are 32K (half a page) and PTE pages are PAGE_SIZE (64K).
> >
> > Ok so use quicklists for the PTEs and slab for the rest? A PGD of only 128
> > bytes? Stuff one at the end of the mm_struct or the task struct? That way
> > you can avoid allocation overhead.
>
> Yeah, maybe... I need to think about it a bit more. I might be able to
> make the PMD a full page too.
There was a reason for making the PMD level map 256MB. I'd have to
remember what that was and make sure it didn't apply any more first...
Paul.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [QUICKLIST 1/4] Quicklists for page table pages V5
2007-04-11 5:41 ` Paul Mackerras
@ 2007-04-11 6:15 ` Benjamin Herrenschmidt
0 siblings, 0 replies; 24+ messages in thread
From: Benjamin Herrenschmidt @ 2007-04-11 6:15 UTC (permalink / raw)
To: Paul Mackerras; +Cc: Christoph Lameter, akpm, linux-mm, linux-kernel, ak
On Wed, 2007-04-11 at 15:41 +1000, Paul Mackerras wrote:
> Benjamin Herrenschmidt writes:
>
> > > > For a 64K page size kernel, we have 3 level page tables and we use 3
> > > > caches: a PGD pages are 128 bytes (yeah, not big heh...), our pmd
> > > > pages are 32K (half a page) and PTE pages are PAGE_SIZE (64K).
> > >
> > > Ok so use quicklists for the PTEs and slab for the rest? A PGD of only 128
> > > bytes? Stuff one at the end of the mm_struct or the task struct? That way
> > > you can avoid allocation overhead.
> >
> > Yeah, maybe... I need to think about it a bit more. I might be able to
> > make the PMD a full page too.
>
> There was a reason for making the PMD level map 256MB. I'd have to
> remember what that was and make sure it didn't apply any more first...
For dynamic VSIDs....
Ben.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread