* [patch] arch: remove __GFP_REPEAT for order-0 allocations
@ 2010-09-28 10:45 David Rientjes
2010-09-28 21:36 ` Andrew Morton
0 siblings, 1 reply; 10+ messages in thread
From: David Rientjes @ 2010-09-28 10:45 UTC (permalink / raw)
To: Andrew Morton
Cc: Arnd Bergmann, Richard Henderson, Ivan Kokshaysky, Matt Turner,
Russell King, Mikael Starvik, Jesper Nilsson, David Howells,
Geert Uytterhoeven, Roman Zippel, Michal Simek, Koichi Yasutake,
Kyle McMartin, Helge Deller, Benjamin Herrenschmidt,
Paul Mackerras, Martin Schwidefsky, Heiko Carstens, Paul Mundt,
David S. Miller, Jeff Dike, Ingo Molnar, H. Peter Anvin,
Thomas Gleixner, linux-arch, linux-mm
Order-0 allocations, including quicklist_alloc(), are always under
PAGE_ALLOC_COSTLY_ORDER, so they loop endlessly in the page allocator
already without the need for __GFP_REPEAT.
Also includes general cleanups to convert alloc_pages(gfp_mask, 0) to its
equivalent alloc_page(gfp_mask).
Signed-off-by: David Rientjes <rientjes@google.com>
---
arch/alpha/include/asm/pgalloc.h | 4 ++--
arch/arm/include/asm/pgalloc.h | 2 +-
arch/avr32/include/asm/pgalloc.h | 6 +++---
arch/cris/include/asm/pgalloc.h | 4 ++--
arch/frv/mm/pgalloc.c | 6 +++---
arch/m68k/include/asm/motorola_pgalloc.h | 4 ++--
arch/m68k/include/asm/sun3_pgalloc.h | 4 ++--
arch/microblaze/include/asm/pgalloc.h | 7 +++----
arch/microblaze/mm/pgtable.c | 3 +--
arch/mn10300/mm/pgtable.c | 6 +++---
arch/parisc/include/asm/pgalloc.h | 4 ++--
arch/powerpc/include/asm/pgalloc-64.h | 2 +-
arch/powerpc/mm/pgtable_32.c | 6 ++----
arch/s390/mm/pgtable.c | 2 +-
arch/sh/include/asm/pgalloc.h | 4 ++--
arch/sparc/mm/sun4c.c | 2 +-
arch/um/kernel/mem.c | 4 ++--
arch/x86/include/asm/pgalloc.h | 4 ++--
arch/x86/mm/pgtable.c | 2 +-
19 files changed, 36 insertions(+), 40 deletions(-)
diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -40,7 +40,7 @@ pgd_free(struct mm_struct *mm, pgd_t *pgd)
static inline pmd_t *
pmd_alloc_one(struct mm_struct *mm, unsigned long address)
{
- pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+ pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
return ret;
}
@@ -53,7 +53,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
- pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+ pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
return pte;
}
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -36,7 +36,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
#define pgd_alloc(mm) get_pgd_slow(mm)
#define pgd_free(mm, pgd) free_pgd_slow(mm, pgd)
-#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
/*
* Allocate one PTE table.
diff --git a/arch/avr32/include/asm/pgalloc.h b/arch/avr32/include/asm/pgalloc.h
--- a/arch/avr32/include/asm/pgalloc.h
+++ b/arch/avr32/include/asm/pgalloc.h
@@ -42,7 +42,7 @@ static inline void pgd_ctor(void *x)
*/
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- return quicklist_alloc(QUICK_PGD, GFP_KERNEL | __GFP_REPEAT, pgd_ctor);
+ return quicklist_alloc(QUICK_PGD, GFP_KERNEL, pgd_ctor);
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -53,7 +53,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long address)
{
- return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
+ return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
}
static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
@@ -62,7 +62,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
struct page *page;
void *pg;
- pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
+ pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
if (!pg)
return NULL;
diff --git a/arch/cris/include/asm/pgalloc.h b/arch/cris/include/asm/pgalloc.h
--- a/arch/cris/include/asm/pgalloc.h
+++ b/arch/cris/include/asm/pgalloc.h
@@ -24,14 +24,14 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
- pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+ pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
return pte;
}
static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *pte;
- pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+ pte = alloc_page(GFP_KERNEL|__GFP_ZERO);
pgtable_page_ctor(pte);
return pte;
}
diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c
--- a/arch/frv/mm/pgalloc.c
+++ b/arch/frv/mm/pgalloc.c
@@ -22,7 +22,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((aligned(PAGE_SIZE)));
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
- pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+ pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL);
if (pte)
clear_page(pte);
return pte;
@@ -33,9 +33,9 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
struct page *page;
#ifdef CONFIG_HIGHPTE
- page = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0);
+ page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
#else
- page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
+ page = alloc_page(GFP_KERNEL);
#endif
if (page) {
clear_highpage(page);
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -11,7 +11,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long ad
{
pte_t *pte;
- pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+ pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
if (pte) {
__flush_page_to_ram(pte);
flush_tlb_kernel_page(pte);
@@ -29,7 +29,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+ struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
pte_t *pte;
if(!page)
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -41,7 +41,7 @@ do { \
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long address)
{
- unsigned long page = __get_free_page(GFP_KERNEL|__GFP_REPEAT);
+ unsigned long page = __get_free_page(GFP_KERNEL);
if (!page)
return NULL;
@@ -53,7 +53,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
unsigned long address)
{
- struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
+ struct page *page = alloc_page(GFP_KERNEL);
if (page == NULL)
return NULL;
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -114,14 +114,13 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
unsigned long address)
{
struct page *ptepage;
+ int flags = GFP_KERNEL;
#ifdef CONFIG_HIGHPTE
- int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
-#else
- int flags = GFP_KERNEL | __GFP_REPEAT;
+ flags |= __GFP_HIGHMEM;
#endif
- ptepage = alloc_pages(flags, 0);
+ ptepage = alloc_page(flags);
if (ptepage)
clear_highpage(ptepage);
return ptepage;
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -245,8 +245,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
{
pte_t *pte;
if (mem_init_done) {
- pte = (pte_t *)__get_free_page(GFP_KERNEL |
- __GFP_REPEAT | __GFP_ZERO);
+ pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
} else {
pte = (pte_t *)early_get_page();
if (pte)
diff --git a/arch/mn10300/mm/pgtable.c b/arch/mn10300/mm/pgtable.c
--- a/arch/mn10300/mm/pgtable.c
+++ b/arch/mn10300/mm/pgtable.c
@@ -64,7 +64,7 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
- pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+ pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL);
if (pte)
clear_page(pte);
return pte;
@@ -75,9 +75,9 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
struct page *pte;
#ifdef CONFIG_HIGHPTE
- pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0);
+ pte = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
#else
- pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
+ pte = alloc_page(GFP_KERNEL);
#endif
if (pte)
clear_highpage(pte);
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -120,7 +120,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
static inline pgtable_t
pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+ struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
if (page)
pgtable_page_ctor(page);
return page;
@@ -129,7 +129,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
{
- pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+ pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
return pte;
}
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -103,7 +103,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long address)
{
- return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
+ return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
}
static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -102,7 +102,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add
extern void *early_get_page(void);
if (mem_init_done) {
- pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+ pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
} else {
pte = (pte_t *)early_get_page();
if (pte)
@@ -115,9 +115,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *ptepage;
- gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO;
-
- ptepage = alloc_pages(flags, 0);
+ ptepage = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!ptepage)
return NULL;
pgtable_page_ctor(ptepage);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -192,7 +192,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
}
if (!page) {
spin_unlock(&mm->context.list_lock);
- page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+ page = alloc_page(GFP_KERNEL);
if (!page)
return NULL;
pgtable_page_ctor(page);
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -34,7 +34,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long address)
{
- return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
+ return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
}
static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
@@ -43,7 +43,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
struct page *page;
void *pg;
- pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
+ pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
if (!pg)
return NULL;
page = virt_to_page(pg);
diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c
--- a/arch/sparc/mm/sun4c.c
+++ b/arch/sparc/mm/sun4c.c
@@ -1831,7 +1831,7 @@ static pte_t *sun4c_pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add
if ((pte = sun4c_pte_alloc_one_fast(mm, address)) != NULL)
return pte;
- pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+ pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
return pte;
}
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -288,7 +288,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
pte_t *pte;
- pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+ pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
return pte;
}
@@ -296,7 +296,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *pte;
- pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+ pte = alloc_page(GFP_KERNEL|__GFP_ZERO);
if (pte)
pgtable_page_ctor(pte);
return pte;
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -80,7 +80,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
#if PAGETABLE_LEVELS > 2
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+ return (pmd_t *)get_zeroed_page(GFP_KERNEL);
}
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
@@ -116,7 +116,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+ return (pud_t *)get_zeroed_page(GFP_KERNEL);
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -5,7 +5,7 @@
#include <asm/tlb.h>
#include <asm/fixmap.h>
-#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
+#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO
#ifdef CONFIG_HIGHPTE
#define PGALLOC_USER_GFP __GFP_HIGHMEM
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [patch] arch: remove __GFP_REPEAT for order-0 allocations
2010-09-28 10:45 [patch] arch: remove __GFP_REPEAT for order-0 allocations David Rientjes
@ 2010-09-28 21:36 ` Andrew Morton
2010-09-28 21:57 ` Russell King - ARM Linux
2010-09-28 22:47 ` David Rientjes
0 siblings, 2 replies; 10+ messages in thread
From: Andrew Morton @ 2010-09-28 21:36 UTC (permalink / raw)
To: David Rientjes
Cc: Arnd Bergmann, Richard Henderson, Ivan Kokshaysky, Matt Turner,
Russell King, Mikael Starvik, Jesper Nilsson, David Howells,
Geert Uytterhoeven, Roman Zippel, Michal Simek, Koichi Yasutake,
Kyle McMartin, Helge Deller, Benjamin Herrenschmidt,
Paul Mackerras, Martin Schwidefsky, Heiko Carstens, Paul Mundt,
David S. Miller, Jeff Dike, Ingo Molnar, H. Peter Anvin,
Thomas Gleixner, linux-arch, linux-mm
On Tue, 28 Sep 2010 03:45:10 -0700 (PDT)
David Rientjes <rientjes@google.com> wrote:
> Order-0 allocations, including quicklist_alloc(), are always under
> PAGE_ALLOC_COSTLY_ORDER, so they loop endlessly in the page allocator
> already without the need for __GFP_REPEAT.
That's only true for the current implementation of the page allocator.
If we were to change the page allocator behaviour to not do that (and
we change it daily!) then all those callsites which wanted __GFP_REPEAT
behaviour will get broken. So someone would need to go back and work
out how to unbreak them, if we remembered.
Plus there's presumably some documentary benefit in leaving the
__GFP_REPEATs in there.
Why are those __GFP_REPEATs present at those callsites? What were
developers trying to achieve?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [patch] arch: remove __GFP_REPEAT for order-0 allocations
2010-09-28 21:36 ` Andrew Morton
@ 2010-09-28 21:57 ` Russell King - ARM Linux
2010-09-28 22:47 ` David Rientjes
1 sibling, 0 replies; 10+ messages in thread
From: Russell King - ARM Linux @ 2010-09-28 21:57 UTC (permalink / raw)
To: Andrew Morton
Cc: David Rientjes, Arnd Bergmann, Richard Henderson,
Ivan Kokshaysky, Matt Turner, Mikael Starvik, Jesper Nilsson,
David Howells, Geert Uytterhoeven, Roman Zippel, Michal Simek,
Koichi Yasutake, Kyle McMartin, Helge Deller,
Benjamin Herrenschmidt, Paul Mackerras, Martin Schwidefsky,
Heiko Carstens, Paul Mundt, David S. Miller, Jeff Dike,
Ingo Molnar, H. Peter Anvin, Thomas Gleixner, linux-arch,
linux-mm
On Tue, Sep 28, 2010 at 02:36:55PM -0700, Andrew Morton wrote:
> On Tue, 28 Sep 2010 03:45:10 -0700 (PDT)
> David Rientjes <rientjes@google.com> wrote:
>
> > Order-0 allocations, including quicklist_alloc(), are always under
> > PAGE_ALLOC_COSTLY_ORDER, so they loop endlessly in the page allocator
> > already without the need for __GFP_REPEAT.
>
> That's only true for the current implementation of the page allocator.
>
> If we were to change the page allocator behaviour to not do that (and
> we change it daily!) then all those callsites which wanted __GFP_REPEAT
> behaviour will get broken. So someone would need to go back and work
> out how to unbreak them, if we remembered.
>
> Plus there's presumably some documentary benefit in leaving the
> __GFP_REPEATs in there.
>
> Why are those __GFP_REPEATs present at those callsites? What were
> developers trying to achieve?
As I understand it, it's come about by way of evolution. I think (I can't
check) that we had loops in the allocation functions. These were removed
and replaced by __GFP_REPEATs sometime before the current GIT history
started.
I'd check on bkbits.net which I'm sure will have it, but unfortunately I
can't get access to the history for include/asm-arm/pgalloc.h prior to
its move to arch/arm.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [patch] arch: remove __GFP_REPEAT for order-0 allocations
2010-09-28 21:36 ` Andrew Morton
2010-09-28 21:57 ` Russell King - ARM Linux
@ 2010-09-28 22:47 ` David Rientjes
2010-09-28 22:53 ` Andrew Morton
1 sibling, 1 reply; 10+ messages in thread
From: David Rientjes @ 2010-09-28 22:47 UTC (permalink / raw)
To: Andrew Morton
Cc: Arnd Bergmann, Richard Henderson, Ivan Kokshaysky, Matt Turner,
Russell King, Mikael Starvik, Jesper Nilsson, David Howells,
Geert Uytterhoeven, Roman Zippel, Michal Simek, Koichi Yasutake,
Kyle McMartin, Helge Deller, Benjamin Herrenschmidt,
Paul Mackerras, Martin Schwidefsky, Heiko Carstens, Paul Mundt,
David S. Miller, Jeff Dike, Ingo Molnar, H. Peter Anvin,
Thomas Gleixner, linux-arch, linux-mm
On Tue, 28 Sep 2010, Andrew Morton wrote:
> > Order-0 allocations, including quicklist_alloc(), are always under
> > PAGE_ALLOC_COSTLY_ORDER, so they loop endlessly in the page allocator
> > already without the need for __GFP_REPEAT.
>
> That's only true for the current implementation of the page allocator.
>
Yes, but in this case it's irrelevant since we're talking about order-0
allocations. The page allocator will never be changed so that order-0
allocations immediately fail if there's no available memory, otherwise
we'd only use direct reclaim and the oom killer for high-order allocs or
add __GFP_NOFAIL everywhere and that's quite pointless.
> If we were to change the page allocator behaviour to not do that (and
> we change it daily!) then all those callsites which wanted __GFP_REPEAT
> behaviour will get broken. So someone would need to go back and work
> out how to unbreak them, if we remembered.
>
> Plus there's presumably some documentary benefit in leaving the
> __GFP_REPEATs in there.
>
The documentation is one of the problems here, __GFP_REPEAT isn't sanely
defined by any of it and it leaves the user guessing as to its behavior
unless you peruse the implementation.
Intiution would suggest that __GFP_REPEAT would repeat the allocation
attempt once it failed. After all, we have __GFP_NOFAIL to try
indefinitely. That's not what it does, however.
include/linux/gfp.h:
* __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
* _might_ fail. This depends upon the particular VM implementation.
Try hard in what way? Sure, it depends on the implementation but does
that mean we only reclaim if we have __GFP_REPEAT? This definition also
does allow us to change its meaning, so saying it has a specific
importance for order-0 allocations in arch code isn't really that
compelling.
include/linux/slab.h:
* %__GFP_REPEAT - If allocation fails initially, try once more before failing.
Nope, that's not what it does either. (And, if it did, why would that
possibly be helpful unless we know there's something being freed?)
The reality is that __GFP_REPEAT continues the allocation until we've
reclaimed at least the number of pages we're looking for. For order-0
allocations, it would only repeat if we failed to reclaim any pages. But,
if that's the case, the oom killer would have killed something and we
implicitly loop anyway in that situation (otherwise, we would have
needlessly killed a task!) without even looking at the retry logic.
So we can definitely remove __GFP_REPEAT for any order-0 allocation and
it's based on its implementation -- poorly defined as it may be -- and the
inherit design of any sane page allocator that retries such an allocation
if it's going to use reclaim in the first place.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [patch] arch: remove __GFP_REPEAT for order-0 allocations
2010-09-28 22:47 ` David Rientjes
@ 2010-09-28 22:53 ` Andrew Morton
2010-09-28 23:12 ` David Rientjes
0 siblings, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2010-09-28 22:53 UTC (permalink / raw)
To: David Rientjes
Cc: Arnd Bergmann, Richard Henderson, Ivan Kokshaysky, Matt Turner,
Russell King, Mikael Starvik, Jesper Nilsson, David Howells,
Geert Uytterhoeven, Roman Zippel, Michal Simek, Koichi Yasutake,
Kyle McMartin, Helge Deller, Benjamin Herrenschmidt,
Paul Mackerras, Martin Schwidefsky, Heiko Carstens, Paul Mundt,
David S. Miller, Jeff Dike, Ingo Molnar, H. Peter Anvin,
Thomas Gleixner, linux-arch, linux-mm
On Tue, 28 Sep 2010 15:47:57 -0700 (PDT)
David Rientjes <rientjes@google.com> wrote:
> On Tue, 28 Sep 2010, Andrew Morton wrote:
>
> > > Order-0 allocations, including quicklist_alloc(), are always under
> > > PAGE_ALLOC_COSTLY_ORDER, so they loop endlessly in the page allocator
> > > already without the need for __GFP_REPEAT.
> >
> > That's only true for the current implementation of the page allocator.
> >
>
> Yes, but in this case it's irrelevant since we're talking about order-0
> allocations. The page allocator will never be changed so that order-0
> allocations immediately fail if there's no available memory, otherwise
> we'd only use direct reclaim and the oom killer for high-order allocs or
> add __GFP_NOFAIL everywhere and that's quite pointless.
The crystal balls are large on this one.
> So we can definitely remove __GFP_REPEAT for any order-0 allocation and
> it's based on its implementation -- poorly defined as it may be -- and the
> inherit design of any sane page allocator that retries such an allocation
> if it's going to use reclaim in the first place.
Why was __GFP_REPEAT used in those callsites? What were people trying
to achieve?
We shouldn't just go and ignorantly rip it out without understanding
this, and ensuring that we're meeting that intent to the best extent
possible.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [patch] arch: remove __GFP_REPEAT for order-0 allocations
2010-09-28 22:53 ` Andrew Morton
@ 2010-09-28 23:12 ` David Rientjes
2010-09-28 23:40 ` Andrew Morton
0 siblings, 1 reply; 10+ messages in thread
From: David Rientjes @ 2010-09-28 23:12 UTC (permalink / raw)
To: Andrew Morton
Cc: Arnd Bergmann, Richard Henderson, Ivan Kokshaysky, Matt Turner,
Russell King, Mikael Starvik, Jesper Nilsson, David Howells,
Geert Uytterhoeven, Roman Zippel, Michal Simek, Koichi Yasutake,
Kyle McMartin, Helge Deller, Benjamin Herrenschmidt,
Paul Mackerras, Martin Schwidefsky, Heiko Carstens, Paul Mundt,
David S. Miller, Jeff Dike, Ingo Molnar, H. Peter Anvin,
Thomas Gleixner, linux-arch, linux-mm
On Tue, 28 Sep 2010, Andrew Morton wrote:
> > So we can definitely remove __GFP_REPEAT for any order-0 allocation and
> > it's based on its implementation -- poorly defined as it may be -- and the
> > inherit design of any sane page allocator that retries such an allocation
> > if it's going to use reclaim in the first place.
>
> Why was __GFP_REPEAT used in those callsites? What were people trying
> to achieve?
>
I can't predict what they were trying to achieve since the documentation
varies on the semantics of __GFP_REPEAT, but the actual implementation
would suggest that we want to ensure that reclaim actually frees memory
before we fail the allocation, and that was probably done before it was
decided to implicitly loop already for order-3 and smaller allocations and
prehaps even predates the oom killer.
With the oom killer, which would be used iff direct reclaim failed for the
allocs touched in this patch, we can't report how much memory may be freed
when that killed task exits, so we implicitly loop forever regardless of
__GFP_REPEAT (and for a reason other than PAGE_ALLOC_COSTLY_ORDER: if the
oom killer kills or finds a task that has already been killed but yet to
exit, it automatically retries the allocation waiting for that free memory
without even looking at should_alloc_retry()).
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [patch] arch: remove __GFP_REPEAT for order-0 allocations
2010-09-28 23:12 ` David Rientjes
@ 2010-09-28 23:40 ` Andrew Morton
2010-09-28 23:52 ` David Rientjes
0 siblings, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2010-09-28 23:40 UTC (permalink / raw)
To: David Rientjes
Cc: Arnd Bergmann, Richard Henderson, Ivan Kokshaysky, Matt Turner,
Russell King, Mikael Starvik, Jesper Nilsson, David Howells,
Geert Uytterhoeven, Roman Zippel, Michal Simek, Koichi Yasutake,
Kyle McMartin, Helge Deller, Benjamin Herrenschmidt,
Paul Mackerras, Martin Schwidefsky, Heiko Carstens, Paul Mundt,
David S. Miller, Jeff Dike, Ingo Molnar, H. Peter Anvin,
Thomas Gleixner, linux-arch, linux-mm
On Tue, 28 Sep 2010 16:12:26 -0700 (PDT)
David Rientjes <rientjes@google.com> wrote:
> On Tue, 28 Sep 2010, Andrew Morton wrote:
>
> > > So we can definitely remove __GFP_REPEAT for any order-0 allocation and
> > > it's based on its implementation -- poorly defined as it may be -- and the
> > > inherit design of any sane page allocator that retries such an allocation
> > > if it's going to use reclaim in the first place.
> >
> > Why was __GFP_REPEAT used in those callsites? What were people trying
> > to achieve?
> >
>
> I can't predict what they were trying to achieve
Using my super powers it took me all of three minutes.
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/old-2.6-bkcvs.git
Do `git log > foo', and search foo for GFP_REPEAT.
A couple of interesting ones are:
commit f3615244f15c8bee5783fcf032717ffdfd56e219
Author: akpm <akpm>
AuthorDate: Sun Apr 20 21:28:12 2003 +0000
Commit: akpm <akpm>
CommitDate: Sun Apr 20 21:28:12 2003 +0000
[PATCH] implement __GFP_REPEAT, __GFP_NOFAIL, __GFP_NORETRY
This is a cleanup patch.
There are quite a lot of places in the kernel which will infinitely retry a
memory allocation.
Generally, they get it wrong. Some do yield(), the semantics of which have
changed over time. Some do schedule(), which can lock up if the caller is
SCHED_FIFO/RR. Some do schedule_timeout(), etc.
And often it is unnecessary, because the page allocator will do the retry
internally anyway. But we cannot rely on that - this behaviour may change
(-aa and -rmap kernels do not do this, for instance).
So it is good to formalise and to centralise this operation. If an
allocation specifies __GFP_REPEAT then the page allocator must infinitely
retry the allocation.
The semantics of __GFP_REPEAT are "try harder". The allocation _may_ fail
(the 2.4 -aa and -rmap VM's do not retry infinitely by default).
The semantics of __GFP_NOFAIL are "cannot fail". It is a no-op in this VM,
but needs to be honoured (or fix up the callers) if the VM ischanged to not
retry infinitely by default.
The semantics of __GFP_NOREPEAT are "try once, don't loop". This isn't used
at present (although perhaps it should be, in swapoff). It is mainly for
completeness.
BKrev: 3ea310ecLgvT41M93_3ecU4Tut6XyQ
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index c475f7b..ade6d9e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -11,13 +11,26 @@
#define __GFP_DMA 0x01
#define __GFP_HIGHMEM 0x02
-/* Action modifiers - doesn't change the zoning */
+/*
+ * Action modifiers - doesn't change the zoning
+ *
+ * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
+ * _might_ fail. This depends upon the particular VM implementation.
+ *
+ * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
+ * cannot handle allocation failures.
+ *
+ * __GFP_NORETRY: The VM implementation must not retry indefinitely.
+ */
#define __GFP_WAIT 0x10 /* Can wait and reschedule? */
#define __GFP_HIGH 0x20 /* Should access emergency pools? */
#define __GFP_IO 0x40 /* Can start physical IO? */
#define __GFP_FS 0x80 /* Can call down to low-level FS? */
#define __GFP_COLD 0x100 /* Cache-cold page required */
#define __GFP_NOWARN 0x200 /* Suppress page allocation failure warning */
+#define __GFP_REPEAT 0x400 /* Retry the allocation. Might fail */
+#define __GFP_NOFAIL 0x800 /* Retry for ever. Cannot fail */
+#define __GFP_NORETRY 0x1000 /* Do not retry. Might fail */
#define GFP_ATOMIC (__GFP_HIGH)
#define GFP_NOIO (__GFP_WAIT)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index bdc5256..603748b 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -22,7 +22,7 @@ typedef struct kmem_cache_s kmem_cache_t;
#define SLAB_KERNEL GFP_KERNEL
#define SLAB_DMA GFP_DMA
-#define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|__GFP_COLD|__GFP_NOWARN)
+#define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|__GFP_NORETRY)
#define SLAB_NO_GROW 0x00001000UL /* don't grow a cache */
/* flags to pass to kmem_cache_create().
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c9c7acc..bff7db2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -536,6 +536,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
struct page *page;
int i;
int cold;
+ int do_retry;
if (wait)
might_sleep();
@@ -626,10 +627,21 @@ rebalance:
}
/*
- * Don't let big-order allocations loop. Yield for kswapd, try again.
+ * Don't let big-order allocations loop unless the caller explicitly
+ * requests that. Wait for some write requests to complete then retry.
+ *
+ * In this implementation, __GFP_REPEAT means __GFP_NOFAIL, but that
+ * may not be true in other implementations.
*/
- if (order <= 3) {
- yield();
+ do_retry = 0;
+ if (!(gfp_mask & __GFP_NORETRY)) {
+ if ((order <= 3) || (gfp_mask & __GFP_REPEAT))
+ do_retry = 1;
+ if (gfp_mask & __GFP_NOFAIL)
+ do_retry = 1;
+ }
+ if (do_retry) {
+ blk_congestion_wait(WRITE, HZ/50);
goto rebalance;
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 291ce11..c1dffbd 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -805,8 +805,7 @@ shrink_caches(struct zone *classzone, int priority, int *total_scanned,
* excessive rotation of the inactive list, which is _supposed_ to be an LRU,
* yes?
*/
-int
-try_to_free_pages(struct zone *classzone,
+int try_to_free_pages(struct zone *classzone,
unsigned int gfp_mask, unsigned int order)
{
int priority;
@@ -838,7 +837,7 @@ try_to_free_pages(struct zone *classzone,
blk_congestion_wait(WRITE, HZ/10);
shrink_slab(total_scanned, gfp_mask);
}
- if (gfp_mask & __GFP_FS)
+ if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY))
out_of_memory();
return 0;
}
and
commit 28e172ed053b0535fb7c45dfd404c795a00c36e1
Author: akpm <akpm>
AuthorDate: Sun Apr 20 21:28:25 2003 +0000
Commit: akpm <akpm>
CommitDate: Sun Apr 20 21:28:25 2003 +0000
[PATCH] use __GFP_REPEAT in pte_alloc_one()
Remove all the open-coded retry loops in various architectures, use
__GFP_REPEAT.
It could be that at some time in the future we change __GFP_REPEAT to give up
after ten seconds or so, so all the checks for failed allocations are
retained.
BKrev: 3ea310f9crWaBJIb9us4X_HVeI_DfA
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index bbf7417..d310bcc 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -66,19 +66,9 @@ pgd_alloc(struct mm_struct *mm)
pte_t *
pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
- pte_t *pte;
- long timeout = 10;
-
- retry:
- pte = (pte_t *) __get_free_page(GFP_KERNEL);
+ pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
if (pte)
clear_page(pte);
- else if (--timeout >= 0) {
- current->state = TASK_UNINTERRUPTIBLE;
- schedule_timeout(HZ);
- goto retry;
- }
-
return pte;
}
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index 054eec2..9d36261 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -131,39 +131,23 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
- int count = 0;
- pte_t *pte;
-
- do {
- pte = (pte_t *) __get_free_page(GFP_KERNEL);
- if (pte)
- clear_page(pte);
- else {
- current->state = TASK_UNINTERRUPTIBLE;
- schedule_timeout(HZ);
- }
- } while (!pte && (count++ < 10));
+ pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+ if (pte)
+ clear_page(pte);
return pte;
}
struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- int count = 0;
struct page *pte;
-
- do {
+
#if CONFIG_HIGHPTE
- pte = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, 0);
+ pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0);
#else
- pte = alloc_pages(GFP_KERNEL, 0);
+ pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
#endif
- if (pte)
- clear_highpage(pte);
- else {
- current->state = TASK_UNINTERRUPTIBLE;
- schedule_timeout(HZ);
- }
- } while (!pte && (count++ < 10));
+ if (pte)
+ clear_highpage(pte);
return pte;
}
diff --git a/arch/ppc/mm/pgtable.c b/arch/ppc/mm/pgtable.c
index 9682525..5d4aef7 100644
--- a/arch/ppc/mm/pgtable.c
+++ b/arch/ppc/mm/pgtable.c
@@ -76,15 +76,11 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
extern void *early_get_page(void);
int timeout = 0;
- if (mem_init_done) {
- while ((pte = (pte_t *) __get_free_page(GFP_KERNEL)) == NULL
- && ++timeout < 10) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- }
- } else
- pte = (pte_t *) early_get_page();
- if (pte != NULL)
+ if (mem_init_done)
+ pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+ else
+ pte = (pte_t *)early_get_page();
+ if (pte)
clear_page(pte);
return pte;
}
@@ -92,20 +88,16 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *pte;
- int timeout = 0;
+
#ifdef CONFIG_HIGHPTE
- int flags = GFP_KERNEL | __GFP_HIGHMEM;
+ int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
#else
- int flags = GFP_KERNEL;
+ int flags = GFP_KERNEL | __GFP_REPEAT;
#endif
- while ((pte = alloc_pages(flags, 0)) == NULL) {
- if (++timeout >= 10)
- return NULL;
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- }
- clear_highpage(pte);
+ pte = alloc_pages(flags, 0);
+ if (pte)
+ clear_highpage(pte);
return pte;
}
diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c
index 9cda5ee..e4da222 100644
--- a/arch/sparc/mm/sun4c.c
+++ b/arch/sparc/mm/sun4c.c
@@ -1901,7 +1901,7 @@ static pte_t *sun4c_pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add
if ((pte = sun4c_pte_alloc_one_fast(mm, address)) != NULL)
return pte;
- pte = (pte_t *)__get_free_page(GFP_KERNEL);
+ pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
if (pte)
memset(pte, 0, PAGE_SIZE);
return pte;
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 2e71992..d0c24d4 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -810,35 +810,21 @@ void pgd_free(pgd_t *pgd)
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
- int count = 0;
pte_t *pte;
- do {
- pte = (pte_t *) __get_free_page(GFP_KERNEL);
- if (pte)
- clear_page(pte);
- else {
- current->state = TASK_UNINTERRUPTIBLE;
- schedule_timeout(HZ);
- }
- } while (!pte && (count++ < 10));
+ pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+ if (pte)
+ clear_page(pte);
return pte;
}
struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- int count = 0;
struct page *pte;
- do {
- pte = alloc_pages(GFP_KERNEL, 0);
- if (pte)
- clear_highpage(pte);
- else {
- current->state = TASK_UNINTERRUPTIBLE;
- schedule_timeout(HZ);
- }
- } while (!pte && (count++ < 10));
+ pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
+ if (pte)
+ clear_highpage(pte);
return pte;
}
diff --git a/include/asm-arm/proc-armv/pgalloc.h b/include/asm-arm/proc-armv/pgalloc.h
index 4440be7..3263c34 100644
--- a/include/asm-arm/proc-armv/pgalloc.h
+++ b/include/asm-arm/proc-armv/pgalloc.h
@@ -27,17 +27,9 @@
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
{
- int count = 0;
pte_t *pte;
- do {
- pte = (pte_t *)__get_free_page(GFP_KERNEL);
- if (!pte) {
- current->state = TASK_UNINTERRUPTIBLE;
- schedule_timeout(HZ);
- }
- } while (!pte && (count++ < 10));
-
+ pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
if (pte) {
clear_page(pte);
clean_dcache_area(pte, sizeof(pte_t) * PTRS_PER_PTE);
@@ -51,16 +43,8 @@ static inline struct page *
pte_alloc_one(struct mm_struct *mm, unsigned long addr)
{
struct page *pte;
- int count = 0;
-
- do {
- pte = alloc_pages(GFP_KERNEL, 0);
- if (!pte) {
- current->state = TASK_UNINTERRUPTIBLE;
- schedule_timeout(HZ);
- }
- } while (!pte && (count++ < 10));
+ pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
if (pte) {
void *page = page_address(pte);
clear_page(page);
diff --git a/include/asm-cris/pgalloc.h b/include/asm-cris/pgalloc.h
index 80e73be..75dde6f 100644
--- a/include/asm-cris/pgalloc.h
+++ b/include/asm-cris/pgalloc.h
@@ -62,7 +62,7 @@ static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
pte_t *pte;
- pte = (pte_t *) __get_free_page(GFP_KERNEL);
+ pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
if (pte)
clear_page(pte);
return pte;
diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h
index 2e6134a..00847ac 100644
--- a/include/asm-ia64/pgalloc.h
+++ b/include/asm-ia64/pgalloc.h
@@ -125,7 +125,7 @@ pmd_populate_kernel (struct mm_struct *mm, pmd_t *pmd_entry, pte_t *pte)
static inline struct page *
pte_alloc_one (struct mm_struct *mm, unsigned long addr)
{
- struct page *pte = alloc_pages(GFP_KERNEL, 0);
+ struct page *pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
if (likely(pte != NULL))
clear_page(page_address(pte));
@@ -135,7 +135,7 @@ pte_alloc_one (struct mm_struct *mm, unsigned long addr)
static inline pte_t *
pte_alloc_one_kernel (struct mm_struct *mm, unsigned long addr)
{
- pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL);
+ pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
if (likely(pte != NULL))
clear_page(pte);
diff --git a/include/asm-m68k/motorola_pgalloc.h b/include/asm-m68k/motorola_pgalloc.h
index 4beb7a8..f315615 100644
--- a/include/asm-m68k/motorola_pgalloc.h
+++ b/include/asm-m68k/motorola_pgalloc.h
@@ -11,7 +11,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long ad
{
pte_t *pte;
- pte = (pte_t *) __get_free_page(GFP_KERNEL);
+ pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
if (pte) {
clear_page(pte);
__flush_page_to_ram(pte);
@@ -30,7 +30,7 @@ static inline void pte_free_kernel(pte_t *pte)
static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- struct page *page = alloc_pages(GFP_KERNEL, 0);
+ struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
pte_t *pte;
if(!page)
diff --git a/include/asm-m68k/sun3_pgalloc.h b/include/asm-m68k/sun3_pgalloc.h
index 7740a29..3b7f6cc 100644
--- a/include/asm-m68k/sun3_pgalloc.h
+++ b/include/asm-m68k/sun3_pgalloc.h
@@ -39,7 +39,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *page)
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long address)
{
- unsigned long page = __get_free_page(GFP_KERNEL);
+ unsigned long page = __get_free_page(GFP_KERNEL|__GFP_REPEAT);
if (!page)
return NULL;
@@ -51,7 +51,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
static inline struct page *pte_alloc_one(struct mm_struct *mm,
unsigned long address)
{
- struct page *page = alloc_pages(GFP_KERNEL, 0);
+ struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
if (page == NULL)
return NULL;
diff --git a/include/asm-mips/pgalloc.h b/include/asm-mips/pgalloc.h
index 9492a50..f71b90b 100644
--- a/include/asm-mips/pgalloc.h
+++ b/include/asm-mips/pgalloc.h
@@ -132,7 +132,7 @@ static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
pte_t *pte;
- pte = (pte_t *) __get_free_page(GFP_KERNEL);
+ pte = (pte_t *) __get_free_page(GFP_KERNEL|__GFP_REPEAT);
if (pte)
clear_page(pte);
return pte;
diff --git a/include/asm-mips64/pgalloc.h b/include/asm-mips64/pgalloc.h
index 79b5840..a311307 100644
--- a/include/asm-mips64/pgalloc.h
+++ b/include/asm-mips64/pgalloc.h
@@ -93,7 +93,7 @@ static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
pte_t *pte;
- pte = (pte_t *) __get_free_page(GFP_KERNEL);
+ pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
if (pte)
clear_page(pte);
return pte;
diff --git a/include/asm-parisc/pgalloc.h b/include/asm-parisc/pgalloc.h
index 32dcf11..14c8605 100644
--- a/include/asm-parisc/pgalloc.h
+++ b/include/asm-parisc/pgalloc.h
@@ -73,7 +73,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
static inline struct page *
pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- struct page *page = alloc_page(GFP_KERNEL);
+ struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
if (likely(page != NULL))
clear_page(page_address(page));
return page;
@@ -82,7 +82,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
{
- pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL);
+ pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
if (likely(pte != NULL))
clear_page(pte);
return pte;
diff --git a/include/asm-ppc64/pgalloc.h b/include/asm-ppc64/pgalloc.h
index 0c46141..40361c2 100644
--- a/include/asm-ppc64/pgalloc.h
+++ b/include/asm-ppc64/pgalloc.h
@@ -62,19 +62,11 @@ pmd_free(pmd_t *pmd)
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
{
- int count = 0;
pte_t *pte;
- do {
- pte = (pte_t *)__get_free_page(GFP_KERNEL);
- if (pte)
- clear_page(pte);
- else {
- current->state = TASK_UNINTERRUPTIBLE;
- schedule_timeout(HZ);
- }
- } while (!pte && (count++ < 10));
-
+ pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+ if (pte)
+ clear_page(pte);
return pte;
}
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h
index 67230ef..e4729fb 100644
--- a/include/asm-s390/pgalloc.h
+++ b/include/asm-s390/pgalloc.h
@@ -120,20 +120,13 @@ static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr)
{
pte_t *pte;
- int count;
int i;
- count = 0;
- do {
- pte = (pte_t *) __get_free_page(GFP_KERNEL);
- if (pte != NULL) {
- for (i=0; i < PTRS_PER_PTE; i++)
- pte_clear(pte+i);
- } else {
- current->state = TASK_UNINTERRUPTIBLE;
- schedule_timeout(HZ);
- }
- } while (!pte && (count++ < 10));
+ pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+ if (pte != NULL) {
+ for (i=0; i < PTRS_PER_PTE; i++)
+ pte_clear(pte+i);
+ }
return pte;
}
diff --git a/include/asm-sh/pgalloc.h b/include/asm-sh/pgalloc.h
index 9cc5a7d..a60b4c9 100644
--- a/include/asm-sh/pgalloc.h
+++ b/include/asm-sh/pgalloc.h
@@ -35,7 +35,7 @@ static inline void pgd_free(pgd_t *pgd)
static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL);
+ pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
if (pte)
clear_page(pte);
return pte;
diff --git a/include/asm-x86_64/pgalloc.h b/include/asm-x86_64/pgalloc.h
index 4cae8e6..65b8177 100644
--- a/include/asm-x86_64/pgalloc.h
+++ b/include/asm-x86_64/pgalloc.h
@@ -48,12 +48,12 @@ static inline void pgd_free (pgd_t *pgd)
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
- return (pte_t *) get_zeroed_page(GFP_KERNEL);
+ return (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
}
static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- void *p = (void *)get_zeroed_page(GFP_KERNEL);
+ void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
if (!p)
return NULL;
return virt_to_page(p);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [patch] arch: remove __GFP_REPEAT for order-0 allocations
2010-09-28 23:40 ` Andrew Morton
@ 2010-09-28 23:52 ` David Rientjes
2010-09-29 0:41 ` Andrew Morton
0 siblings, 1 reply; 10+ messages in thread
From: David Rientjes @ 2010-09-28 23:52 UTC (permalink / raw)
To: Andrew Morton
Cc: Arnd Bergmann, Richard Henderson, Ivan Kokshaysky, Matt Turner,
Russell King, Mikael Starvik, Jesper Nilsson, David Howells,
Geert Uytterhoeven, Roman Zippel, Michal Simek, Koichi Yasutake,
Kyle McMartin, Helge Deller, Benjamin Herrenschmidt,
Paul Mackerras, Martin Schwidefsky, Heiko Carstens, Paul Mundt,
David S. Miller, Jeff Dike, Ingo Molnar, H. Peter Anvin,
Thomas Gleixner, linux-arch, linux-mm
On Tue, 28 Sep 2010, Andrew Morton wrote:
> > > > So we can definitely remove __GFP_REPEAT for any order-0 allocation and
> > > > it's based on its implementation -- poorly defined as it may be -- and the
> > > > inherit design of any sane page allocator that retries such an allocation
> > > > if it's going to use reclaim in the first place.
> > >
> > > Why was __GFP_REPEAT used in those callsites? What were people trying
> > > to achieve?
> > >
> >
> > I can't predict what they were trying to achieve
>
> Using my super powers it took me all of three minutes.
>
> git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/old-2.6-bkcvs.git
>
> Do `git log > foo', and search foo for GFP_REPEAT.
>
> A couple of interesting ones are:
>
Ok, so __GFP_REPEAT was used to replace the open-coding of retry loops by
putting that logic into the page allocator, and that logic was
subsequently changed to tie the bit to how many pages were reclaimed and
retry iff we haven't reclaimed the number of pages needed (in my patch,
that would be a single page).
It also shows that the page allocator has infinitely looped for
allocations under PAGE_ALLOC_COSTLY_ORDER since your patch from over seven
years ago.
So, given the fact that the PAGE_ALLOC_COSTLY_ORDER logic has existed
since the same time, the semantics of __GFP_REPEAT have changed and are
often misrepresented, and we don't even invoke the __GFP_REPEAT logic for
any of the allocations in my patch since they are oom killable, I think my
patch should be merged.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [patch] arch: remove __GFP_REPEAT for order-0 allocations
2010-09-28 23:52 ` David Rientjes
@ 2010-09-29 0:41 ` Andrew Morton
2010-09-29 1:10 ` David Rientjes
0 siblings, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2010-09-29 0:41 UTC (permalink / raw)
To: David Rientjes
Cc: Arnd Bergmann, Richard Henderson, Ivan Kokshaysky, Matt Turner,
Russell King, Mikael Starvik, Jesper Nilsson, David Howells,
Geert Uytterhoeven, Roman Zippel, Michal Simek, Koichi Yasutake,
Kyle McMartin, Helge Deller, Benjamin Herrenschmidt,
Paul Mackerras, Martin Schwidefsky, Heiko Carstens, Paul Mundt,
David S. Miller, Jeff Dike, Ingo Molnar, H. Peter Anvin,
Thomas Gleixner, linux-arch, linux-mm
On Tue, 28 Sep 2010 16:52:01 -0700 (PDT) David Rientjes <rientjes@google.com> wrote:
> On Tue, 28 Sep 2010, Andrew Morton wrote:
>
> > > > > So we can definitely remove __GFP_REPEAT for any order-0 allocation and
> > > > > it's based on its implementation -- poorly defined as it may be -- and the
> > > > > inherit design of any sane page allocator that retries such an allocation
> > > > > if it's going to use reclaim in the first place.
> > > >
> > > > Why was __GFP_REPEAT used in those callsites? What were people trying
> > > > to achieve?
> > > >
> > >
> > > I can't predict what they were trying to achieve
> >
> > Using my super powers it took me all of three minutes.
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/old-2.6-bkcvs.git
> >
> > Do `git log > foo', and search foo for GFP_REPEAT.
> >
> > A couple of interesting ones are:
> >
>
> Ok, so __GFP_REPEAT was used to replace the open-coding of retry loops by
> putting that logic into the page allocator, and that logic was
> subsequently changed to tie the bit to how many pages were reclaimed and
> retry iff we haven't reclaimed the number of pages needed (in my patch,
> that would be a single page).
It also shows that there was at least one version of the page allocator
(written by a very skilled and experienced Linux MM developer) which
would simply return NULL when there was no memory. And here you are
assuring us that there will never ever be such a version again.
> It also shows that the page allocator has infinitely looped for
> allocations under PAGE_ALLOC_COSTLY_ORDER since your patch from over seven
> years ago.
Really don't give a shit what the allocator-of-the-moment happens to be
doing.
What I care about is that very smart, experienced and hard-working
Linux developers decided, a long time ago, that page-table allocations
are special, and need special treatment. This is information!
What I also care about is lazy MM developers who just rip stuff out
without understanding it and without even bothering to make an
*attempt* to understand it.
> So, given the fact that the PAGE_ALLOC_COSTLY_ORDER logic has existed
> since the same time, the semantics of __GFP_REPEAT have changed and are
> often misrepresented, and we don't even invoke the __GFP_REPEAT logic for
> any of the allocations in my patch since they are oom killable,
Probably this is because lazy ignorant MM developers broke earlier
intentions without even knowing that they were doing so.
> I think my patch should be merged.
Well before destroying the information, you tell me: why did MM
developers decide that page-table allocations needed special treatment?
What experience led them to decide to implement that? Has the problem
which they solved been fixed by other means? Are those means
sufficiently permanently embedded (see "-aa" above) that we can afford
to destroy this piece of information?
If we break stuff and destroy this sort of historical knowledge then we
get to make the same mistakes every decade or so and the damn thing
*never* converges.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [patch] arch: remove __GFP_REPEAT for order-0 allocations
2010-09-29 0:41 ` Andrew Morton
@ 2010-09-29 1:10 ` David Rientjes
0 siblings, 0 replies; 10+ messages in thread
From: David Rientjes @ 2010-09-29 1:10 UTC (permalink / raw)
To: Andrew Morton
Cc: Arnd Bergmann, Richard Henderson, Ivan Kokshaysky, Matt Turner,
Russell King, Mikael Starvik, Jesper Nilsson, David Howells,
Geert Uytterhoeven, Roman Zippel, Michal Simek, Koichi Yasutake,
Kyle McMartin, Helge Deller, Benjamin Herrenschmidt,
Paul Mackerras, Martin Schwidefsky, Heiko Carstens, Paul Mundt,
David S. Miller, Jeff Dike, Ingo Molnar, H. Peter Anvin,
Thomas Gleixner, linux-arch, linux-mm
On Tue, 28 Sep 2010, Andrew Morton wrote:
> What I care about is that very smart, experienced and hard-working
> Linux developers decided, a long time ago, that page-table allocations
> are special, and need special treatment. This is information!
>
Then they implemented it incorrectly since __GFP_REPEAT has never given
any order-0 allocation special treatment.
> What I also care about is lazy MM developers who just rip stuff out
> without understanding it and without even bothering to make an
> *attempt* to understand it.
>
I understand that __GFP_REPEAT does absolutely nothing in all the places
that I removed it in this patch, but if you want to use a gfp flag as
documentation instead of adding a comment to the PAGE_ALLOC_COSTLY_ORDER
retry logic, then that's your call, but I would certainly suggest cleaning
up the erroneous documentation in the tree that specifies its semantics.
> > So, given the fact that the PAGE_ALLOC_COSTLY_ORDER logic has existed
> > since the same time, the semantics of __GFP_REPEAT have changed and are
> > often misrepresented, and we don't even invoke the __GFP_REPEAT logic for
> > any of the allocations in my patch since they are oom killable,
>
> Probably this is because lazy ignorant MM developers broke earlier
> intentions without even knowing that they were doing so.
>
The intention was that they loop forever, but since that's implicit for
these order-0 allocations, I guess you allowed its semantics to change in
a41f24ea without the same objections?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2010-09-29 1:10 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-09-28 10:45 [patch] arch: remove __GFP_REPEAT for order-0 allocations David Rientjes
2010-09-28 21:36 ` Andrew Morton
2010-09-28 21:57 ` Russell King - ARM Linux
2010-09-28 22:47 ` David Rientjes
2010-09-28 22:53 ` Andrew Morton
2010-09-28 23:12 ` David Rientjes
2010-09-28 23:40 ` Andrew Morton
2010-09-28 23:52 ` David Rientjes
2010-09-29 0:41 ` Andrew Morton
2010-09-29 1:10 ` David Rientjes
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox