* [PATCH] Use integer fields lookup for gfp_zone and check for errors in flags passed to the page allocator
@ 2009-05-22 18:42 Christoph Lameter
2009-05-25 1:51 ` KAMEZAWA Hiroyuki
2009-05-25 11:30 ` Mel Gorman
0 siblings, 2 replies; 8+ messages in thread
From: Christoph Lameter @ 2009-05-22 18:42 UTC (permalink / raw)
To: akpm; +Cc: linux-mm, Mel Gorman, npiggin, KAMEZAWA Hiroyuki
Subject: Use integer fields lookup for gfp_zone and check for errors in flags passed to the page allocator
This simplifies the code in gfp_zone() and also keeps the ability of the
compiler to use constant folding to get rid of gfp_zone processing.
The lookup of the zone is done using a bitfield stored in an integer. So
the code in gfp_zone is a simple extraction of bits from a constant bitfield.
The compiler is generating a load of a constant into a register and then
performs a shift and mask operation to get the zone from a gfp_t.
No cachelines are touched and no branches have to be predicted by the
compiler.
We are doing some macro tricks here to convince the compiler to always do the
constant folding if possible.
Tested on:
i386 (kvm), x86_64(native)
Compile tested on:
s390 arm sparc sparc64 mips ia64
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
---
include/linux/gfp.h | 85 ++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 70 insertions(+), 15 deletions(-)
Index: linux-2.6/include/linux/gfp.h
===================================================================
--- linux-2.6.orig/include/linux/gfp.h 2009-04-13 14:04:29.000000000 -0500
+++ linux-2.6/include/linux/gfp.h 2009-04-24 14:21:59.000000000 -0500
@@ -20,7 +20,8 @@ struct vm_area_struct;
#define __GFP_DMA ((__force gfp_t)0x01u)
#define __GFP_HIGHMEM ((__force gfp_t)0x02u)
#define __GFP_DMA32 ((__force gfp_t)0x04u)
-
+#define __GFP_MOVABLE ((__force gfp_t)0x08u) /* Page is movable */
+#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
/*
* Action modifiers - doesn't change the zoning
*
@@ -50,7 +51,6 @@ struct vm_area_struct;
#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
#define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */
-#define __GFP_MOVABLE ((__force gfp_t)0x100000u) /* Page is movable */
#define __GFP_BITS_SHIFT 21 /* Room for 21 __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -112,24 +112,79 @@ static inline int allocflags_to_migratet
((gfp_flags & __GFP_RECLAIMABLE) != 0);
}
-static inline enum zone_type gfp_zone(gfp_t flags)
-{
+#ifdef CONFIG_ZONE_HIGHMEM
+#define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
+#else
+#define OPT_ZONE_HIGHMEM ZONE_NORMAL
+#endif
+
#ifdef CONFIG_ZONE_DMA
- if (flags & __GFP_DMA)
- return ZONE_DMA;
+#define OPT_ZONE_DMA ZONE_DMA
+#else
+#define OPT_ZONE_DMA ZONE_NORMAL
#endif
+
#ifdef CONFIG_ZONE_DMA32
- if (flags & __GFP_DMA32)
- return ZONE_DMA32;
+#define OPT_ZONE_DMA32 ZONE_DMA32
+#else
+#define OPT_ZONE_DMA32 OPT_ZONE_DMA
#endif
- if ((flags & (__GFP_HIGHMEM | __GFP_MOVABLE)) ==
- (__GFP_HIGHMEM | __GFP_MOVABLE))
- return ZONE_MOVABLE;
-#ifdef CONFIG_HIGHMEM
- if (flags & __GFP_HIGHMEM)
- return ZONE_HIGHMEM;
+
+#if 16 * ZONES_SHIFT > BITS_PER_LONG
+#error ZONES_SHIFT too large to create GFP_ZONE_TABLE integer
+#endif
+
+/*
+ * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the
+ * zone to use given the lowest 4 bits of gfp_t. Entries are ZONE_SHIFT long
+ * and there are 16 of them to cover all possible combinations of
+ * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM
+ *
+ */
+#define GFP_ZONE_TABLE ( \
+ (ZONE_NORMAL << 0 * ZONES_SHIFT) \
+ | (OPT_ZONE_DMA << __GFP_DMA * ZONES_SHIFT) \
+ | (OPT_ZONE_HIGHMEM << __GFP_HIGHMEM * ZONES_SHIFT) \
+ | (OPT_ZONE_DMA32 << __GFP_DMA32 * ZONES_SHIFT) \
+ | (ZONE_NORMAL << __GFP_MOVABLE * ZONES_SHIFT) \
+ | (OPT_ZONE_DMA << (__GFP_MOVABLE | __GFP_DMA) * ZONES_SHIFT) \
+ | (ZONE_MOVABLE << (__GFP_MOVABLE | __GFP_HIGHMEM) * ZONES_SHIFT)\
+ | (OPT_ZONE_DMA32 << (__GFP_MOVABLE | __GFP_DMA32) * ZONES_SHIFT)\
+)
+
+/*
+ * GFP_ZONE_BAD is a bitmap for all combination of __GFP_DMA, __GFP_DMA32
+ * __GFP_HIGHMEM and __GFP_MOVABLE that are not permitted. One flag per
+ * entry starting with bit 0. Bit is set if the combination is not
+ * allowed.
+ */
+#define GFP_ZONE_BAD ( \
+ 1 << (__GFP_DMA | __GFP_HIGHMEM) \
+ | 1 << (__GFP_DMA | __GFP_DMA32) \
+ | 1 << (__GFP_DMA32 | __GFP_HIGHMEM) \
+ | 1 << (__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM) \
+ | 1 << (__GFP_MOVABLE | __GFP_HIGHMEM | __GFP_DMA) \
+ | 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_DMA) \
+ | 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_HIGHMEM) \
+ | 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_DMA | __GFP_HIGHMEM)\
+)
+
+static inline enum zone_type gfp_zone(gfp_t flags)
+{
+ enum zone_type z;
+ int bit = flags & GFP_ZONEMASK;
+
+ z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) &
+ ((1 << ZONES_SHIFT) - 1);
+
+ if (__builtin_constant_p(bit))
+ BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
+ else {
+#ifdef CONFIG_DEBUG_VM
+ BUG_ON((GFP_ZONE_BAD >> bit) & 1);
#endif
- return ZONE_NORMAL;
+ }
+ return z;
}
/*
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Use integer fields lookup for gfp_zone and check for errors in flags passed to the page allocator
2009-05-22 18:42 [PATCH] Use integer fields lookup for gfp_zone and check for errors in flags passed to the page allocator Christoph Lameter
@ 2009-05-25 1:51 ` KAMEZAWA Hiroyuki
2009-05-26 16:58 ` Christoph Lameter
2009-05-25 11:30 ` Mel Gorman
1 sibling, 1 reply; 8+ messages in thread
From: KAMEZAWA Hiroyuki @ 2009-05-25 1:51 UTC (permalink / raw)
To: Christoph Lameter; +Cc: akpm, linux-mm, Mel Gorman, npiggin
On Fri, 22 May 2009 14:42:32 -0400 (EDT)
Christoph Lameter <cl@linux-foundation.org> wrote:
>
> Subject: Use integer fields lookup for gfp_zone and check for errors in flags passed to the page allocator
>
> This simplifies the code in gfp_zone() and also keeps the ability of the
> compiler to use constant folding to get rid of gfp_zone processing.
>
> The lookup of the zone is done using a bitfield stored in an integer. So
> the code in gfp_zone is a simple extraction of bits from a constant bitfield.
> The compiler is generating a load of a constant into a register and then
> performs a shift and mask operation to get the zone from a gfp_t.
>
> No cachelines are touched and no branches have to be predicted by the
> compiler.
>
> We are doing some macro tricks here to convince the compiler to always do the
> constant folding if possible.
>
> Tested on:
> i386 (kvm), x86_64(native)
>
> Compile tested on:
> s390 arm sparc sparc64 mips ia64
>
> Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
>
> ---
> include/linux/gfp.h | 85 ++++++++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 70 insertions(+), 15 deletions(-)
>
> Index: linux-2.6/include/linux/gfp.h
> ===================================================================
> --- linux-2.6.orig/include/linux/gfp.h 2009-04-13 14:04:29.000000000 -0500
> +++ linux-2.6/include/linux/gfp.h 2009-04-24 14:21:59.000000000 -0500
> @@ -20,7 +20,8 @@ struct vm_area_struct;
> #define __GFP_DMA ((__force gfp_t)0x01u)
> #define __GFP_HIGHMEM ((__force gfp_t)0x02u)
> #define __GFP_DMA32 ((__force gfp_t)0x04u)
> -
> +#define __GFP_MOVABLE ((__force gfp_t)0x08u) /* Page is movable */
> +#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
> /*
> * Action modifiers - doesn't change the zoning
> *
> @@ -50,7 +51,6 @@ struct vm_area_struct;
> #define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
> #define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
> #define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */
> -#define __GFP_MOVABLE ((__force gfp_t)0x100000u) /* Page is movable */
>
> #define __GFP_BITS_SHIFT 21 /* Room for 21 __GFP_FOO bits */
> #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
> @@ -112,24 +112,79 @@ static inline int allocflags_to_migratet
> ((gfp_flags & __GFP_RECLAIMABLE) != 0);
> }
>
> -static inline enum zone_type gfp_zone(gfp_t flags)
> -{
> +#ifdef CONFIG_ZONE_HIGHMEM
> +#define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
> +#else
> +#define OPT_ZONE_HIGHMEM ZONE_NORMAL
> +#endif
> +
> #ifdef CONFIG_ZONE_DMA
> - if (flags & __GFP_DMA)
> - return ZONE_DMA;
> +#define OPT_ZONE_DMA ZONE_DMA
> +#else
> +#define OPT_ZONE_DMA ZONE_NORMAL
> #endif
> +
> #ifdef CONFIG_ZONE_DMA32
> - if (flags & __GFP_DMA32)
> - return ZONE_DMA32;
> +#define OPT_ZONE_DMA32 ZONE_DMA32
> +#else
> +#define OPT_ZONE_DMA32 OPT_ZONE_DMA
> #endif
> - if ((flags & (__GFP_HIGHMEM | __GFP_MOVABLE)) ==
> - (__GFP_HIGHMEM | __GFP_MOVABLE))
> - return ZONE_MOVABLE;
> -#ifdef CONFIG_HIGHMEM
> - if (flags & __GFP_HIGHMEM)
> - return ZONE_HIGHMEM;
> +
> +#if 16 * ZONES_SHIFT > BITS_PER_LONG
> +#error ZONES_SHIFT too large to create GFP_ZONE_TABLE integer
> +#endif
> +
> +/*
> + * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the
> + * zone to use given the lowest 4 bits of gfp_t. Entries are ZONE_SHIFT long
> + * and there are 16 of them to cover all possible combinations of
> + * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM
> + *
> + */
> +#define GFP_ZONE_TABLE ( \
> + (ZONE_NORMAL << 0 * ZONES_SHIFT) \
> + | (OPT_ZONE_DMA << __GFP_DMA * ZONES_SHIFT) \
> + | (OPT_ZONE_HIGHMEM << __GFP_HIGHMEM * ZONES_SHIFT) \
> + | (OPT_ZONE_DMA32 << __GFP_DMA32 * ZONES_SHIFT) \
> + | (ZONE_NORMAL << __GFP_MOVABLE * ZONES_SHIFT) \
> + | (OPT_ZONE_DMA << (__GFP_MOVABLE | __GFP_DMA) * ZONES_SHIFT) \
> + | (ZONE_MOVABLE << (__GFP_MOVABLE | __GFP_HIGHMEM) * ZONES_SHIFT)\
> + | (OPT_ZONE_DMA32 << (__GFP_MOVABLE | __GFP_DMA32) * ZONES_SHIFT)\
> +)
Could you add a comment to explain this as..
==
/*
* Zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA
* But GFP_MOVABLE is not only zone specifier but also allocating policy.
* Then, __GFP_MOVABLE+other zone selector is a valid configuration.
* Only 1bit of the lowest 3 bit (DMA,DMA32,HIGHMEM) can be set to "1".
*/
bit result
=================
0x0 => NORMAL
0x1 => DMA or NORMAL
0x2 => HIGHMEM or NORMAL
0x3 => BAD (DMA+HIGHMEM)
0x4 => DMA32 or DMA or NORMAL
0x5 => BAD (DMA+DMA32)
0x6 => BAD (HIGHMEM+DMA32)
0x7 => BAD (HIGHMEM+DMA32+DMA)
0x8 => NORMAL
0x9 => DMA or NORMAL
0xa => MOVABLE (MOVABLE is selected only when specified with HIGHMEM)
0xb => BAD (MOVABLE+HIGHMEM+DMA)
0xc => DMA32
0xd => BAD (MOVABLE+DMA32+DMA)
0xe => BAD (MOVABLE+DMA32+HIGHMEM)
0xf => BAD (MOVABLE+DMA32+HIGHMEM+DMA)
ZONES_SHIFT must be < 2.
==
?
Thanks,
-Kame
> +
> +/*
> + * GFP_ZONE_BAD is a bitmap for all combination of __GFP_DMA, __GFP_DMA32
> + * __GFP_HIGHMEM and __GFP_MOVABLE that are not permitted. One flag per
> + * entry starting with bit 0. Bit is set if the combination is not
> + * allowed.
> + */
> +#define GFP_ZONE_BAD ( \
> + 1 << (__GFP_DMA | __GFP_HIGHMEM) \
> + | 1 << (__GFP_DMA | __GFP_DMA32) \
> + | 1 << (__GFP_DMA32 | __GFP_HIGHMEM) \
> + | 1 << (__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM) \
> + | 1 << (__GFP_MOVABLE | __GFP_HIGHMEM | __GFP_DMA) \
> + | 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_DMA) \
> + | 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_HIGHMEM) \
> + | 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_DMA | __GFP_HIGHMEM)\
> +)
> +
> +static inline enum zone_type gfp_zone(gfp_t flags)
> +{
> + enum zone_type z;
> + int bit = flags & GFP_ZONEMASK;
> +
> + z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) &
> + ((1 << ZONES_SHIFT) - 1);
> +
> + if (__builtin_constant_p(bit))
> + BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
> + else {
> +#ifdef CONFIG_DEBUG_VM
> + BUG_ON((GFP_ZONE_BAD >> bit) & 1);
> #endif
> - return ZONE_NORMAL;
> + }
> + return z;
> }
>
> /*
>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Use integer fields lookup for gfp_zone and check for errors in flags passed to the page allocator
2009-05-22 18:42 [PATCH] Use integer fields lookup for gfp_zone and check for errors in flags passed to the page allocator Christoph Lameter
2009-05-25 1:51 ` KAMEZAWA Hiroyuki
@ 2009-05-25 11:30 ` Mel Gorman
2009-05-26 18:04 ` Christoph Lameter
1 sibling, 1 reply; 8+ messages in thread
From: Mel Gorman @ 2009-05-25 11:30 UTC (permalink / raw)
To: Christoph Lameter; +Cc: akpm, linux-mm, npiggin, KAMEZAWA Hiroyuki
On Fri, May 22, 2009 at 02:42:32PM -0400, Christoph Lameter wrote:
>
> Subject: Use integer fields lookup for gfp_zone and check for errors in flags passed to the page allocator
>
> This simplifies the code in gfp_zone() and also keeps the ability of the
> compiler to use constant folding to get rid of gfp_zone processing.
>
> The lookup of the zone is done using a bitfield stored in an integer. So
> the code in gfp_zone is a simple extraction of bits from a constant bitfield.
> The compiler is generating a load of a constant into a register and then
> performs a shift and mask operation to get the zone from a gfp_t.
>
> No cachelines are touched and no branches have to be predicted by the
> compiler.
>
> We are doing some macro tricks here to convince the compiler to always do the
> constant folding if possible.
>
> Tested on:
> i386 (kvm), x86_64(native)
>
How was this tested? This patch boots on x86 for example, but when I
patched further to validate that gfp_zone() returned sensible values, I got
mismatches for GFP_HIGHUSER. These were the results I got for common GFP
flags on three architectures
x86
[ 0.000000] mminit::gfp_zone GFP_DMA PASS
[ 0.000000] mminit::gfp_zone GFP_DMA32 FAIL 1 != 0
[ 0.000000] mminit::gfp_zone GFP_NOIO PASS
[ 0.000000] mminit::gfp_zone GFP_NOFS PASS
[ 0.000000] mminit::gfp_zone GFP_KERNEL PASS
[ 0.000000] mminit::gfp_zone GFP_TEMPORARY PASS
[ 0.000000] mminit::gfp_zone GFP_USER PASS
[ 0.000000] mminit::gfp_zone GFP_HIGHUSER FAIL 2 != 1
[ 0.000000] mminit::gfp_zone GFP_HIGHUSER_MOVABLE PASS
I expect that the machine would start running into reclaim issues with
enough uptime because it'll not be using Highmem as it should. Similarly,
the GFP_DMA32 may also be a problem as the new implementation is going
ZONE_DMA when ZONE_NORMAL would have been ok in this case.
x86-64
[ 0.000000] mminit::gfp_zone GFP_DMA PASS
[ 0.000000] mminit::gfp_zone GFP_DMA32 PASS
[ 0.000000] mminit::gfp_zone GFP_NOIO PASS
[ 0.000000] mminit::gfp_zone GFP_NOFS PASS
[ 0.000000] mminit::gfp_zone GFP_KERNEL PASS
[ 0.000000] mminit::gfp_zone GFP_TEMPORARY PASS
[ 0.000000] mminit::gfp_zone GFP_USER PASS
[ 0.000000] mminit::gfp_zone GFP_HIGHUSER PASS
[ 0.000000] mminit::gfp_zone GFP_HIGHUSER_MOVABLE PASS
Happy days on x86-64.
ppc64
[ 0.000000] mminit::gfp_zone GFP_DMA PASS
[ 0.000000] mminit::gfp_zone GFP_DMA32 FAIL 1 != 0
[ 0.000000] mminit::gfp_zone GFP_NOIO PASS
[ 0.000000] mminit::gfp_zone GFP_NOFS PASS
[ 0.000000] mminit::gfp_zone GFP_KERNEL PASS
[ 0.000000] mminit::gfp_zone GFP_TEMPORARY PASS
[ 0.000000] mminit::gfp_zone GFP_USER PASS
[ 0.000000] mminit::gfp_zone GFP_HIGHUSER PASS
[ 0.000000] mminit::gfp_zone GFP_HIGHUSER_MOVABLE PASS
This mismatch on GFP_DMA32 is similar to x86. However, on ppc64 this error
is harmless as ZONE_NORMAL is never populated anyway so GFP_DMA32 going to
ZONE_DMA is just fine.
This is similar difficulty that earlier versions of the patch ran into although
this version is much closer to being correct. I'll look again tomorrow to
see can it be repaired. In the meantime, here is the patch I used to validate
your gfp_zone() implementation and maybe you'll spot the problem faster.
==== CUT HERE ====
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Use integer fields lookup for gfp_zone and check for errors in flags passed to the page allocator
2009-05-25 1:51 ` KAMEZAWA Hiroyuki
@ 2009-05-26 16:58 ` Christoph Lameter
0 siblings, 0 replies; 8+ messages in thread
From: Christoph Lameter @ 2009-05-26 16:58 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: akpm, linux-mm, Mel Gorman, npiggin
On Mon, 25 May 2009, KAMEZAWA Hiroyuki wrote:
> Could you add a comment to explain this as..
Ok. I added your comments and reworked them a bit:
Subject: Use integer fields lookup for gfp_zone and check for errors in flags passed to the page allocator
This simplifies the code in gfp_zone() and also keeps the ability of the
compiler to use constant folding to get rid of gfp_zone processing.
The lookup of the zone is done using a bitfield stored in an integer. So
the code in gfp_zone is a simple extraction of bits from a constant bitfield.
The compiler is generating a load of a constant into a register and then
performs a shift and mask operation to get the zone from a gfp_t.
No cachelines are touched and no branches have to be predicted by the
compiler.
We are doing some macro tricks here to convince the compiler to always do the
constant folding if possible.
Tested on:
i386 (kvm), x86_64(native)
Compile tested on:
s390 arm sparc sparc64 mips ia64
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
---
include/linux/gfp.h | 111 ++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 96 insertions(+), 15 deletions(-)
Index: linux-2.6/include/linux/gfp.h
===================================================================
--- linux-2.6.orig/include/linux/gfp.h 2009-05-22 14:03:37.000000000 -0500
+++ linux-2.6/include/linux/gfp.h 2009-05-26 11:56:48.000000000 -0500
@@ -20,7 +20,8 @@ struct vm_area_struct;
#define __GFP_DMA ((__force gfp_t)0x01u)
#define __GFP_HIGHMEM ((__force gfp_t)0x02u)
#define __GFP_DMA32 ((__force gfp_t)0x04u)
-
+#define __GFP_MOVABLE ((__force gfp_t)0x08u) /* Page is movable */
+#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
/*
* Action modifiers - doesn't change the zoning
*
@@ -50,7 +51,6 @@ struct vm_area_struct;
#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
#define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */
-#define __GFP_MOVABLE ((__force gfp_t)0x100000u) /* Page is movable */
#define __GFP_BITS_SHIFT 21 /* Room for 21 __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -112,24 +112,105 @@ static inline int allocflags_to_migratet
((gfp_flags & __GFP_RECLAIMABLE) != 0);
}
-static inline enum zone_type gfp_zone(gfp_t flags)
-{
+#ifdef CONFIG_ZONE_HIGHMEM
+#define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
+#else
+#define OPT_ZONE_HIGHMEM ZONE_NORMAL
+#endif
+
#ifdef CONFIG_ZONE_DMA
- if (flags & __GFP_DMA)
- return ZONE_DMA;
+#define OPT_ZONE_DMA ZONE_DMA
+#else
+#define OPT_ZONE_DMA ZONE_NORMAL
#endif
+
#ifdef CONFIG_ZONE_DMA32
- if (flags & __GFP_DMA32)
- return ZONE_DMA32;
+#define OPT_ZONE_DMA32 ZONE_DMA32
+#else
+#define OPT_ZONE_DMA32 OPT_ZONE_DMA
+#endif
+
+/*
+ * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the
+ * zone to use given the lowest 4 bits of gfp_t. Entries are ZONE_SHIFT long
+ * and there are 16 of them to cover all possible combinations of
+ * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM
+ *
+ * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA.
+ * But GFP_MOVABLE is not only a zone specifier but also an allocation
+ * policy. Therefore __GFP_MOVABLE plus another zone selector is valid.
+ * Only 1bit of the lowest 3 bit (DMA,DMA32,HIGHMEM) can be set to "1".
+ *
+ * bit result
+ * =================
+ * 0x0 => NORMAL
+ * 0x1 => DMA or NORMAL
+ * 0x2 => HIGHMEM or NORMAL
+ * 0x3 => BAD (DMA+HIGHMEM)
+ * 0x4 => DMA32 or DMA or NORMAL
+ * 0x5 => BAD (DMA+DMA32)
+ * 0x6 => BAD (HIGHMEM+DMA32)
+ * 0x7 => BAD (HIGHMEM+DMA32+DMA)
+ * 0x8 => NORMAL (MOVABLE+0)
+ * 0x9 => DMA or NORMAL (MOVABLE+DMA)
+ * 0xa => MOVABLE (Movable is valid only if HIGHMEM is set too)
+ * 0xb => BAD (MOVABLE+HIGHMEM+DMA)
+ * 0xc => DMA32 (MOVABLE+HIGHMEM+DMA32)
+ * 0xd => BAD (MOVABLE+DMA32+DMA)
+ * 0xe => BAD (MOVABLE+DMA32+HIGHMEM)
+ * 0xf => BAD (MOVABLE+DMA32+HIGHMEM+DMA)
+ *
+ * ZONES_SHIFT must be <= 2 on 32 bit platforms.
+ */
+
+#if 16 * ZONES_SHIFT > BITS_PER_LONG
+#error ZONES_SHIFT too large to create GFP_ZONE_TABLE integer
#endif
- if ((flags & (__GFP_HIGHMEM | __GFP_MOVABLE)) ==
- (__GFP_HIGHMEM | __GFP_MOVABLE))
- return ZONE_MOVABLE;
-#ifdef CONFIG_HIGHMEM
- if (flags & __GFP_HIGHMEM)
- return ZONE_HIGHMEM;
+
+#define GFP_ZONE_TABLE ( \
+ (ZONE_NORMAL << 0 * ZONES_SHIFT) \
+ | (OPT_ZONE_DMA << __GFP_DMA * ZONES_SHIFT) \
+ | (OPT_ZONE_HIGHMEM << __GFP_HIGHMEM * ZONES_SHIFT) \
+ | (OPT_ZONE_DMA32 << __GFP_DMA32 * ZONES_SHIFT) \
+ | (ZONE_NORMAL << __GFP_MOVABLE * ZONES_SHIFT) \
+ | (OPT_ZONE_DMA << (__GFP_MOVABLE | __GFP_DMA) * ZONES_SHIFT) \
+ | (ZONE_MOVABLE << (__GFP_MOVABLE | __GFP_HIGHMEM) * ZONES_SHIFT)\
+ | (OPT_ZONE_DMA32 << (__GFP_MOVABLE | __GFP_DMA32) * ZONES_SHIFT)\
+)
+
+/*
+ * GFP_ZONE_BAD is a bitmap for all combination of __GFP_DMA, __GFP_DMA32
+ * __GFP_HIGHMEM and __GFP_MOVABLE that are not permitted. One flag per
+ * entry starting with bit 0. Bit is set if the combination is not
+ * allowed.
+ */
+#define GFP_ZONE_BAD ( \
+ 1 << (__GFP_DMA | __GFP_HIGHMEM) \
+ | 1 << (__GFP_DMA | __GFP_DMA32) \
+ | 1 << (__GFP_DMA32 | __GFP_HIGHMEM) \
+ | 1 << (__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM) \
+ | 1 << (__GFP_MOVABLE | __GFP_HIGHMEM | __GFP_DMA) \
+ | 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_DMA) \
+ | 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_HIGHMEM) \
+ | 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_DMA | __GFP_HIGHMEM)\
+)
+
+static inline enum zone_type gfp_zone(gfp_t flags)
+{
+ enum zone_type z;
+ int bit = flags & GFP_ZONEMASK;
+
+ z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) &
+ ((1 << ZONES_SHIFT) - 1);
+
+ if (__builtin_constant_p(bit))
+ BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
+ else {
+#ifdef CONFIG_DEBUG_VM
+ BUG_ON((GFP_ZONE_BAD >> bit) & 1);
#endif
- return ZONE_NORMAL;
+ }
+ return z;
}
/*
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Use integer fields lookup for gfp_zone and check for errors in flags passed to the page allocator
2009-05-25 11:30 ` Mel Gorman
@ 2009-05-26 18:04 ` Christoph Lameter
2009-05-26 23:26 ` Mel Gorman
0 siblings, 1 reply; 8+ messages in thread
From: Christoph Lameter @ 2009-05-26 18:04 UTC (permalink / raw)
To: Mel Gorman; +Cc: akpm, linux-mm, npiggin, KAMEZAWA Hiroyuki
On Mon, 25 May 2009, Mel Gorman wrote:
> I expect that the machine would start running into reclaim issues with
> enough uptime because it'll not be using Highmem as it should. Similarly,
> the GFP_DMA32 may also be a problem as the new implementation is going
> ZONE_DMA when ZONE_NORMAL would have been ok in this case.
Right. The fallback for DMA32 is wrong. Should fall back to ZONE_NORMAL.
Not to DMA. And the config variable to check for highmem was wrong.
Subject: Fix gfp zone patch
1. If there is no DMA32 fall back to NORMAL instead of DMA
2. Use the correct config variable for HIGHMEM
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
---
include/linux/gfp.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: linux-2.6/include/linux/gfp.h
===================================================================
--- linux-2.6.orig/include/linux/gfp.h 2009-05-26 12:59:19.000000000 -0500
+++ linux-2.6/include/linux/gfp.h 2009-05-26 12:59:31.000000000 -0500
@@ -112,7 +112,7 @@ static inline int allocflags_to_migratet
((gfp_flags & __GFP_RECLAIMABLE) != 0);
}
-#ifdef CONFIG_ZONE_HIGHMEM
+#ifdef CONFIG_HIGHMEM
#define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
#else
#define OPT_ZONE_HIGHMEM ZONE_NORMAL
@@ -127,7 +127,7 @@ static inline int allocflags_to_migratet
#ifdef CONFIG_ZONE_DMA32
#define OPT_ZONE_DMA32 ZONE_DMA32
#else
-#define OPT_ZONE_DMA32 OPT_ZONE_DMA
+#define OPT_ZONE_DMA32 ZONE_NORMAL
#endif
/*
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Use integer fields lookup for gfp_zone and check for errors in flags passed to the page allocator
2009-05-26 18:04 ` Christoph Lameter
@ 2009-05-26 23:26 ` Mel Gorman
2009-05-27 9:48 ` Mel Gorman
0 siblings, 1 reply; 8+ messages in thread
From: Mel Gorman @ 2009-05-26 23:26 UTC (permalink / raw)
To: Christoph Lameter; +Cc: akpm, linux-mm, npiggin, KAMEZAWA Hiroyuki
On Tue, May 26, 2009 at 02:04:35PM -0400, Christoph Lameter wrote:
> On Mon, 25 May 2009, Mel Gorman wrote:
>
> > I expect that the machine would start running into reclaim issues with
> > enough uptime because it'll not be using Highmem as it should. Similarly,
> > the GFP_DMA32 may also be a problem as the new implementation is going
> > ZONE_DMA when ZONE_NORMAL would have been ok in this case.
>
> Right. The fallback for DMA32 is wrong. Should fall back to ZONE_NORMAL.
> Not to DMA. And the config variable to check for highmem was wrong.
>
That fixed things right up on x86 at least and it looks good. I've queued
up a few tests with the patch applied on x86, x86-64 and ppc64. Hopefully
it'll go smoothly.
For your patch + fix merged
Acked-by: Mel Gorman <mel@csn.ul.ie>
>
> Subject: Fix gfp zone patch
>
> 1. If there is no DMA32 fall back to NORMAL instead of DMA
>
> 2. Use the correct config variable for HIGHMEM
>
> Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
>
>
> ---
> include/linux/gfp.h | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> Index: linux-2.6/include/linux/gfp.h
> ===================================================================
> --- linux-2.6.orig/include/linux/gfp.h 2009-05-26 12:59:19.000000000 -0500
> +++ linux-2.6/include/linux/gfp.h 2009-05-26 12:59:31.000000000 -0500
> @@ -112,7 +112,7 @@ static inline int allocflags_to_migratet
> ((gfp_flags & __GFP_RECLAIMABLE) != 0);
> }
>
> -#ifdef CONFIG_ZONE_HIGHMEM
> +#ifdef CONFIG_HIGHMEM
> #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
> #else
> #define OPT_ZONE_HIGHMEM ZONE_NORMAL
> @@ -127,7 +127,7 @@ static inline int allocflags_to_migratet
> #ifdef CONFIG_ZONE_DMA32
> #define OPT_ZONE_DMA32 ZONE_DMA32
> #else
> -#define OPT_ZONE_DMA32 OPT_ZONE_DMA
> +#define OPT_ZONE_DMA32 ZONE_NORMAL
> #endif
>
> /*
>
--
Mel Gorman
Part-time Phd Student Linux Technology Center
University of Limerick IBM Dublin Software Lab
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Use integer fields lookup for gfp_zone and check for errors in flags passed to the page allocator
2009-05-26 23:26 ` Mel Gorman
@ 2009-05-27 9:48 ` Mel Gorman
2009-05-27 14:23 ` Christoph Lameter
0 siblings, 1 reply; 8+ messages in thread
From: Mel Gorman @ 2009-05-27 9:48 UTC (permalink / raw)
To: Christoph Lameter; +Cc: akpm, linux-mm, npiggin, KAMEZAWA Hiroyuki
On Wed, May 27, 2009 at 12:26:20AM +0100, Mel Gorman wrote:
> On Tue, May 26, 2009 at 02:04:35PM -0400, Christoph Lameter wrote:
> > On Mon, 25 May 2009, Mel Gorman wrote:
> >
> > > I expect that the machine would start running into reclaim issues with
> > > enough uptime because it'll not be using Highmem as it should. Similarly,
> > > the GFP_DMA32 may also be a problem as the new implementation is going
> > > ZONE_DMA when ZONE_NORMAL would have been ok in this case.
> >
> > Right. The fallback for DMA32 is wrong. Should fall back to ZONE_NORMAL.
> > Not to DMA. And the config variable to check for highmem was wrong.
> >
>
> That fixed things right up on x86 at least and it looks good. I've queued
> up a few tests with the patch applied on x86, x86-64 and ppc64. Hopefully
> it'll go smoothly.
>
It didn't go perfectly smoothly but I have some results. First off the new
gfp_zone() is now returning the same results for the old gfp_zone() for the
common flag combinations on ppc64, x86 and x86-64. That is good.
On x86-64 (Phenom II X4)
netperf is showing +/- 1.8% on UDP and TCP tests, consider level
sysbench is showing +/- 1% on postgres, mostly level
kernbench is showing +1.7% on system time
kernbench is showing 0.25% on elapsed time
On ppc64 (ppc970)
netperf failed to run overnight, my own fault
sysbench is showing, +1.95%
kernbench is showing +0.21% on system time
kernbench is showing -0.01% on elapsed time
The x86 machine was running other tests and didn't catch up in time.
The performance results are mostly good. kernbench is the most allocator
intensive by far and it showed reasonable gains on the system time for both
machines where you'd expect an allocator improvement to have the most impact.
Other results were either flat or showed small gains.
> For your patch + fix merged
>
> Acked-by: Mel Gorman <mel@csn.ul.ie>
>
I'm happier with this now. After the tests and another read through the
patch, nothing else jumps out at me.
Reviewed-by: Mel Gorman <mel@csn.ul.ie>
Good work.
> >
> > Subject: Fix gfp zone patch
> >
> > 1. If there is no DMA32 fall back to NORMAL instead of DMA
> >
> > 2. Use the correct config variable for HIGHMEM
> >
> > Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
> >
> >
> > ---
> > include/linux/gfp.h | 4 ++--
> > 1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > Index: linux-2.6/include/linux/gfp.h
> > ===================================================================
> > --- linux-2.6.orig/include/linux/gfp.h 2009-05-26 12:59:19.000000000 -0500
> > +++ linux-2.6/include/linux/gfp.h 2009-05-26 12:59:31.000000000 -0500
> > @@ -112,7 +112,7 @@ static inline int allocflags_to_migratet
> > ((gfp_flags & __GFP_RECLAIMABLE) != 0);
> > }
> >
> > -#ifdef CONFIG_ZONE_HIGHMEM
> > +#ifdef CONFIG_HIGHMEM
> > #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
> > #else
> > #define OPT_ZONE_HIGHMEM ZONE_NORMAL
> > @@ -127,7 +127,7 @@ static inline int allocflags_to_migratet
> > #ifdef CONFIG_ZONE_DMA32
> > #define OPT_ZONE_DMA32 ZONE_DMA32
> > #else
> > -#define OPT_ZONE_DMA32 OPT_ZONE_DMA
> > +#define OPT_ZONE_DMA32 ZONE_NORMAL
> > #endif
> >
> > /*
> >
>
--
Mel Gorman
Part-time Phd Student Linux Technology Center
University of Limerick IBM Dublin Software Lab
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Use integer fields lookup for gfp_zone and check for errors in flags passed to the page allocator
2009-05-27 9:48 ` Mel Gorman
@ 2009-05-27 14:23 ` Christoph Lameter
0 siblings, 0 replies; 8+ messages in thread
From: Christoph Lameter @ 2009-05-27 14:23 UTC (permalink / raw)
To: Mel Gorman; +Cc: akpm, linux-mm, npiggin, KAMEZAWA Hiroyuki
On Wed, 27 May 2009, Mel Gorman wrote:
> I'm happier with this now. After the tests and another read through the
> patch, nothing else jumps out at me.
>
> Reviewed-by: Mel Gorman <mel@csn.ul.ie>
>
> Good work.
It would not have been possible without your help and the testing you did.
Thanks.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2009-05-27 14:23 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-05-22 18:42 [PATCH] Use integer fields lookup for gfp_zone and check for errors in flags passed to the page allocator Christoph Lameter
2009-05-25 1:51 ` KAMEZAWA Hiroyuki
2009-05-26 16:58 ` Christoph Lameter
2009-05-25 11:30 ` Mel Gorman
2009-05-26 18:04 ` Christoph Lameter
2009-05-26 23:26 ` Mel Gorman
2009-05-27 9:48 ` Mel Gorman
2009-05-27 14:23 ` Christoph Lameter
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox