* [PATCH] Get rid of zone_table V2
@ 2006-09-18 19:21 Christoph Lameter
2006-09-18 20:28 ` Andrew Morton
2006-09-24 10:06 ` Andrew Morton
0 siblings, 2 replies; 36+ messages in thread
From: Christoph Lameter @ 2006-09-18 19:21 UTC (permalink / raw)
To: akpm; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
V1->V2
- Optimize section to node lookup table for 32 bit platforms
that cannot fit the node into page-flags.
The zone table is mostly not needed. If we have a node in the
page flags then we can get to the zone via NODE_DATA() which
is much more likely to be already in the cpu cache.
In case of SMP and UP NODE_DATA() is a constant pointer which
allows us to access an exact replica of zonetable in the node_zones
field. In all of the above cases there will be no need at all for the
zone table.
The only remaining case is if in a NUMA system the node
numbers do not fit into the page flags. In that case
we make sparse generate a table that maps sections to
nodes and use that table to to figure out the node number. This
table is sized to fit in a single cache line for the known
32 bit NUMA platform which makes it very likely that the information
can be obtained without a cache miss.
For sparsemem the zone table seems to be have been fairly large based on
the maximum possible number of sections and the number of zones per node.
There is some memory saving by removing zone_table. The main benefit
is to reduce the cache foootprint of the VM from the frequent lookups
of zones. Plus it simplifies the page allocator.
Tested on IA64(NUMA) and x86_64 (UP)
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Index: linux-2.6.18-rc6-mm2/include/linux/mm.h
===================================================================
--- linux-2.6.18-rc6-mm2.orig/include/linux/mm.h 2006-09-18 14:09:35.937295682 -0500
+++ linux-2.6.18-rc6-mm2/include/linux/mm.h 2006-09-18 14:09:42.802125419 -0500
@@ -395,7 +395,9 @@ void split_page(struct page *page, unsig
* We are going to use the flags for the page to node mapping if its in
* there. This includes the case where there is no node, so it is implicit.
*/
-#define FLAGS_HAS_NODE (NODES_WIDTH > 0 || NODES_SHIFT == 0)
+#if !(NODES_WIDTH > 0 || NODES_SHIFT == 0)
+#define NODE_NOT_IN_PAGE_FLAGS
+#endif
#ifndef PFN_SECTION_SHIFT
#define PFN_SECTION_SHIFT 0
@@ -410,13 +412,13 @@ void split_page(struct page *page, unsig
#define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0))
#define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0))
-/* NODE:ZONE or SECTION:ZONE is used to lookup the zone from a page. */
-#if FLAGS_HAS_NODE
-#define ZONETABLE_SHIFT (NODES_SHIFT + ZONES_SHIFT)
+/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allcator */
+#ifdef NODE_NOT_IN_PAGEFLAGS
+#define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT)
#else
-#define ZONETABLE_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT)
+#define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT)
#endif
-#define ZONETABLE_PGSHIFT ZONES_PGSHIFT
+#define ZONEID_PGSHIFT ZONES_PGSHIFT
#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED
#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED
@@ -425,23 +427,24 @@ void split_page(struct page *page, unsig
#define ZONES_MASK ((1UL << ZONES_WIDTH) - 1)
#define NODES_MASK ((1UL << NODES_WIDTH) - 1)
#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1)
-#define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1)
+#define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1)
static inline enum zone_type page_zonenum(struct page *page)
{
return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
}
-struct zone;
-extern struct zone *zone_table[];
-
+/*
+ * The identification function is only used by the buddy allocator for
+ * determining if two pages could be buddies. We are not really
+ * identifying a zone since we could be using a the section number
+ * id if we have not node id available in page flags.
+ * We guarantee only that it will return the same value for two
+ * combinable pages in a zone.
+ */
static inline int page_zone_id(struct page *page)
{
- return (page->flags >> ZONETABLE_PGSHIFT) & ZONETABLE_MASK;
-}
-static inline struct zone *page_zone(struct page *page)
-{
- return zone_table[page_zone_id(page)];
+ return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
}
static inline unsigned long zone_to_nid(struct zone *zone)
@@ -453,13 +456,20 @@ static inline unsigned long zone_to_nid(
#endif
}
+#ifdef NODE_NOT_IN_PAGE_FLAGS
+extern unsigned long page_to_nid(struct page *page);
+#else
static inline unsigned long page_to_nid(struct page *page)
{
- if (FLAGS_HAS_NODE)
- return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
- else
- return zone_to_nid(page_zone(page));
+ return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
}
+#endif
+
+static inline struct zone *page_zone(struct page *page)
+{
+ return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
+}
+
static inline unsigned long page_to_section(struct page *page)
{
return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
@@ -476,6 +486,7 @@ static inline void set_page_node(struct
page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;
}
+
static inline void set_page_section(struct page *page, unsigned long section)
{
page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);
@@ -976,8 +987,6 @@ extern void mem_init(void);
extern void show_mem(void);
extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);
-extern void zonetable_add(struct zone *zone, int nid, enum zone_type zid,
- unsigned long pfn, unsigned long size);
#ifdef CONFIG_NUMA
extern void setup_per_cpu_pageset(void);
Index: linux-2.6.18-rc6-mm2/mm/sparse.c
===================================================================
--- linux-2.6.18-rc6-mm2.orig/mm/sparse.c 2006-09-18 13:16:09.239731256 -0500
+++ linux-2.6.18-rc6-mm2/mm/sparse.c 2006-09-18 14:10:22.720490405 -0500
@@ -24,6 +24,25 @@ struct mem_section mem_section[NR_SECTIO
#endif
EXPORT_SYMBOL(mem_section);
+#ifdef NODE_NOT_IN_PAGE_FLAGS
+/*
+ * If we did not store the node number in the page then we have to
+ * do a lookup in the section_to_node_table in order to find which
+ * node the page belongs to.
+ */
+#if MAX_NUMNODES <= 256
+static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
+#else
+static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
+#endif
+
+extern unsigned long page_to_nid(struct page *page)
+{
+ return section_to_node_table[page_to_section(page)];
+}
+EXPORT_SYMBOL(page_to_nid);
+#endif
+
#ifdef CONFIG_SPARSEMEM_EXTREME
static struct mem_section *sparse_index_alloc(int nid)
{
@@ -49,6 +68,10 @@ static int sparse_index_init(unsigned lo
struct mem_section *section;
int ret = 0;
+#ifdef NODE_NOT_IN_PAGE_FLAGS
+ section_to_node_table[section_nr] = nid;
+#endif
+
if (mem_section[root])
return -EEXIST;
Index: linux-2.6.18-rc6-mm2/mm/page_alloc.c
===================================================================
--- linux-2.6.18-rc6-mm2.orig/mm/page_alloc.c 2006-09-18 14:09:30.737643745 -0500
+++ linux-2.6.18-rc6-mm2/mm/page_alloc.c 2006-09-18 14:09:42.836307954 -0500
@@ -82,13 +82,6 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_Z
EXPORT_SYMBOL(totalram_pages);
-/*
- * Used by page_zone() to look up the address of the struct zone whose
- * id is encoded in the upper bits of page->flags
- */
-struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
-EXPORT_SYMBOL(zone_table);
-
static char *zone_names[MAX_NR_ZONES] = {
"DMA",
#ifdef CONFIG_ZONE_DMA32
@@ -1806,20 +1799,6 @@ void zone_init_free_lists(struct pglist_
}
}
-#define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr)
-void zonetable_add(struct zone *zone, int nid, enum zone_type zid,
- unsigned long pfn, unsigned long size)
-{
- unsigned long snum = pfn_to_section_nr(pfn);
- unsigned long end = pfn_to_section_nr(pfn + size);
-
- if (FLAGS_HAS_NODE)
- zone_table[ZONETABLE_INDEX(nid, zid)] = zone;
- else
- for (; snum <= end; snum++)
- zone_table[ZONETABLE_INDEX(snum, zid)] = zone;
-}
-
#ifndef __HAVE_ARCH_MEMMAP_INIT
#define memmap_init(size, nid, zone, start_pfn) \
memmap_init_zone((size), (nid), (zone), (start_pfn))
@@ -2524,7 +2503,6 @@ static void __meminit free_area_init_cor
if (!size)
continue;
- zonetable_add(zone, nid, j, zone_start_pfn, size);
ret = init_currently_empty_zone(zone, zone_start_pfn, size);
BUG_ON(ret);
zone_start_pfn += size;
Index: linux-2.6.18-rc6-mm2/mm/memory_hotplug.c
===================================================================
--- linux-2.6.18-rc6-mm2.orig/mm/memory_hotplug.c 2006-09-18 13:16:09.257310833 -0500
+++ linux-2.6.18-rc6-mm2/mm/memory_hotplug.c 2006-09-18 14:09:42.855840832 -0500
@@ -72,7 +72,6 @@ static int __add_zone(struct zone *zone,
return ret;
}
memmap_init_zone(nr_pages, nid, zone_type, phys_start_pfn);
- zonetable_add(zone, nid, zone_type, phys_start_pfn, nr_pages);
return 0;
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-18 19:21 [PATCH] Get rid of zone_table V2 Christoph Lameter
@ 2006-09-18 20:28 ` Andrew Morton
2006-09-18 22:51 ` Christoph Lameter
2006-09-24 10:06 ` Andrew Morton
1 sibling, 1 reply; 36+ messages in thread
From: Andrew Morton @ 2006-09-18 20:28 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
On Mon, 18 Sep 2006 12:21:35 -0700 (PDT)
Christoph Lameter <clameter@sgi.com> wrote:
> The zone table is mostly not needed. If we have a node in the
> page flags then we can get to the zone via NODE_DATA() which
> is much more likely to be already in the cpu cache.
Adds a couple of hundred bytes of text to an x86 SMP build. Any
idea why? If it's things like page_zone() getting porkier then that's
a bit unfortunate - that's rather fastpath material.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-18 20:28 ` Andrew Morton
@ 2006-09-18 22:51 ` Christoph Lameter
2006-09-18 23:15 ` Andrew Morton
0 siblings, 1 reply; 36+ messages in thread
From: Christoph Lameter @ 2006-09-18 22:51 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
On Mon, 18 Sep 2006, Andrew Morton wrote:
> On Mon, 18 Sep 2006 12:21:35 -0700 (PDT)
> Christoph Lameter <clameter@sgi.com> wrote:
>
> > The zone table is mostly not needed. If we have a node in the
> > page flags then we can get to the zone via NODE_DATA() which
> > is much more likely to be already in the cpu cache.
>
> Adds a couple of hundred bytes of text to an x86 SMP build. Any
> idea why? If it's things like page_zone() getting porkier then that's
> a bit unfortunate - that's rather fastpath material.
In an SMP/UP configuration we do not need to do any lookup since
NODE_DATA() is constant. We calculate the address of the zone which may be
more code than a lookup.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-18 22:51 ` Christoph Lameter
@ 2006-09-18 23:15 ` Andrew Morton
2006-09-18 23:46 ` Christoph Lameter
0 siblings, 1 reply; 36+ messages in thread
From: Andrew Morton @ 2006-09-18 23:15 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
On Mon, 18 Sep 2006 15:51:25 -0700 (PDT)
Christoph Lameter <clameter@sgi.com> wrote:
> On Mon, 18 Sep 2006, Andrew Morton wrote:
>
> > On Mon, 18 Sep 2006 12:21:35 -0700 (PDT)
> > Christoph Lameter <clameter@sgi.com> wrote:
> >
> > > The zone table is mostly not needed. If we have a node in the
> > > page flags then we can get to the zone via NODE_DATA() which
> > > is much more likely to be already in the cpu cache.
> >
> > Adds a couple of hundred bytes of text to an x86 SMP build. Any
> > idea why? If it's things like page_zone() getting porkier then that's
> > a bit unfortunate - that's rather fastpath material.
>
> In an SMP/UP configuration we do not need to do any lookup since
> NODE_DATA() is constant. We calculate the address of the zone which may be
> more code than a lookup.
So it looks like we've made UP and small SMP worse, while providing some
undescribed level of benefit to big NUMA? Not a popular tradeoff, that.
We call page_zone() rather a lot, and looking at the proposed new version
is scary:
static inline enum zone_type page_zonenum(struct page *page)
{
return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
}
static inline unsigned long page_to_nid(struct page *page)
{
return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
}
static inline struct zone *page_zone(struct page *page)
{
return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
}
Not only does it add a whole bunch of pointer derefs and arithmetic (and a
probably unnecessary second indirection for page->flags), it also brings a
read of contig_page_data[] into the picture.
So... it's not obvious to me that this is an aggregate improvement?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-18 23:15 ` Andrew Morton
@ 2006-09-18 23:46 ` Christoph Lameter
2006-09-18 23:58 ` Andrew Morton
0 siblings, 1 reply; 36+ messages in thread
From: Christoph Lameter @ 2006-09-18 23:46 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
On Mon, 18 Sep 2006, Andrew Morton wrote:
> > In an SMP/UP configuration we do not need to do any lookup since
> > NODE_DATA() is constant. We calculate the address of the zone which may be
> > more code than a lookup.
>
> So it looks like we've made UP and small SMP worse, while providing some
> undescribed level of benefit to big NUMA? Not a popular tradeoff, that.
We avoid one memory reference for SMP and UP and do an address calculation
instead.
> We call page_zone() rather a lot, and looking at the proposed new version
> is scary:
> static inline struct zone *page_zone(struct page *page)
> {
> return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
> }
>
> Not only does it add a whole bunch of pointer derefs and arithmetic (and a
> probably unnecessary second indirection for page->flags), it also brings a
> read of contig_page_data[] into the picture.
It only is a single pointer deref from the node_data array indexed by
the node. The node_zones ref is an address calculation since the
node_zones is an array of zone and not a list of pointers to zones.
What do you mean by a read of contig_page_data?
NODE_DATA() is constant for the UP and SMP case.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-18 23:46 ` Christoph Lameter
@ 2006-09-18 23:58 ` Andrew Morton
2006-09-19 0:08 ` Christoph Lameter
2006-09-19 0:14 ` Christoph Lameter
0 siblings, 2 replies; 36+ messages in thread
From: Andrew Morton @ 2006-09-18 23:58 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
On Mon, 18 Sep 2006 16:46:25 -0700 (PDT)
Christoph Lameter <clameter@sgi.com> wrote:
> On Mon, 18 Sep 2006, Andrew Morton wrote:
>
> > > In an SMP/UP configuration we do not need to do any lookup since
> > > NODE_DATA() is constant. We calculate the address of the zone which may be
> > > more code than a lookup.
> >
> > So it looks like we've made UP and small SMP worse, while providing some
> > undescribed level of benefit to big NUMA? Not a popular tradeoff, that.
>
> We avoid one memory reference for SMP and UP and do an address calculation
> instead.
What memory reference do we avoid? zone_table?
In exchange for that we've added an additional deref of page->flags and a
new read from contig_page_data.
> > We call page_zone() rather a lot, and looking at the proposed new version
> > is scary:
>
> > static inline struct zone *page_zone(struct page *page)
> > {
> > return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
> > }
> >
> > Not only does it add a whole bunch of pointer derefs and arithmetic (and a
> > probably unnecessary second indirection for page->flags), it also brings a
> > read of contig_page_data[] into the picture.
>
> It only is a single pointer deref from the node_data array indexed by
> the node. The node_zones ref is an address calculation since the
> node_zones is an array of zone and not a list of pointers to zones.
>
> What do you mean by a read of contig_page_data?
>
> NODE_DATA() is constant for the UP and SMP case.
setenv ARCH i386
make allnoconfig
make mm/page_alloc.i
grep contig_page_data mm/page_alloc.i
and that's mainline. Changing page_zone to also read from contig_page_data
will presumably worsen things.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-18 23:58 ` Andrew Morton
@ 2006-09-19 0:08 ` Christoph Lameter
2006-09-19 0:14 ` Christoph Lameter
1 sibling, 0 replies; 36+ messages in thread
From: Christoph Lameter @ 2006-09-19 0:08 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
On Mon, 18 Sep 2006, Andrew Morton wrote:
> > We avoid one memory reference for SMP and UP and do an address calculation
> > instead.
>
> What memory reference do we avoid? zone_table?
Yes.
> In exchange for that we've added an additional deref of page->flags and a
> new read from contig_page_data.
The additional deref of page->flags is the same as before. The compiler
optimizes that one away. We need to extract two sets of bits from the same
register.
> > NODE_DATA() is constant for the UP and SMP case.
>
> setenv ARCH i386
> make allnoconfig
> make mm/page_alloc.i
> grep contig_page_data mm/page_alloc.i
>
> and that's mainline. Changing page_zone to also read from contig_page_data
> will presumably worsen things.
Hmmm... I have not checked i386 code generation.
But include/linux/mmzone.h has
extern struct pglist_data contig_page_data;
#define NODE_DATA(nid) (&contig_page_data)
So we should have an address there on SMP/UP.
NODE_DATA(nid0->node_zones == contig_page_data.node_zones
which I thought would still be constant. Then we calculate the address of
the ith element of node_zones.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-18 23:58 ` Andrew Morton
2006-09-19 0:08 ` Christoph Lameter
@ 2006-09-19 0:14 ` Christoph Lameter
2006-09-19 0:31 ` Andrew Morton
1 sibling, 1 reply; 36+ messages in thread
From: Christoph Lameter @ 2006-09-19 0:14 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
i386 code for __inc_zone_page_state which does
void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);
objdump
0000078f <__inc_zone_page_state>:
78f: 8b 00 mov (%eax),%eax
791: c1 e8 19 shr $0x19,%eax
794: 83 e0 01 and $0x1,%eax
797: 69 c0 80 04 00 00 imul $0x480,%eax,%eax
79d: 05 00 00 00 00 add $0x0,%eax
7a2: e9 50 fe ff ff jmp 5f7 <__inc_zone_state>
Disassembly of section .altinstr_replacement:
note no lookup anymore.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-19 0:14 ` Christoph Lameter
@ 2006-09-19 0:31 ` Andrew Morton
2006-09-19 1:20 ` Christoph Lameter
2006-09-19 6:12 ` Christoph Lameter
0 siblings, 2 replies; 36+ messages in thread
From: Andrew Morton @ 2006-09-19 0:31 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
On Mon, 18 Sep 2006 17:14:20 -0700 (PDT)
Christoph Lameter <clameter@sgi.com> wrote:
> i386 code for __inc_zone_page_state which does
>
> void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
> {
> __inc_zone_state(page_zone(page), item);
> }
> EXPORT_SYMBOL(__inc_zone_page_state);
>
> objdump
>
> 0000078f <__inc_zone_page_state>:
> 78f: 8b 00 mov (%eax),%eax
> 791: c1 e8 19 shr $0x19,%eax
> 794: 83 e0 01 and $0x1,%eax
> 797: 69 c0 80 04 00 00 imul $0x480,%eax,%eax
> 79d: 05 00 00 00 00 add $0x0,%eax
> 7a2: e9 50 fe ff ff jmp 5f7 <__inc_zone_state>
> Disassembly of section .altinstr_replacement:
>
> note no lookup anymore.
With the mm tree up to but not including get-rid-of-zone_table.patch:
.globl __inc_zone_page_state
.type __inc_zone_page_state, @function
__inc_zone_page_state:
.LFB669:
.loc 1 259 0
.LVL183:
pushl %ebp #
.LCFI113:
movl %esp, %ebp #,
.LCFI114:
.loc 1 260 0
movl 8(%ebp), %eax # page, page
movl 12(%ebp), %edx # item, item
movl (%eax), %eax # <variable>.flags, <variable>.flags
shrl $30, %eax #, <variable>.flags
movl zone_table(,%eax,4), %eax # zone_table, tmp64
call __inc_zone_state #
.loc 1 261 0
popl %ebp #
ret
With the mm tree up to and including get-rid-of-zone_table.patch:
.globl __inc_zone_page_state
.type __inc_zone_page_state, @function
__inc_zone_page_state:
.LFB669:
.loc 1 259 0
.LVL183:
pushl %ebp #
.LCFI113:
movl %esp, %ebp #,
.LCFI114:
.loc 1 260 0
movl 8(%ebp), %eax # page, page
movl 12(%ebp), %edx # item, item
movl (%eax), %eax # <variable>.flags, <variable>.flags
shrl $30, %eax #, <variable>.flags
leal (%eax,%eax,2), %eax #, tmp65
leal (%eax,%eax,8), %eax #, tmp67
sall $5, %eax #, tmp67
addl $contig_page_data, %eax #, tmp69
call __inc_zone_state #
.loc 1 261 0
popl %ebp #
ret
Which is pretty much the same thing. I assume your objdump was of
an unlinked .o file, so contig_page_data shows up as 0x0.
The code looks OK though.
It would be nice to be able to reclaim a few bits from page->flags - we're
awfully short on them.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-19 0:31 ` Andrew Morton
@ 2006-09-19 1:20 ` Christoph Lameter
2006-09-19 6:12 ` Christoph Lameter
1 sibling, 0 replies; 36+ messages in thread
From: Christoph Lameter @ 2006-09-19 1:20 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
On Mon, 18 Sep 2006, Andrew Morton wrote:
> Which is pretty much the same thing. I assume your objdump was of
> an unlinked .o file, so contig_page_data shows up as 0x0.
Correct.
> The code looks OK though.
>
> It would be nice to be able to reclaim a few bits from page->flags - we're
> awfully short on them.
With the zone reduction patchset we already have an additional bit. If you
look at the i386 code it does an "and 1,ax". With the optional zone dma
patch we will have an additional bit because then there are no zones
anymore for SMP and UP. At that point page_zone() becomes a constant.
The node id is essential for NUMA locality and we cannot easily remove
that from the page flags without additional lookups.
Configurations using DISCONTIG do not need the section bits that
sparsemem requires. Sparsemem is tunable though. If you configure coarser
granularity then more bits can be recovered.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-19 0:31 ` Andrew Morton
2006-09-19 1:20 ` Christoph Lameter
@ 2006-09-19 6:12 ` Christoph Lameter
2006-09-19 6:33 ` Andrew Morton
1 sibling, 1 reply; 36+ messages in thread
From: Christoph Lameter @ 2006-09-19 6:12 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
Hmmm... Actually one can get much better code. If I remove the padding
from struct zone then struct zone shrinks from 0x480 to 0x400 in length
on i386 smp and then we get code without a multiply operation:
00000787 <__inc_zone_page_state>:
787: 8b 00 mov (%eax),%eax
789: c1 e8 0f shr $0xf,%eax
78c: 25 00 04 00 00 and $0x400,%eax
791: 05 00 00 00 00 add $0x0,%eax
796: e9 58 fe ff ff jmp 5f3 <__inc_zone_state>
Is there some way to get a structure to be sized a power of two? This is
so nice and small that one would do good to inline __inc_zone_page_state.
Remove padding from struct zone
Index: linux-2.6.18-rc6-mm1/include/linux/mmzone.h
===================================================================
--- linux-2.6.18-rc6-mm1.orig/include/linux/mmzone.h 2006-09-18 20:20:34.000000000 -0700
+++ linux-2.6.18-rc6-mm1/include/linux/mmzone.h 2006-09-18 20:34:24.000000000 -0700
@@ -31,21 +31,6 @@
struct pglist_data;
-/*
- * zone->lock and zone->lru_lock are two of the hottest locks in the kernel.
- * So add a wild amount of padding here to ensure that they fall into separate
- * cachelines. There are very few zone structures in the machine, so space
- * consumption is not a concern here.
- */
-#if defined(CONFIG_SMP)
-struct zone_padding {
- char x[0];
-} ____cacheline_internodealigned_in_smp;
-#define ZONE_PADDING(name) struct zone_padding name;
-#else
-#define ZONE_PADDING(name)
-#endif
-
enum zone_stat_item {
NR_ANON_PAGES, /* Mapped anonymous pages */
NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
@@ -186,9 +171,6 @@
#endif
struct free_area free_area[MAX_ORDER];
-
- ZONE_PADDING(_pad1_)
-
/* Fields commonly accessed by the page reclaim scanner */
spinlock_t lru_lock;
struct list_head active_list;
@@ -231,7 +213,6 @@
int prev_priority;
- ZONE_PADDING(_pad2_)
/* Rarely used or read-mostly fields */
/*
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-19 6:12 ` Christoph Lameter
@ 2006-09-19 6:33 ` Andrew Morton
2006-09-19 14:10 ` Christoph Lameter
0 siblings, 1 reply; 36+ messages in thread
From: Andrew Morton @ 2006-09-19 6:33 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
On Mon, 18 Sep 2006 23:12:31 -0700 (PDT)
Christoph Lameter <clameter@sgi.com> wrote:
> Hmmm... Actually one can get much better code. If I remove the padding
> from struct zone then struct zone shrinks from 0x480 to 0x400 in length
That padding is there to prevent the rather unrelated lru_lock and
list_lock from falling into the same cacheline, which is presumed to be a
bad thing...
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-19 6:33 ` Andrew Morton
@ 2006-09-19 14:10 ` Christoph Lameter
2006-09-19 15:38 ` Andrew Morton
0 siblings, 1 reply; 36+ messages in thread
From: Christoph Lameter @ 2006-09-19 14:10 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
On Mon, 18 Sep 2006, Andrew Morton wrote:
> On Mon, 18 Sep 2006 23:12:31 -0700 (PDT)
> Christoph Lameter <clameter@sgi.com> wrote:
>
> > Hmmm... Actually one can get much better code. If I remove the padding
> > from struct zone then struct zone shrinks from 0x480 to 0x400 in length
>
> That padding is there to prevent the rather unrelated lru_lock and
> list_lock from falling into the same cacheline, which is presumed to be a
> bad thing...
Yeah but we have added so much stuff in between that such a thing is
highly unlikely. Even with padding we could increase the size of zone to
the next power of two.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-19 14:10 ` Christoph Lameter
@ 2006-09-19 15:38 ` Andrew Morton
2006-09-19 15:41 ` Christoph Lameter
2006-09-19 16:17 ` Nick Piggin
0 siblings, 2 replies; 36+ messages in thread
From: Andrew Morton @ 2006-09-19 15:38 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
On Tue, 19 Sep 2006 07:10:18 -0700 (PDT)
Christoph Lameter <clameter@sgi.com> wrote:
> On Mon, 18 Sep 2006, Andrew Morton wrote:
>
> > On Mon, 18 Sep 2006 23:12:31 -0700 (PDT)
> > Christoph Lameter <clameter@sgi.com> wrote:
> >
> > > Hmmm... Actually one can get much better code. If I remove the padding
> > > from struct zone then struct zone shrinks from 0x480 to 0x400 in length
> >
> > That padding is there to prevent the rather unrelated lru_lock and
> > list_lock from falling into the same cacheline, which is presumed to be a
> > bad thing...
>
> Yeah but we have added so much stuff in between that such a thing is
> highly unlikely. Even with padding we could increase the size of zone to
> the next power of two.
unlikely != impossible. If some distro were to send the "unlikely" to
zillions of users, that would be sad.
I suspect what we want in there is to make the buddy-allocator's fields
land in a separate cacheline from the vm-scanner's fields. I don't think
the code has been reviewed for correctness wrt that for quite some time.
If there's some smarter way of doing the padding then cool.
Of course, it might be that we _want_ both things in the same cacheline:
the page scanner often frees pages (we hope ;)). And it'll usually be the
case that a page-allocator immediately goes and adds the page to the LRU.
ANd ditto for a page-freer.
Plus page-scanning is a much less common thing than page-allocating anyway.
So it's not completely obvious what the best approach is here. It's an
area of some delicacy which requires some thought and testing.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-19 15:38 ` Andrew Morton
@ 2006-09-19 15:41 ` Christoph Lameter
2006-09-19 16:23 ` Nick Piggin
2006-09-19 16:17 ` Nick Piggin
1 sibling, 1 reply; 36+ messages in thread
From: Christoph Lameter @ 2006-09-19 15:41 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
On Tue, 19 Sep 2006, Andrew Morton wrote:
> So it's not completely obvious what the best approach is here. It's an
> area of some delicacy which requires some thought and testing.
The primary thing that I though was worth doing is to get the size of
struct zone to a power of 2. Then the multiply is avoided in page_zone.
It just happened that this was possible by removing the
padding.
Would be okay to grow the size of struct zone to the next power of 2
bytes?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-19 15:38 ` Andrew Morton
2006-09-19 15:41 ` Christoph Lameter
@ 2006-09-19 16:17 ` Nick Piggin
1 sibling, 0 replies; 36+ messages in thread
From: Nick Piggin @ 2006-09-19 16:17 UTC (permalink / raw)
To: Andrew Morton; +Cc: Christoph Lameter, linux-mm, Andy Whitcroft, Dave Hansen
Andrew Morton wrote:
> On Tue, 19 Sep 2006 07:10:18 -0700 (PDT)
> Christoph Lameter <clameter@sgi.com> wrote:
>>Yeah but we have added so much stuff in between that such a thing is
>>highly unlikely. Even with padding we could increase the size of zone to
>>the next power of two.
>
>
> unlikely != impossible. If some distro were to send the "unlikely" to
> zillions of users, that would be sad.
I suspect if nobody else, then the ScaleMP guys would be sad about
this. But as you say, it is probably even worse that anybody might
suddenly get suboptimal behaviour depending on the phase of the moon.
>
> I suspect what we want in there is to make the buddy-allocator's fields
> land in a separate cacheline from the vm-scanner's fields. I don't think
> the code has been reviewed for correctness wrt that for quite some time.
>
> If there's some smarter way of doing the padding then cool.
I probably touched this padding stuff last, and indeed I tried to break
it into allocator / scanner / readmostly. There is quite a bit of
readmostly stuff in the allocator area (pages_* x 3, lowmem_reserve,
and the 2 CONFIG_NUMA fields), but it is generally accessed around the
same time as the other fields (lock and free_pages), so it might be
best to keep them together.
vm_stat may not be in the right place. I'd say put it up in the
allocator section, because quite a few stats happen there and
allocation is more performance critical than reclaim.
> Of course, it might be that we _want_ both things in the same cacheline:
> the page scanner often frees pages (we hope ;)). And it'll usually be the
> case that a page-allocator immediately goes and adds the page to the LRU.
> ANd ditto for a page-freer.
That's very true. There is some element of work batching though
(buddy allocation and freeing happening in batches, lru adding and
reclaim freeing happening in pagevec chunks).
So you'd still rather their cachelines don't overlap I suspect. Of
course, I can't imagine it will be easy to benchmark (other than on
ScaleMP systems, maybe).
--
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-19 15:41 ` Christoph Lameter
@ 2006-09-19 16:23 ` Nick Piggin
2006-09-19 16:45 ` Nick Piggin
0 siblings, 1 reply; 36+ messages in thread
From: Nick Piggin @ 2006-09-19 16:23 UTC (permalink / raw)
To: Christoph Lameter; +Cc: Andrew Morton, linux-mm, Andy Whitcroft, Dave Hansen
Christoph Lameter wrote:
> On Tue, 19 Sep 2006, Andrew Morton wrote:
>
>
>>So it's not completely obvious what the best approach is here. It's an
>>area of some delicacy which requires some thought and testing.
>
>
> The primary thing that I though was worth doing is to get the size of
> struct zone to a power of 2. Then the multiply is avoided in page_zone.
> It just happened that this was possible by removing the
> padding.
>
> Would be okay to grow the size of struct zone to the next power of 2
> bytes?
I'd say yes: it is only memory we're talking about here, not cachelines,
because the whole thing is cacheline aligned up the wazoo anyway. Let's
see, on your example system that would take up an extra 896 bytes per
struct zone... not too bad.
And it is a better solution than shrinking because it will work on all
architectures and configurations.
--
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-19 16:23 ` Nick Piggin
@ 2006-09-19 16:45 ` Nick Piggin
2006-09-19 17:50 ` Christoph Lameter
0 siblings, 1 reply; 36+ messages in thread
From: Nick Piggin @ 2006-09-19 16:45 UTC (permalink / raw)
To: Christoph Lameter; +Cc: Andrew Morton, linux-mm, Andy Whitcroft, Dave Hansen
Nick Piggin wrote:
> Christoph Lameter wrote:
>> Would be okay to grow the size of struct zone to the next power of 2
>> bytes?
>
>
> I'd say yes: it is only memory we're talking about here, not cachelines,
> because the whole thing is cacheline aligned up the wazoo anyway. Let's
> see, on your example system that would take up an extra 896 bytes per
> struct zone... not too bad.
>
> And it is a better solution than shrinking because it will work on all
> architectures and configurations.
BTW. I wonder why gcc isn't using two shifts in your example? Not that
I think it would be great even if it were, because subtle differences
could cause that to become more shifts...
--
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-19 16:45 ` Nick Piggin
@ 2006-09-19 17:50 ` Christoph Lameter
2006-09-19 18:24 ` Andi Kleen
0 siblings, 1 reply; 36+ messages in thread
From: Christoph Lameter @ 2006-09-19 17:50 UTC (permalink / raw)
To: Nick Piggin; +Cc: Andrew Morton, linux-mm, Andy Whitcroft, Dave Hansen
On Wed, 20 Sep 2006, Nick Piggin wrote:
> BTW. I wonder why gcc isn't using two shifts in your example? Not that
> I think it would be great even if it were, because subtle differences
> could cause that to become more shifts...
Perhaps multiply is highly optimized in contemporary processors and I am
worrying too much about the multiply?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-19 17:50 ` Christoph Lameter
@ 2006-09-19 18:24 ` Andi Kleen
0 siblings, 0 replies; 36+ messages in thread
From: Andi Kleen @ 2006-09-19 18:24 UTC (permalink / raw)
To: Christoph Lameter
Cc: Nick Piggin, Andrew Morton, linux-mm, Andy Whitcroft, Dave Hansen
On Tuesday 19 September 2006 19:50, Christoph Lameter wrote:
> On Wed, 20 Sep 2006, Nick Piggin wrote:
>
> > BTW. I wonder why gcc isn't using two shifts in your example? Not that
> > I think it would be great even if it were, because subtle differences
> > could cause that to become more shifts...
>
> Perhaps multiply is highly optimized in contemporary processors and I am
> worrying too much about the multiply?
It is. e.g. an Opteron can do a multiply in 3-4 cycles
Just arbitary division is still slow. Division by constant is usually
also not that bad because the compiler can decompose it into multiplies
and other operations.
-Andi
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-18 19:21 [PATCH] Get rid of zone_table V2 Christoph Lameter
2006-09-18 20:28 ` Andrew Morton
@ 2006-09-24 10:06 ` Andrew Morton
2006-09-24 16:58 ` Christoph Lameter
2006-09-27 9:19 ` Andrew Morton
1 sibling, 2 replies; 36+ messages in thread
From: Andrew Morton @ 2006-09-24 10:06 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
On Mon, 18 Sep 2006 12:21:35 -0700 (PDT)
Christoph Lameter <clameter@sgi.com> wrote:
> static inline int page_zone_id(struct page *page)
> {
> - return (page->flags >> ZONETABLE_PGSHIFT) & ZONETABLE_MASK;
> -}
> -static inline struct zone *page_zone(struct page *page)
> -{
> - return zone_table[page_zone_id(page)];
> + return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
> }
arm allmodconfig:
include/linux/mm.h: In function `page_zone_id':
include/linux/mm.h:450: warning: right shift count >= width of type
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-24 10:06 ` Andrew Morton
@ 2006-09-24 16:58 ` Christoph Lameter
2006-09-27 9:19 ` Andrew Morton
1 sibling, 0 replies; 36+ messages in thread
From: Christoph Lameter @ 2006-09-24 16:58 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-mm, Andy Whitcroft, Dave Hansen
On Sun, 24 Sep 2006, Andrew Morton wrote:
> arm allmodconfig:
>
> include/linux/mm.h: In function `page_zone_id':
> include/linux/mm.h:450: warning: right shift count >= width of type
That is a sparse config problem I think. I had this when trying to get
back from a sparse to a non sparse configuration on i386 f.e.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-24 10:06 ` Andrew Morton
2006-09-24 16:58 ` Christoph Lameter
@ 2006-09-27 9:19 ` Andrew Morton
2006-09-27 9:26 ` Andy Whitcroft
` (2 more replies)
1 sibling, 3 replies; 36+ messages in thread
From: Andrew Morton @ 2006-09-27 9:19 UTC (permalink / raw)
To: Christoph Lameter, linux-mm, Andy Whitcroft, Dave Hansen
On Sun, 24 Sep 2006 03:06:43 -0700
Andrew Morton <akpm@osdl.org> wrote:
> On Mon, 18 Sep 2006 12:21:35 -0700 (PDT)
> Christoph Lameter <clameter@sgi.com> wrote:
>
> > static inline int page_zone_id(struct page *page)
> > {
> > - return (page->flags >> ZONETABLE_PGSHIFT) & ZONETABLE_MASK;
> > -}
> > -static inline struct zone *page_zone(struct page *page)
> > -{
> > - return zone_table[page_zone_id(page)];
> > + return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
> > }
>
> arm allmodconfig:
>
> include/linux/mm.h: In function `page_zone_id':
> include/linux/mm.h:450: warning: right shift count >= width of type
ping.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-27 9:19 ` Andrew Morton
@ 2006-09-27 9:26 ` Andy Whitcroft
2006-09-27 11:23 ` [PATCH] zone table removal miss merge Andy Whitcroft
2006-09-27 11:27 ` [PATCH] Get rid of zone_table V2 Andy Whitcroft
2 siblings, 0 replies; 36+ messages in thread
From: Andy Whitcroft @ 2006-09-27 9:26 UTC (permalink / raw)
To: Andrew Morton; +Cc: Christoph Lameter, linux-mm, Dave Hansen
Andrew Morton wrote:
> On Sun, 24 Sep 2006 03:06:43 -0700
> Andrew Morton <akpm@osdl.org> wrote:
>
>> On Mon, 18 Sep 2006 12:21:35 -0700 (PDT)
>> Christoph Lameter <clameter@sgi.com> wrote:
>>
>>> static inline int page_zone_id(struct page *page)
>>> {
>>> - return (page->flags >> ZONETABLE_PGSHIFT) & ZONETABLE_MASK;
>>> -}
>>> -static inline struct zone *page_zone(struct page *page)
>>> -{
>>> - return zone_table[page_zone_id(page)];
>>> + return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
>>> }
>> arm allmodconfig:
>>
>> include/linux/mm.h: In function `page_zone_id':
>> include/linux/mm.h:450: warning: right shift count >= width of type
>
> ping.
I'll have a poke at this.
-apw
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH] zone table removal miss merge
2006-09-27 9:19 ` Andrew Morton
2006-09-27 9:26 ` Andy Whitcroft
@ 2006-09-27 11:23 ` Andy Whitcroft
2006-09-27 16:19 ` Christoph Lameter
2006-09-27 11:27 ` [PATCH] Get rid of zone_table V2 Andy Whitcroft
2 siblings, 1 reply; 36+ messages in thread
From: Andy Whitcroft @ 2006-09-27 11:23 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-mm, linux-kernel, Christoph Lameter, Andy Whitcroft, Dave Hansen
As suspected this is not related to SPARSEMEM configuration at all.
But relates to the case where the node,zone size is zero. Here we
then are trying to shift (sizeof(int) - 0) which is illegal.
We should be defining ZONEID_SHIFT in terms of ZONE_PGSHIFT not
ZONE_PGOFF. As this was correct in the orginal patch I assume this
was somehow damaged during merge.
The below should fix it.
-apw
=== 8< ===
zone table removal miss-merge
It looks very much like zone table removal v2 suffered during merge
into -mm. This patch is needed to get rid of the following errors
on arm (and I suspect other platforms):
include/linux/mm.h: In function `page_zone_id':
include/linux/mm.h:450: warning: right shift count >= width of type
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
---
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a7997d9..2eb64fa 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -421,7 +421,7 @@ #define ZONEID_SHIFT (SECTIONS_SHIFT +
#else
#define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT)
#endif
-#define ZONEID_PGSHIFT ZONES_PGOFF
+#define ZONEID_PGSHIFT ZONES_PGSHIFT
#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED
#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-27 9:19 ` Andrew Morton
2006-09-27 9:26 ` Andy Whitcroft
2006-09-27 11:23 ` [PATCH] zone table removal miss merge Andy Whitcroft
@ 2006-09-27 11:27 ` Andy Whitcroft
2006-09-27 16:24 ` Andrew Morton
2006-09-30 18:47 ` Christoph Lameter
2 siblings, 2 replies; 36+ messages in thread
From: Andy Whitcroft @ 2006-09-27 11:27 UTC (permalink / raw)
To: Andrew Morton; +Cc: Christoph Lameter, linux-mm, Dave Hansen
Andrew Morton wrote:
> On Sun, 24 Sep 2006 03:06:43 -0700
> Andrew Morton <akpm@osdl.org> wrote:
>
>> On Mon, 18 Sep 2006 12:21:35 -0700 (PDT)
>> Christoph Lameter <clameter@sgi.com> wrote:
>>
>>> static inline int page_zone_id(struct page *page)
>>> {
>>> - return (page->flags >> ZONETABLE_PGSHIFT) & ZONETABLE_MASK;
>>> -}
>>> -static inline struct zone *page_zone(struct page *page)
>>> -{
>>> - return zone_table[page_zone_id(page)];
>>> + return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
>>> }
>> arm allmodconfig:
>>
>> include/linux/mm.h: In function `page_zone_id':
>> include/linux/mm.h:450: warning: right shift count >= width of type
On a separate note. I was able to get this puppy compiling enough to
see this warning and fix it, but nowhere close to compiling a kernel.
First, are you compiling here to a real kernel. If so, is this on a
cross-compiler or a real system. If its a cross-compiler which version.
Do you have recipe for success?
I'd like to be incoporating more cross-compilation testing into our
nightlies, but this one isn't playing ball.
:/
-apw
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] zone table removal miss merge
2006-09-27 11:23 ` [PATCH] zone table removal miss merge Andy Whitcroft
@ 2006-09-27 16:19 ` Christoph Lameter
0 siblings, 0 replies; 36+ messages in thread
From: Christoph Lameter @ 2006-09-27 16:19 UTC (permalink / raw)
To: Andy Whitcroft; +Cc: Andrew Morton, linux-mm, linux-kernel, Dave Hansen
On Wed, 27 Sep 2006, Andy Whitcroft wrote:
> The below should fix it.
Acked-by: Christoph Lameter <clameter@sgi.com>
Note that if ZONE_DMA is off then ZONES_WIDTH may become
0 and therefore also ZONES_PGSHIFT is zero.
If you then do
#define ZONEID_PGSHIFT ZONES_PGSHIFT
then ZONEID_PGSHIFT will be 0!
So this could be an issue for the optional ZONE_DMA patch.
Could you also make sure that ZONEID_PGSHIFT is set correctly even if
ZONES_WIDTH is zero?
This affects the optional ZONE_DMA patch. zone table removal should be
fine with just the above patch.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-27 11:27 ` [PATCH] Get rid of zone_table V2 Andy Whitcroft
@ 2006-09-27 16:24 ` Andrew Morton
2006-09-30 18:47 ` Christoph Lameter
1 sibling, 0 replies; 36+ messages in thread
From: Andrew Morton @ 2006-09-27 16:24 UTC (permalink / raw)
To: Andy Whitcroft; +Cc: Christoph Lameter, linux-mm, Dave Hansen
On Wed, 27 Sep 2006 12:27:48 +0100
Andy Whitcroft <apw@shadowen.org> wrote:
> Andrew Morton wrote:
> > On Sun, 24 Sep 2006 03:06:43 -0700
> > Andrew Morton <akpm@osdl.org> wrote:
> >
> >> On Mon, 18 Sep 2006 12:21:35 -0700 (PDT)
> >> Christoph Lameter <clameter@sgi.com> wrote:
> >>
> >>> static inline int page_zone_id(struct page *page)
> >>> {
> >>> - return (page->flags >> ZONETABLE_PGSHIFT) & ZONETABLE_MASK;
> >>> -}
> >>> -static inline struct zone *page_zone(struct page *page)
> >>> -{
> >>> - return zone_table[page_zone_id(page)];
> >>> + return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
> >>> }
> >> arm allmodconfig:
> >>
> >> include/linux/mm.h: In function `page_zone_id':
> >> include/linux/mm.h:450: warning: right shift count >= width of type
>
> On a separate note. I was able to get this puppy compiling enough to
> see this warning and fix it, but nowhere close to compiling a kernel.
>
> First, are you compiling here to a real kernel. If so, is this on a
> cross-compiler or a real system. If its a cross-compiler which version.
> Do you have recipe for success?
I use cross-compilers basically all the time.
$ARCH = alpha CT=gcc-4.1.0-glibc-2.3.6
$ARCH = arm CT=gcc-3.4.5-glibc-2.3.6
$ARCH = i386 CT=gcc-4.1.0-glibc-2.3.6
$ARCH = ia64 CT=gcc-3.4.5-glibc-2.3.6
$ARCH = m68k CT=gcc-4.1.0-glibc-2.3.6
$ARCH = mips CT=gcc-3.4.5-glibc-2.3.6
$ARCH = powerpc CT=gcc-4.1.0-glibc-2.3.6
$ARCH = s390 CT=gcc-4.1.0-glibc-2.3.6
$ARCH = sh CT=gcc-3.4.5-glibc-2.3.6
$ARCH = sparc CT=gcc-4.1.0-glibc-2.3.6
$ARCH = sparc64 CT=gcc-3.4.5-glibc-2.3.6
$ARCH = x86_64 CT=gcc-4.0.2-glibc-2.3.6
> I'd like to be incoporating more cross-compilation testing into our
> nightlies, but this one isn't playing ball.
>
Sens error messages and .config?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-27 11:27 ` [PATCH] Get rid of zone_table V2 Andy Whitcroft
2006-09-27 16:24 ` Andrew Morton
@ 2006-09-30 18:47 ` Christoph Lameter
2006-09-30 18:48 ` Christoph Lameter
2006-09-30 20:08 ` Andrew Morton
1 sibling, 2 replies; 36+ messages in thread
From: Christoph Lameter @ 2006-09-30 18:47 UTC (permalink / raw)
To: Andy Whitcroft; +Cc: Andrew Morton, linux-mm, Dave Hansen
There is still a problem after Andy's patch in connection with Optional
ZONE DMA that I pointed out earlier. If we only have a single zone then
ZONEID_PGSHIFT == 0. This is fine for the non NUMA case in which we have
only a single zone and ZONEID_MASK == 0 too. page_zone_id() will always be
0.
In the NUMA case we still have one zone per node. Thus we need to have a
correct ZONEID_PGSHIFT in order to isolate the node number from page
flags. Right now we take NODES_SHIFT bits starting from 0!
I am not exactly sure how to fix that the right way given the complex
nested macros. Andy may know.
The following patch checks for that condition. We can only allow
ZONEID_PGSHIFT to be zero if the ZONEID_MASK is also zero. (We cannot
check that with an #if because ZONEID_SHIFT contains a "sizeof(...)"
element)
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Index: linux-2.6.18-mm2/include/linux/mm.h
===================================================================
--- linux-2.6.18-mm2.orig/include/linux/mm.h 2006-09-30 13:22:27.732989275 -0500
+++ linux-2.6.18-mm2/include/linux/mm.h 2006-09-30 13:23:06.604463587 -0500
@@ -447,6 +447,7 @@ static inline enum zone_type page_zonenu
*/
static inline int page_zone_id(struct page *page)
{
+ BUG_ON(ZONEID_PGSHIFT == 0 && ZONEID_MASK);
return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-30 18:47 ` Christoph Lameter
@ 2006-09-30 18:48 ` Christoph Lameter
2006-09-30 20:08 ` Andrew Morton
1 sibling, 0 replies; 36+ messages in thread
From: Christoph Lameter @ 2006-09-30 18:48 UTC (permalink / raw)
To: Andy Whitcroft; +Cc: Andrew Morton, linux-mm, Dave Hansen
Insure that ZONEID_PGSHIFT is set even if ZONES_WIDTH is 0.
Andy needs to review this.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Index: linux-2.6.18-mm2/include/linux/mm.h
===================================================================
--- linux-2.6.18-mm2.orig/include/linux/mm.h 2006-09-30 13:23:06.604463587 -0500
+++ linux-2.6.18-mm2/include/linux/mm.h 2006-09-30 13:33:27.443455364 -0500
@@ -421,7 +421,12 @@ void split_page(struct page *page, unsig
#else
#define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT)
#endif
+
+#if ZONES_WIDTH > 0
#define ZONEID_PGSHIFT ZONES_PGSHIFT
+#else
+#define ZONEID_PGSHIFT NODES_PGOFF
+#endif
#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED
#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-30 18:47 ` Christoph Lameter
2006-09-30 18:48 ` Christoph Lameter
@ 2006-09-30 20:08 ` Andrew Morton
2006-10-02 15:57 ` Christoph Lameter
2006-10-02 17:10 ` Christoph Lameter
1 sibling, 2 replies; 36+ messages in thread
From: Andrew Morton @ 2006-09-30 20:08 UTC (permalink / raw)
To: Christoph Lameter; +Cc: Andy Whitcroft, linux-mm, Dave Hansen
On Sat, 30 Sep 2006 11:47:48 -0700 (PDT)
Christoph Lameter <clameter@sgi.com> wrote:
> There is still a problem after Andy's patch in connection with Optional
> ZONE DMA that I pointed out earlier. If we only have a single zone then
> ZONEID_PGSHIFT == 0. This is fine for the non NUMA case in which we have
> only a single zone and ZONEID_MASK == 0 too. page_zone_id() will always be
> 0.
>
> In the NUMA case we still have one zone per node. Thus we need to have a
> correct ZONEID_PGSHIFT in order to isolate the node number from page
> flags. Right now we take NODES_SHIFT bits starting from 0!
>
> I am not exactly sure how to fix that the right way given the complex
> nested macros. Andy may know.
>
> The following patch checks for that condition. We can only allow
> ZONEID_PGSHIFT to be zero if the ZONEID_MASK is also zero. (We cannot
> check that with an #if because ZONEID_SHIFT contains a "sizeof(...)"
> element)
>
> Signed-off-by: Christoph Lameter <clameter@sgi.com>
>
> Index: linux-2.6.18-mm2/include/linux/mm.h
> ===================================================================
> --- linux-2.6.18-mm2.orig/include/linux/mm.h 2006-09-30 13:22:27.732989275 -0500
> +++ linux-2.6.18-mm2/include/linux/mm.h 2006-09-30 13:23:06.604463587 -0500
> @@ -447,6 +447,7 @@ static inline enum zone_type page_zonenu
> */
> static inline int page_zone_id(struct page *page)
> {
> + BUG_ON(ZONEID_PGSHIFT == 0 && ZONEID_MASK);
> return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
> }
>
BUILD_BUG_ON()?
What patch is this patch patching? get-rid-of-zone_table.patch or one of
the ZONE_DMA-optionality ones?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-30 20:08 ` Andrew Morton
@ 2006-10-02 15:57 ` Christoph Lameter
2006-10-02 17:10 ` Christoph Lameter
1 sibling, 0 replies; 36+ messages in thread
From: Christoph Lameter @ 2006-10-02 15:57 UTC (permalink / raw)
To: Andrew Morton; +Cc: Andy Whitcroft, linux-mm, Dave Hansen
On Sat, 30 Sep 2006, Andrew Morton wrote:
> What patch is this patch patching? get-rid-of-zone_table.patch or one of
> the ZONE_DMA-optionality ones?
It addresses breakage in optional ZONE DMA introduced by Andy Whitcrofts
fix to get-rid-of-zone_table.patch.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-09-30 20:08 ` Andrew Morton
2006-10-02 15:57 ` Christoph Lameter
@ 2006-10-02 17:10 ` Christoph Lameter
2006-10-04 10:10 ` Andy Whitcroft
2006-10-06 14:45 ` [PATCH] zoneid fix up calculations for ZONEID_PGSHIFT Andy Whitcroft
1 sibling, 2 replies; 36+ messages in thread
From: Christoph Lameter @ 2006-10-02 17:10 UTC (permalink / raw)
To: Andrew Morton; +Cc: Andy Whitcroft, linux-mm, Dave Hansen
On Sat, 30 Sep 2006, Andrew Morton wrote:
> BUILD_BUG_ON()?
Good idea. We may want to take all of these patches out if Andy can come
up with an easy modification to the macros that avoids ZONEID_PGSHIFT to
unintentionally become 0.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Index: linux-2.6.18-mm2/include/linux/mm.h
===================================================================
--- linux-2.6.18-mm2.orig/include/linux/mm.h 2006-09-30 13:33:27.000000000 -0500
+++ linux-2.6.18-mm2/include/linux/mm.h 2006-10-02 12:08:14.387946148 -0500
@@ -452,7 +452,7 @@ static inline enum zone_type page_zonenu
*/
static inline int page_zone_id(struct page *page)
{
- BUG_ON(ZONEID_PGSHIFT == 0 && ZONEID_MASK);
+ BUILD_BUG_ON(ZONEID_PGSHIFT == 0 && ZONEID_MASK);
return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] Get rid of zone_table V2
2006-10-02 17:10 ` Christoph Lameter
@ 2006-10-04 10:10 ` Andy Whitcroft
2006-10-06 14:45 ` [PATCH] zoneid fix up calculations for ZONEID_PGSHIFT Andy Whitcroft
1 sibling, 0 replies; 36+ messages in thread
From: Andy Whitcroft @ 2006-10-04 10:10 UTC (permalink / raw)
To: Christoph Lameter; +Cc: Andrew Morton, linux-mm, Dave Hansen
Christoph Lameter wrote:
> On Sat, 30 Sep 2006, Andrew Morton wrote:
>
>> BUILD_BUG_ON()?
>
> Good idea. We may want to take all of these patches out if Andy can come
> up with an easy modification to the macros that avoids ZONEID_PGSHIFT to
> unintentionally become 0.
Sorry, been out handling a unrelated disaster at work, and then sick.
Will look at this and see what I can come up with.
-apw
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH] zoneid fix up calculations for ZONEID_PGSHIFT
2006-10-02 17:10 ` Christoph Lameter
2006-10-04 10:10 ` Andy Whitcroft
@ 2006-10-06 14:45 ` Andy Whitcroft
2006-10-06 17:00 ` Christoph Lameter
1 sibling, 1 reply; 36+ messages in thread
From: Andy Whitcroft @ 2006-10-06 14:45 UTC (permalink / raw)
To: Christoph Lameter; +Cc: Andrew Morton, linux-mm, Andy Whitcroft, Dave Hansen
Ok, it seems the sensible thing is to actually calculate
ZONEID_PGSHIFT and ZONEID_SHIFT off of the actual fields we want
it to contain. The id is either the NODE,ZONE or the SECTION,ZONE
depending on whats available. For each field we have the location
of its right hand edge. The right hand edge of the combined field
is the lower of the two. It is possible for there to be no bits
in our identifier in the non-numa single zone case, so we also need
to take the overall width being zero (to avoid a shift error).
I think the following should do the trick. I've tested this outside
the kernel in all the combinations I could think of and it seems to
do the right thing. I've build tested on a few machines, but there
seems to be so many unrelated problems about with 2.6.19-mm3 that
I've not tested it as extensivly as I might prefer.
How does this look, against 2.6.19-mm3.
-apw
=== 8< ===
zoneid: fix up calculations for ZONEID_PGSHIFT
Currently if we have a non-zero ZONES_SHIFT we assume we are able
to rely on that as the bottom edge of the ZONEID, if not then we
use the NODES_PGOFF as the right end of either NODES _or_ SECTION.
This latter is more luck than judgement and would be incorrect if
we reordered the SECTION,NODE,ZONE options in the fields space.
Really what we want is the lower of the right hand end of the two
fields we are using (either NODE,ZONE or SECTION,ZONE). Codify that
explicitly. As always allow for there being no bits in either of
the fields, such as might be valid in a non-numa machine with only
a zone NORMAL.
I have checked that the compiler is still able to constant fold
all of this away correctly.
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
---
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d9d0b46..98ed057 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -418,15 +418,15 @@ #define ZONES_PGSHIFT (ZONES_PGOFF * (Z
/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allcator */
#ifdef NODE_NOT_IN_PAGEFLAGS
#define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT)
+#define ZONEID_PGOFF ((SECTIONS_PGOFF < ZONES_PGOFF)? \
+ SECTIONS_PGOFF : ZONES_PGOFF)
#else
#define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT)
+#define ZONEID_PGOFF ((NODES_PGOFF < ZONES_PGOFF)? \
+ NODES_PGOFF : ZONES_PGOFF)
#endif
-#if ZONES_WIDTH > 0
-#define ZONEID_PGSHIFT ZONES_PGSHIFT
-#else
-#define ZONEID_PGSHIFT NODES_PGOFF
-#endif
+#define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0))
#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED
#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED
@@ -452,7 +452,6 @@ static inline enum zone_type page_zonenu
*/
static inline int page_zone_id(struct page *page)
{
- BUILD_BUG_ON(ZONEID_PGSHIFT == 0 && ZONEID_MASK);
return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH] zoneid fix up calculations for ZONEID_PGSHIFT
2006-10-06 14:45 ` [PATCH] zoneid fix up calculations for ZONEID_PGSHIFT Andy Whitcroft
@ 2006-10-06 17:00 ` Christoph Lameter
0 siblings, 0 replies; 36+ messages in thread
From: Christoph Lameter @ 2006-10-06 17:00 UTC (permalink / raw)
To: Andy Whitcroft; +Cc: Andrew Morton, linux-mm, Dave Hansen
On Fri, 6 Oct 2006, Andy Whitcroft wrote:
> How does this look, against 2.6.19-mm3.
Looks fine to me. Its against 2.6.18-mm3 I think (unless you have some way
to do time travel..)
Acked-by: Christoph Lameter <clameter@sgi.com>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
end of thread, other threads:[~2006-10-06 17:00 UTC | newest]
Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-09-18 19:21 [PATCH] Get rid of zone_table V2 Christoph Lameter
2006-09-18 20:28 ` Andrew Morton
2006-09-18 22:51 ` Christoph Lameter
2006-09-18 23:15 ` Andrew Morton
2006-09-18 23:46 ` Christoph Lameter
2006-09-18 23:58 ` Andrew Morton
2006-09-19 0:08 ` Christoph Lameter
2006-09-19 0:14 ` Christoph Lameter
2006-09-19 0:31 ` Andrew Morton
2006-09-19 1:20 ` Christoph Lameter
2006-09-19 6:12 ` Christoph Lameter
2006-09-19 6:33 ` Andrew Morton
2006-09-19 14:10 ` Christoph Lameter
2006-09-19 15:38 ` Andrew Morton
2006-09-19 15:41 ` Christoph Lameter
2006-09-19 16:23 ` Nick Piggin
2006-09-19 16:45 ` Nick Piggin
2006-09-19 17:50 ` Christoph Lameter
2006-09-19 18:24 ` Andi Kleen
2006-09-19 16:17 ` Nick Piggin
2006-09-24 10:06 ` Andrew Morton
2006-09-24 16:58 ` Christoph Lameter
2006-09-27 9:19 ` Andrew Morton
2006-09-27 9:26 ` Andy Whitcroft
2006-09-27 11:23 ` [PATCH] zone table removal miss merge Andy Whitcroft
2006-09-27 16:19 ` Christoph Lameter
2006-09-27 11:27 ` [PATCH] Get rid of zone_table V2 Andy Whitcroft
2006-09-27 16:24 ` Andrew Morton
2006-09-30 18:47 ` Christoph Lameter
2006-09-30 18:48 ` Christoph Lameter
2006-09-30 20:08 ` Andrew Morton
2006-10-02 15:57 ` Christoph Lameter
2006-10-02 17:10 ` Christoph Lameter
2006-10-04 10:10 ` Andy Whitcroft
2006-10-06 14:45 ` [PATCH] zoneid fix up calculations for ZONEID_PGSHIFT Andy Whitcroft
2006-10-06 17:00 ` Christoph Lameter
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox