linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH]Remove pgdat list ver.2 [1/2]
@ 2005-09-30 13:07 Yasunori Goto
  2005-09-30 15:09 ` Dave Hansen
  2005-09-30 16:07 ` Dave Hansen
  0 siblings, 2 replies; 5+ messages in thread
From: Yasunori Goto @ 2005-09-30 13:07 UTC (permalink / raw)
  To: Dave Hansen; +Cc: linux-mm, linux-ia64

Hi. Dave-san.

I updated patches to remove pgdat link. They are for 2.6.14-rc2.
Please include this in your -mhp patch set.

Bye.

------------------------
 This patch removes the pgdat linked list from the pgdat structure,
because I think it is redundant.
In the current implementation, the pgdat structure contains this linked list:
struct pglist_data{
        :
   struct pglist_data *pgdat_next;
        :
}
This is used for searching other zones and nodes by the for_each_pgdat and
for_each_zone macros. So, if a node is hot-added,
the system has to not only set the node's bit in node_online_map,
but also link the new node's pgdat into this list.
However, all users of the pgdat linked list just want to know the
next (online) node. So, I think node_online_map is enough information
for them to find other nodes, and the hot add/remove code will be simpler.

Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>

Index: pgdat_link/include/linux/mmzone.h
===================================================================
--- pgdat_link.orig/include/linux/mmzone.h	2005-09-30 19:04:26.181589425 +0900
+++ pgdat_link/include/linux/mmzone.h	2005-09-30 19:05:53.018502424 +0900
@@ -12,6 +12,7 @@
 #include <linux/threads.h>
 #include <linux/numa.h>
 #include <linux/init.h>
+#include <linux/nodemask.h>
 #include <asm/atomic.h>
 
 /* Free memory management - zoned buddy allocator.  */
@@ -293,8 +294,6 @@ typedef struct pglist_data {
 #endif
 #define nid_page_nr(nid, pagenr) 	pgdat_page_nr(NODE_DATA(nid),(pagenr))
 
-extern struct pglist_data *pgdat_list;
-
 void __get_zone_counts(unsigned long *active, unsigned long *inactive,
 			unsigned long *free, struct pglist_data *pgdat);
 void get_zone_counts(unsigned long *active, unsigned long *inactive,
@@ -319,19 +318,25 @@ unsigned long __init node_memmap_size_by
  */
 #define zone_idx(zone)		((zone) - (zone)->zone_pgdat->node_zones)
 
+#define first_online_pgdat() NODE_DATA(first_online_node())
+#define next_online_pgdat(pgdat)				\
+	((next_online_node((pgdat)->node_id) != MAX_NUMNODES) ?	\
+	 NODE_DATA(next_online_node((pgdat)->node_id)) : NULL)
+
 /**
- * for_each_pgdat - helper macro to iterate over all nodes
+ * for_each_pgdat - helper macro to iterate over all online nodes
  * @pgdat - pointer to a pg_data_t variable
  *
  * Meant to help with common loops of the form
- * pgdat = pgdat_list;
+ * pgdat = NODE_DATA(first_online_node())
  * while(pgdat) {
  * 	...
- * 	pgdat = pgdat->pgdat_next;
+ * 	pgdat = (next node is online) ? NODE_DATA(next_node) : NULL ;
  * }
  */
 #define for_each_pgdat(pgdat) \
-	for (pgdat = pgdat_list; pgdat; pgdat = pgdat->pgdat_next)
+	for (pgdat = first_online_pgdat(); pgdat;	\
+	      pgdat = next_online_pgdat(pgdat))
 
 /*
  * next_zone - helper magic for for_each_zone()
@@ -343,11 +348,14 @@ static inline struct zone *next_zone(str
 
 	if (zone < pgdat->node_zones + MAX_NR_ZONES - 1)
 		zone++;
-	else if (pgdat->pgdat_next) {
-		pgdat = pgdat->pgdat_next;
-		zone = pgdat->node_zones;
-	} else
-		zone = NULL;
+	else {
+		pgdat = next_online_pgdat(pgdat);
+
+		if (pgdat)
+			zone = pgdat->node_zones;
+	        else
+			zone = NULL;
+	}
 
 	return zone;
 }
@@ -360,7 +368,7 @@ static inline struct zone *next_zone(str
  * fills it in. This basically means for_each_zone() is an
  * easier to read version of this piece of code:
  *
- * for (pgdat = pgdat_list; pgdat; pgdat = pgdat->node_next)
+ * for (pgdat = first_online_pgdat(); pgdat; pgdat = next_online_pgdat(pgdat))
  * 	for (i = 0; i < MAX_NR_ZONES; ++i) {
  * 		struct zone * z = pgdat->node_zones + i;
  * 		...
@@ -368,7 +376,8 @@ static inline struct zone *next_zone(str
  * }
  */
 #define for_each_zone(zone) \
-	for (zone = pgdat_list->node_zones; zone; zone = next_zone(zone))
+	for (zone = first_online_pgdat()->node_zones;	\
+	     zone; zone = next_zone(zone))
 
 static inline int is_highmem_idx(int idx)
 {
Index: pgdat_link/include/linux/nodemask.h
===================================================================
--- pgdat_link.orig/include/linux/nodemask.h	2005-06-20 14:19:50.000000000 +0900
+++ pgdat_link/include/linux/nodemask.h	2005-09-30 19:05:00.894479625 +0900
@@ -232,6 +232,9 @@ static inline int __next_node(int n, con
 	return min_t(int,MAX_NUMNODES,find_next_bit(srcp->bits, MAX_NUMNODES, n+1));
 }
 
+#define first_online_node() first_node(node_online_map)
+#define next_online_node(node) next_node((node), node_online_map)
+
 #define nodemask_of_node(node)						\
 ({									\
 	typeof(_unused_nodemask_arg_) m;				\
Index: pgdat_link/mm/bootmem.c
===================================================================
--- pgdat_link.orig/mm/bootmem.c	2005-09-30 19:04:26.633737857 +0900
+++ pgdat_link/mm/bootmem.c	2005-09-30 19:05:00.895456187 +0900
@@ -61,17 +61,6 @@ static unsigned long __init init_bootmem
 {
 	bootmem_data_t *bdata = pgdat->bdata;
 	unsigned long mapsize = ((end - start)+7)/8;
-	static struct pglist_data *pgdat_last;
-
-	pgdat->pgdat_next = NULL;
-	/* Add new nodes last so that bootmem always starts
-	   searching in the first nodes, not the last ones */
-	if (pgdat_last)
-		pgdat_last->pgdat_next = pgdat;
-	else {
-		pgdat_list = pgdat; 	
-		pgdat_last = pgdat;
-	}
 
 	mapsize = ALIGN(mapsize, sizeof(long));
 	bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
@@ -392,7 +381,7 @@ unsigned long __init free_all_bootmem (v
 
 void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal)
 {
-	pg_data_t *pgdat = pgdat_list;
+	pg_data_t *pgdat;
 	void *ptr;
 
 	for_each_pgdat(pgdat)
Index: pgdat_link/mm/page_alloc.c
===================================================================
--- pgdat_link.orig/mm/page_alloc.c	2005-09-30 19:04:26.645456607 +0900
+++ pgdat_link/mm/page_alloc.c	2005-09-30 19:05:00.897409312 +0900
@@ -47,7 +47,6 @@ nodemask_t node_online_map __read_mostly
 EXPORT_SYMBOL(node_online_map);
 nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
-struct pglist_data *pgdat_list __read_mostly;
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
 long nr_swap_pages;
@@ -2025,8 +2024,9 @@ static void *frag_start(struct seq_file 
 	pg_data_t *pgdat;
 	loff_t node = *pos;
 
-	for (pgdat = pgdat_list; pgdat && node; pgdat = pgdat->pgdat_next)
-		--node;
+	for_each_pgdat(pgdat)
+		if (!node--)
+			break;
 
 	return pgdat;
 }
@@ -2036,7 +2036,7 @@ static void *frag_next(struct seq_file *
 	pg_data_t *pgdat = (pg_data_t *)arg;
 
 	(*pos)++;
-	return pgdat->pgdat_next;
+	return next_online_pgdat(pgdat);
 }
 
 static void frag_stop(struct seq_file *m, void *arg)


-- 
Yasunori Goto 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH]Remove pgdat list ver.2 [1/2]
  2005-09-30 13:07 [PATCH]Remove pgdat list ver.2 [1/2] Yasunori Goto
@ 2005-09-30 15:09 ` Dave Hansen
  2005-09-30 16:03   ` Dave Hansen
  2005-09-30 16:07 ` Dave Hansen
  1 sibling, 1 reply; 5+ messages in thread
From: Dave Hansen @ 2005-09-30 15:09 UTC (permalink / raw)
  To: Yasunori Goto; +Cc: linux-mm, ia64 list

On Fri, 2005-09-30 at 22:07 +0900, Yasunori Goto wrote:
> I updated patches to remove pgdat link. They are for 2.6.14-rc2.
> Please include this in your -mhp patch set.

Looks very nice.  I'll pull them in.

-- Dave

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH]Remove pgdat list ver.2 [1/2]
  2005-09-30 15:09 ` Dave Hansen
@ 2005-09-30 16:03   ` Dave Hansen
  0 siblings, 0 replies; 5+ messages in thread
From: Dave Hansen @ 2005-09-30 16:03 UTC (permalink / raw)
  To: Yasunori Goto; +Cc: linux-mm, ia64 list

On Fri, 2005-09-30 at 08:09 -0700, Dave Hansen wrote:
> On Fri, 2005-09-30 at 22:07 +0900, Yasunori Goto wrote:
> > I updated patches to remove pgdat link. They are for 2.6.14-rc2.
> > Please include this in your -mhp patch set.
> 
> Looks very nice.  I'll pull them in.

I spoke too soon :)

linux/mmzone.h uses the !NUMA NODE_DATA() before it is declared.  I'm
seeing if I can work around it now.

-- Dave

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH]Remove pgdat list ver.2 [1/2]
  2005-09-30 13:07 [PATCH]Remove pgdat list ver.2 [1/2] Yasunori Goto
  2005-09-30 15:09 ` Dave Hansen
@ 2005-09-30 16:07 ` Dave Hansen
  2005-10-03  5:19   ` Yasunori Goto
  1 sibling, 1 reply; 5+ messages in thread
From: Dave Hansen @ 2005-09-30 16:07 UTC (permalink / raw)
  To: Yasunori Goto; +Cc: linux-mm, ia64 list

[-- Attachment #1: Type: text/plain, Size: 125 bytes --]

This works around my compile problem for now.  But, it might cause some
more issues.  Can you take a closer look?



-- Dave

[-- Attachment #2: no-pgdat-list-fix.patch --]
[-- Type: text/x-patch, Size: 4178 bytes --]



---

 memhotplug-dave/include/linux/mmzone.h |  104 ++++++++++++++++-----------------
 1 files changed, 53 insertions(+), 51 deletions(-)

diff -puN include/linux/mmzone.h~no-pgdat-list-fix include/linux/mmzone.h
--- memhotplug/include/linux/mmzone.h~no-pgdat-list-fix	2005-09-30 08:59:56.000000000 -0700
+++ memhotplug-dave/include/linux/mmzone.h	2005-09-30 09:06:10.000000000 -0700
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/seqlock.h>
 #include <asm/atomic.h>
+#include <asm/mmzone.h>
 #include <asm/semaphore.h>
 
 /* Free memory management - zoned buddy allocator.  */
@@ -342,6 +343,58 @@ static inline void memory_present(int ni
 unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
 #endif
 
+static inline int is_highmem_idx(int idx)
+{
+	return (idx == ZONE_HIGHMEM);
+}
+
+static inline int is_normal_idx(int idx)
+{
+	return (idx == ZONE_NORMAL);
+}
+/**
+ * is_highmem - helper function to quickly check if a struct zone is a 
+ *              highmem zone or not.  This is an attempt to keep references
+ *              to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
+ * @zone - pointer to struct zone variable
+ */
+static inline int is_highmem(struct zone *zone)
+{
+	return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
+}
+
+static inline int is_normal(struct zone *zone)
+{
+	return zone == zone->zone_pgdat->node_zones + ZONE_NORMAL;
+}
+
+/* These two functions are used to setup the per zone pages min values */
+struct ctl_table;
+struct file;
+int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *, 
+					void __user *, size_t *, loff_t *);
+extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
+int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
+					void __user *, size_t *, loff_t *);
+
+#include <linux/topology.h>
+/* Returns the number of the current Node. */
+#define numa_node_id()		(cpu_to_node(raw_smp_processor_id()))
+
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+
+extern struct pglist_data contig_page_data;
+#define NODE_DATA(nid)		(&contig_page_data)
+#define NODE_MEM_MAP(nid)	mem_map
+#define MAX_NODES_SHIFT		1
+#define pfn_to_nid(pfn)		(0)
+
+#else /* CONFIG_NEED_MULTIPLE_NODES */
+
+#include <asm/mmzone.h>
+
+#endif /* !CONFIG_NEED_MULTIPLE_NODES */
+
 /*
  * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
  */
@@ -408,57 +461,6 @@ static inline struct zone *next_zone(str
 	for (zone = first_online_pgdat()->node_zones;	\
 	     zone; zone = next_zone(zone))
 
-static inline int is_highmem_idx(int idx)
-{
-	return (idx == ZONE_HIGHMEM);
-}
-
-static inline int is_normal_idx(int idx)
-{
-	return (idx == ZONE_NORMAL);
-}
-/**
- * is_highmem - helper function to quickly check if a struct zone is a 
- *              highmem zone or not.  This is an attempt to keep references
- *              to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
- * @zone - pointer to struct zone variable
- */
-static inline int is_highmem(struct zone *zone)
-{
-	return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
-}
-
-static inline int is_normal(struct zone *zone)
-{
-	return zone == zone->zone_pgdat->node_zones + ZONE_NORMAL;
-}
-
-/* These two functions are used to setup the per zone pages min values */
-struct ctl_table;
-struct file;
-int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *, 
-					void __user *, size_t *, loff_t *);
-extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
-int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
-					void __user *, size_t *, loff_t *);
-
-#include <linux/topology.h>
-/* Returns the number of the current Node. */
-#define numa_node_id()		(cpu_to_node(raw_smp_processor_id()))
-
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-
-extern struct pglist_data contig_page_data;
-#define NODE_DATA(nid)		(&contig_page_data)
-#define NODE_MEM_MAP(nid)	mem_map
-#define MAX_NODES_SHIFT		1
-#define pfn_to_nid(pfn)		(0)
-
-#else /* CONFIG_NEED_MULTIPLE_NODES */
-
-#include <asm/mmzone.h>
-
-#endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
 #ifdef CONFIG_SPARSEMEM
 #include <asm/sparsemem.h>
_

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH]Remove pgdat list ver.2 [1/2]
  2005-09-30 16:07 ` Dave Hansen
@ 2005-10-03  5:19   ` Yasunori Goto
  0 siblings, 0 replies; 5+ messages in thread
From: Yasunori Goto @ 2005-10-03  5:19 UTC (permalink / raw)
  To: Dave Hansen; +Cc: linux-mm, ia64 list

> This works around my compile problem for now.  But, it might cause some
> more issues.  Can you take a closer look?

It works well in my ia64 box.
But I have not yet understood why this patch also moves the lines from
is_highmem_idx() to lowmem_reserve_ratio_sysctl_handler().
Is it necessary?
If not, the patch could become a bit smaller. :-)

Thanks.

-- 
Yasunori Goto 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2005-10-03  5:19 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-09-30 13:07 [PATCH]Remove pgdat list ver.2 [1/2] Yasunori Goto
2005-09-30 15:09 ` Dave Hansen
2005-09-30 16:03   ` Dave Hansen
2005-09-30 16:07 ` Dave Hansen
2005-10-03  5:19   ` Yasunori Goto

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox