linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [RFC][PATCH 0/4] unify both copies of build_zonelists()
@ 2005-09-20 17:23 Dave Hansen
  2005-09-20 17:23 ` [RFC][PATCH 1/4] build_zonelists(): create zone_index_to_type() helper Dave Hansen
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Dave Hansen @ 2005-09-20 17:23 UTC (permalink / raw)
  To: linux-mm; +Cc: linux-kernel, Dave Hansen

There are currently two copies of build_zonelists(): one
for NUMA systems, and one for flat systems.  The following
patches make the NUMA case work for the flat case as well.

This set is a little more thorough than the single patch
I posted last week.

I'd like these to get a run in -mm if there aren't any
objections.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [RFC][PATCH 1/4] build_zonelists(): create zone_index_to_type() helper
  2005-09-20 17:23 [RFC][PATCH 0/4] unify both copies of build_zonelists() Dave Hansen
@ 2005-09-20 17:23 ` Dave Hansen
  2005-09-20 17:23 ` [RFC][PATCH 2/4] build_zonelists(): abstract node_load[] operations Dave Hansen
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Dave Hansen @ 2005-09-20 17:23 UTC (permalink / raw)
  To: linux-mm; +Cc: linux-kernel, Dave Hansen

The two build_zonelists() do identical conversions from a
zone index variable (__GFP_*) to a zone type variable
(ZONE_*).  Create a common helper.

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---

 memhotplug-dave/mm/page_alloc.c |   24 ++++++++++++++----------
 1 files changed, 14 insertions(+), 10 deletions(-)

diff -puN mm/page_alloc.c~B1-build_zonelists_unification mm/page_alloc.c
--- memhotplug/mm/page_alloc.c~B1-build_zonelists_unification	2005-09-14 09:32:37.000000000 -0700
+++ memhotplug-dave/mm/page_alloc.c	2005-09-14 09:32:37.000000000 -0700
@@ -1451,6 +1451,18 @@ static int __init build_zonelists_node(p
 	return j;
 }
 
+static inline zone_index_to_type(int index)
+{
+	int type = ZONE_NORMAL;
+
+	if (index & __GFP_HIGHMEM)
+		type = ZONE_HIGHMEM;
+	if (index & __GFP_DMA)
+		type = ZONE_DMA;
+	return type;
+}
+
+
 #ifdef CONFIG_NUMA
 #define MAX_NODE_LOAD (num_online_nodes())
 static int __initdata node_load[MAX_NUMNODES];
@@ -1547,11 +1559,7 @@ static void __init build_zonelists(pg_da
 			zonelist = pgdat->node_zonelists + i;
 			for (j = 0; zonelist->zones[j] != NULL; j++);
 
-			k = ZONE_NORMAL;
-			if (i & __GFP_HIGHMEM)
-				k = ZONE_HIGHMEM;
-			if (i & __GFP_DMA)
-				k = ZONE_DMA;
+			k = zone_index_to_type(i);
 
 	 		j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
 			zonelist->zones[j] = NULL;
@@ -1572,11 +1580,7 @@ static void __init build_zonelists(pg_da
 		zonelist = pgdat->node_zonelists + i;
 
 		j = 0;
-		k = ZONE_NORMAL;
-		if (i & __GFP_HIGHMEM)
-			k = ZONE_HIGHMEM;
-		if (i & __GFP_DMA)
-			k = ZONE_DMA;
+		k = zone_index_to_type(i);
 
  		j = build_zonelists_node(pgdat, zonelist, j, k);
  		/*
_

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [RFC][PATCH 2/4] build_zonelists(): abstract node_load[] operations
  2005-09-20 17:23 [RFC][PATCH 0/4] unify both copies of build_zonelists() Dave Hansen
  2005-09-20 17:23 ` [RFC][PATCH 1/4] build_zonelists(): create zone_index_to_type() helper Dave Hansen
@ 2005-09-20 17:23 ` Dave Hansen
  2005-09-20 17:23 ` [RFC][PATCH 3/4] build_zonelists() unification: don't re-zero zonelist Dave Hansen
  2005-09-20 17:23 ` [RFC][PATCH 4/4] unify both copies of build_zonelists() Dave Hansen
  3 siblings, 0 replies; 5+ messages in thread
From: Dave Hansen @ 2005-09-20 17:23 UTC (permalink / raw)
  To: linux-mm; +Cc: linux-kernel, Dave Hansen

We're shortly going to use find_next_best_node() for both
NUMA and non-NUMA configurations.  So, take node_load[],
and hide it behind a couple of helper functions that are
no-ops when NUMA is off.

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---

 memhotplug-dave/mm/page_alloc.c |   22 +++++++++++++++++++---
 1 files changed, 19 insertions(+), 3 deletions(-)

diff -puN mm/page_alloc.c~B1.1-build_zonelists_unification mm/page_alloc.c
--- memhotplug/mm/page_alloc.c~B1.1-build_zonelists_unification	2005-09-14 09:32:38.000000000 -0700
+++ memhotplug-dave/mm/page_alloc.c	2005-09-14 09:32:38.000000000 -0700
@@ -1463,9 +1463,25 @@ static inline zone_index_to_type(int ind
 }
 
 
-#ifdef CONFIG_NUMA
 #define MAX_NODE_LOAD (num_online_nodes())
+
+#ifdef CONFIG_NUMA
 static int __initdata node_load[MAX_NUMNODES];
+static int __init get_node_load(int node)
+{
+	return node_load[node];
+}
+static void __init increment_node_load(int node, int load)
+{
+	node_load[node] += load;
+}
+#else
+static inline int get_node_load(int node)
+{
+	return 0;
+}
+static inline void increment_node_load(int node, int load) {}
+#endif
 /**
  * find_next_best_node - find the next node that should appear in a given node's fallback list
  * @node: node whose fallback list we're appending
@@ -1512,7 +1528,7 @@ static int __init find_next_best_node(in
 
 		/* Slight preference for less loaded node */
 		val *= (MAX_NODE_LOAD*MAX_NUMNODES);
-		val += node_load[n];
+		val += get_node_load(n);
 
 		if (val < min_val) {
 			min_val = val;
@@ -1552,7 +1568,7 @@ static void __init build_zonelists(pg_da
 		 */
 		if (node_distance(local_node, node) !=
 				node_distance(local_node, prev_node))
-			node_load[node] += load;
+			increment_node_load(node, load);
 		prev_node = node;
 		load--;
 		for (i = 0; i < GFP_ZONETYPES; i++) {
_

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [RFC][PATCH 3/4] build_zonelists() unification: don't re-zero zonelist
  2005-09-20 17:23 [RFC][PATCH 0/4] unify both copies of build_zonelists() Dave Hansen
  2005-09-20 17:23 ` [RFC][PATCH 1/4] build_zonelists(): create zone_index_to_type() helper Dave Hansen
  2005-09-20 17:23 ` [RFC][PATCH 2/4] build_zonelists(): abstract node_load[] operations Dave Hansen
@ 2005-09-20 17:23 ` Dave Hansen
  2005-09-20 17:23 ` [RFC][PATCH 4/4] unify both copies of build_zonelists() Dave Hansen
  3 siblings, 0 replies; 5+ messages in thread
From: Dave Hansen @ 2005-09-20 17:23 UTC (permalink / raw)
  To: linux-mm; +Cc: linux-kernel, Dave Hansen

The pgdats, and thus the zonelists, are either statically
allocated in BSS, cleared by the bootmem allocator, or
cleared by arch code such as remapped_pgdat_init(). There
is no need to re-zero them here.

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---

 memhotplug-dave/mm/page_alloc.c |    6 ------
 1 files changed, 6 deletions(-)

diff -puN mm/page_alloc.c~B1.2-build_zonelists_unification mm/page_alloc.c
--- memhotplug/mm/page_alloc.c~B1.2-build_zonelists_unification	2005-09-14 09:32:38.000000000 -0700
+++ memhotplug-dave/mm/page_alloc.c	2005-09-14 09:32:38.000000000 -0700
@@ -1549,12 +1549,6 @@ static void __init build_zonelists(pg_da
 	struct zonelist *zonelist;
 	nodemask_t used_mask;
 
-	/* initialize zonelists */
-	for (i = 0; i < GFP_ZONETYPES; i++) {
-		zonelist = pgdat->node_zonelists + i;
-		zonelist->zones[0] = NULL;
-	}
-
 	/* NUMA-aware ordering of nodes */
 	local_node = pgdat->node_id;
 	load = num_online_nodes();
_

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [RFC][PATCH 4/4] unify both copies of build_zonelists()
  2005-09-20 17:23 [RFC][PATCH 0/4] unify both copies of build_zonelists() Dave Hansen
                   ` (2 preceding siblings ...)
  2005-09-20 17:23 ` [RFC][PATCH 3/4] build_zonelists() unification: don't re-zero zonelist Dave Hansen
@ 2005-09-20 17:23 ` Dave Hansen
  3 siblings, 0 replies; 5+ messages in thread
From: Dave Hansen @ 2005-09-20 17:23 UTC (permalink / raw)
  To: linux-mm; +Cc: linux-kernel, Dave Hansen

Once the last three patches are applied, find_next_best_node()
has three properties which make its behavior identical to the
way that the !NUMA build_zonelists() behaves.

First, this code from build_zonelists():

	j = 0;
	k = zone_index_to_type(i);
	j = build_zonelists_node(pgdat, zonelist, j, k);
	
makes sure that the node for which we're building a zonelist is
first in that zone's list.  That is functionally equivalent to
this code in find_next_best_node():

	/* Use the local node if we haven't already */
	if (!node_isset(node, *used_node_mask)) {
		best_node = node;
		break;
	}

Next, the !NUMA build_zonelists() starts at the local node,
and searches all larger-numbered nodes, then searches all
nodes from 0 back up to the local node:

        for (node = local_node + 1; node < MAX_NUMNODES; node++) {
                if (!node_online(node))
          	     	continue;
        	j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
        }
        for (node = 0; node < local_node; node++) {
                if (!node_online(node))
                	continue;
                j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
        }

Instead of doing this explicitly, find_next_best_node()
uses a modulo but, again, the behavior is the same:

        /* Start from local node */
        n = (node+i) % num_online_nodes();

Lastly, "val" will be equivalent for each find_next_best_node() loop
iteration because:
1. node_distance() always returns the same value (except when
   node == n, but that never happens because of the local node
   check above)
2. The 'if (!cpus_empty(tmp))' check will never succeed because
   the cpumask for !NUMA is cpu_online_map, which is never empty.
3. (MAX_NODE_LOAD*MAX_NUMNODES) == 1
4. get_node_load() == 0

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---

 memhotplug-dave/mm/page_alloc.c |   41 ----------------------------------------
 1 files changed, 41 deletions(-)

diff -puN mm/page_alloc.c~B1.3-build_zonelists_unification mm/page_alloc.c
--- memhotplug/mm/page_alloc.c~B1.3-build_zonelists_unification	2005-09-14 09:32:39.000000000 -0700
+++ memhotplug-dave/mm/page_alloc.c	2005-09-14 09:32:39.000000000 -0700
@@ -1577,47 +1577,6 @@ static void __init build_zonelists(pg_da
 	}
 }
 
-#else	/* CONFIG_NUMA */
-
-static void __init build_zonelists(pg_data_t *pgdat)
-{
-	int i, j, k, node, local_node;
-
-	local_node = pgdat->node_id;
-	for (i = 0; i < GFP_ZONETYPES; i++) {
-		struct zonelist *zonelist;
-
-		zonelist = pgdat->node_zonelists + i;
-
-		j = 0;
-		k = zone_index_to_type(i);
-
- 		j = build_zonelists_node(pgdat, zonelist, j, k);
- 		/*
- 		 * Now we build the zonelist so that it contains the zones
- 		 * of all the other nodes.
- 		 * We don't want to pressure a particular node, so when
- 		 * building the zones for node N, we make sure that the
- 		 * zones coming right after the local ones are those from
- 		 * node N+1 (modulo N)
- 		 */
-		for (node = local_node + 1; node < MAX_NUMNODES; node++) {
-			if (!node_online(node))
-				continue;
-			j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
-		}
-		for (node = 0; node < local_node; node++) {
-			if (!node_online(node))
-				continue;
-			j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
-		}
-
-		zonelist->zones[j] = NULL;
-	}
-}
-
-#endif	/* CONFIG_NUMA */
-
 void __init build_all_zonelists(void)
 {
 	int i;
diff -L b.txt -puN /dev/null /dev/null
_

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2005-09-20 17:23 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-09-20 17:23 [RFC][PATCH 0/4] unify both copies of build_zonelists() Dave Hansen
2005-09-20 17:23 ` [RFC][PATCH 1/4] build_zonelists(): create zone_index_to_type() helper Dave Hansen
2005-09-20 17:23 ` [RFC][PATCH 2/4] build_zonelists(): abstract node_load[] operations Dave Hansen
2005-09-20 17:23 ` [RFC][PATCH 3/4] build_zonelists() unification: don't re-zero zonelist Dave Hansen
2005-09-20 17:23 ` [RFC][PATCH 4/4] unify both copies of build_zonelists() Dave Hansen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox