linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] numa: mempolicy: Allow tunable policy for system init.
@ 2007-06-05  4:15 Paul Mundt
  0 siblings, 0 replies; only message in thread
From: Paul Mundt @ 2007-06-05  4:15 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm

The current default behaviour for system init (via numa_policy_init())
is to use MPOL_INTERLEAVE across the online nodes in order to avoid a
preference for node 0. This tends to be undesirable for small nodes that
really would rather prefer to keep as many allocations on node 0 as
possible.

As tmpfs already provides a parser for the policy and nodelist --
shmem_parse_mpol() -- we generalize this and wrap into it via an mpolinit=
(for lack of a better name) setup param. Other code that wishes to do
mempolicy parsing for itself can use the new mpol_parse_options().

As an example, for small nodes, one might prefer to boot with
'mpolinit=prefer:0'. numa_default_policy() will still override this
with MPOL_DEFAULT later on anyway, so this is only useful for system
init.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

--

 Documentation/kernel-parameters.txt |    6 ++
 include/linux/mempolicy.h           |    8 +++
 mm/mempolicy.c                      |   81 +++++++++++++++++++++++++++++++++---
 mm/shmem.c                          |   52 -----------------------
 4 files changed, 91 insertions(+), 56 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index ce91560..1b77073 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1070,6 +1070,12 @@ and is between 256 and 4096 characters. It is defined in the file
 	mousedev.yres=	[MOUSE] Vertical screen resolution, used for devices
 			reporting absolute coordinates, such as tablets
 
+	mpolinit=	[KNL,NUMA]
+			Format: <policy>[:<nodelist>]
+			Sets the default memory policy to be used at system
+			init time. Defaults to MPOL_INTERLEAVE between online
+			nodes.
+
 	mpu401=		[HW,OSS]
 			Format: <io>,<irq>
 
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index daabb3a..471fd25 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -148,6 +148,8 @@ extern void mpol_rebind_task(struct task_struct *tsk,
 					const nodemask_t *new);
 extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
 extern void mpol_fix_fork_child_flag(struct task_struct *p);
+extern int mpol_parse_options(char *value, int *policy,
+			      nodemask_t *policy_nodes);
 #define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
 
 #ifdef CONFIG_CPUSETS
@@ -253,6 +255,12 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p)
 {
 }
 
+static inline int mpol_parse_options(char *value, int *policy,
+				     nodemask_t *policy_nodes)
+{
+	return 1;
+}
+
 #define set_cpuset_being_rebound(x) do {} while (0)
 
 static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d76e8eb..f5c5e04 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -89,7 +89,7 @@
 #include <linux/migrate.h>
 #include <linux/rmap.h>
 #include <linux/security.h>
-
+#include <linux/ctype.h>
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
 
@@ -1594,9 +1594,72 @@ void mpol_free_shared_policy(struct shared_policy *p)
 	spin_unlock(&p->lock);
 }
 
+int mpol_parse_options(char *value, int *policy, nodemask_t *policy_nodes)
+{
+	char *nodelist = strchr(value, ':');
+	int err = 1;
+
+	if (nodelist) {
+		/* NUL-terminate policy string */
+		*nodelist++ = '\0';
+		if (nodelist_parse(nodelist, *policy_nodes))
+			goto out;
+	}
+	if (!strcmp(value, "default")) {
+		*policy = MPOL_DEFAULT;
+		/* Don't allow a nodelist */
+		if (!nodelist)
+			err = 0;
+	} else if (!strcmp(value, "prefer")) {
+		*policy = MPOL_PREFERRED;
+		/* Insist on a nodelist of one node only */
+		if (nodelist) {
+			char *rest = nodelist;
+			while (isdigit(*rest))
+				rest++;
+			if (!*rest)
+				err = 0;
+		}
+	} else if (!strcmp(value, "bind")) {
+		*policy = MPOL_BIND;
+		/* Insist on a nodelist */
+		if (nodelist)
+			err = 0;
+	} else if (!strcmp(value, "interleave")) {
+		*policy = MPOL_INTERLEAVE;
+		/* Default to nodes online if no nodelist */
+		if (!nodelist)
+			*policy_nodes = node_online_map;
+		err = 0;
+	}
+out:
+	/* Restore string for error message */
+	if (nodelist)
+		*--nodelist = ':';
+	return err;
+}
+
+/* Set interleaving policy for system init. This way not all
+   the data structures allocated at system boot end up in node zero. */
+static nodemask_t nmask_sysinit __initdata;
+static int policy_sysinit __initdata = MPOL_INTERLEAVE;
+
+static int __init setup_mpol_sysinit(char *str)
+{
+	if (mpol_parse_options(str, &policy_sysinit, &nmask_sysinit)) {
+		printk("mpolinit failed, falling back on interleave\n");
+		return 0;
+	}
+
+	return 1;
+}
+__setup("mpolinit=", setup_mpol_sysinit);
+
 /* assumes fs == KERNEL_DS */
 void __init numa_policy_init(void)
 {
+	nodemask_t *nmask;
+
 	policy_cache = kmem_cache_create("numa_policy",
 					 sizeof(struct mempolicy),
 					 0, SLAB_PANIC, NULL, NULL);
@@ -1605,11 +1668,19 @@ void __init numa_policy_init(void)
 				     sizeof(struct sp_node),
 				     0, SLAB_PANIC, NULL, NULL);
 
-	/* Set interleaving policy for system init. This way not all
-	   the data structures allocated at system boot end up in node zero. */
+	/*
+	 * Use the specified nodemask for init, or fall back to
+	 * node_online_map.
+	 */
+	if (policy_sysinit == MPOL_DEFAULT)
+		nmask = NULL;
+	else if (!nodes_empty(nmask_sysinit))
+		nmask = &nmask_sysinit;
+	else
+		nmask = &node_online_map;
 
-	if (do_set_mempolicy(MPOL_INTERLEAVE, &node_online_map))
-		printk("numa_policy_init: interleaving failed\n");
+	if (do_set_mempolicy(policy_sysinit, nmask))
+		printk("numa_policy_init: setting init policy failed\n");
 }
 
 /* Reset policy of current process to default */
diff --git a/mm/shmem.c b/mm/shmem.c
index e537317..ca3f59d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -957,51 +957,6 @@ redirty:
 }
 
 #ifdef CONFIG_NUMA
-static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
-{
-	char *nodelist = strchr(value, ':');
-	int err = 1;
-
-	if (nodelist) {
-		/* NUL-terminate policy string */
-		*nodelist++ = '\0';
-		if (nodelist_parse(nodelist, *policy_nodes))
-			goto out;
-	}
-	if (!strcmp(value, "default")) {
-		*policy = MPOL_DEFAULT;
-		/* Don't allow a nodelist */
-		if (!nodelist)
-			err = 0;
-	} else if (!strcmp(value, "prefer")) {
-		*policy = MPOL_PREFERRED;
-		/* Insist on a nodelist of one node only */
-		if (nodelist) {
-			char *rest = nodelist;
-			while (isdigit(*rest))
-				rest++;
-			if (!*rest)
-				err = 0;
-		}
-	} else if (!strcmp(value, "bind")) {
-		*policy = MPOL_BIND;
-		/* Insist on a nodelist */
-		if (nodelist)
-			err = 0;
-	} else if (!strcmp(value, "interleave")) {
-		*policy = MPOL_INTERLEAVE;
-		/* Default to nodes online if no nodelist */
-		if (!nodelist)
-			*policy_nodes = node_online_map;
-		err = 0;
-	}
-out:
-	/* Restore string for error message */
-	if (nodelist)
-		*--nodelist = ':';
-	return err;
-}
-
 static struct page *shmem_swapin_async(struct shared_policy *p,
 				       swp_entry_t entry, unsigned long idx)
 {
@@ -1054,11 +1009,6 @@ shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info,
 	return page;
 }
 #else
-static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
-{
-	return 1;
-}
-
 static inline struct page *
 shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx)
 {
@@ -2184,7 +2134,7 @@ static int shmem_parse_options(char *options, int *mode, uid_t *uid,
 			if (*rest)
 				goto bad_val;
 		} else if (!strcmp(this_char,"mpol")) {
-			if (shmem_parse_mpol(value,policy,policy_nodes))
+			if (mpol_parse_options(value,policy,policy_nodes))
 				goto bad_val;
 		} else {
 			printk(KERN_ERR "tmpfs: Bad mount option %s\n",

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2007-06-05  4:15 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-06-05  4:15 [PATCH] numa: mempolicy: Allow tunable policy for system init Paul Mundt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox