* Re: [PATCH 2.6.17-rc1-mm1 1/9] AutoPage Migration - V0.2 - migrate task memory with default policy
2006-04-07 20:32 [PATCH 2.6.17-rc1-mm1 0/9] AutoPage Migration - V0.2 - Overview Lee Schermerhorn
@ 2006-04-07 20:37 ` Lee Schermerhorn
2006-04-07 20:37 ` [PATCH 2.6.17-rc1-mm1 2/9] AutoPage Migration - V0.2 - add auto_migrate_enable sysctl Lee Schermerhorn
` (7 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Lee Schermerhorn @ 2006-04-07 20:37 UTC (permalink / raw)
To: linux-mm
AutoPage Migration - V0.2 - 1/9 migrate task memory with default policy
Define mempolicy.c internal flag for auto-migration. This flag
will select auto-migration specific behavior in the existing
page migration functions.
Add auto_migrate_task_memory() to mempolicy.c. This function sets up
to call migrate_to_node() with internal flags for auto-migration.
Modify vma_migratable(), called from check_range(), to skip VMAs that
don't have default policy when auto-migrating. To do this,
vma_migratable() needs the MPOL flags.
I had to move get_vma_policy() up in mempolicy.c so that I could reference
it from vma_migratable(). Should I have just added a forward ref?
Subsequent patches will arrange for auto_migrate_task_memory() to be
called when a task returns to user space after the scheduler migrates
it to a cpu on a node different from the node where it last executed.
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Index: linux-2.6.16-mm1/mm/mempolicy.c
===================================================================
--- linux-2.6.16-mm1.orig/mm/mempolicy.c 2006-03-23 16:49:22.000000000 -0500
+++ linux-2.6.16-mm1/mm/mempolicy.c 2006-03-23 16:49:34.000000000 -0500
@@ -92,9 +92,14 @@
#include <asm/uaccess.h>
/* Internal flags */
-#define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0) /* Skip checks for continuous vmas */
-#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */
-#define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2) /* Gather statistics */
+#define MPOL_MF_DISCONTIG_OK \
+ (MPOL_MF_INTERNAL << 0) /* Skip checks for continuous vmas */
+#define MPOL_MF_INVERT \
+ (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */
+#define MPOL_MF_STATS \
+ (MPOL_MF_INTERNAL << 2) /* Gather statistics */
+#define MPOL_MF_AUTOMIGRATE \
+ (MPOL_MF_INTERNAL << 3) /* auto-migrating task memory */
static struct kmem_cache *policy_cache;
static struct kmem_cache *sn_cache;
@@ -110,6 +115,24 @@ struct mempolicy default_policy = {
.policy = MPOL_DEFAULT,
};
+/* Return effective policy for a VMA */
+static struct mempolicy * get_vma_policy(struct task_struct *task,
+ struct vm_area_struct *vma, unsigned long addr)
+{
+ struct mempolicy *pol = task->mempolicy;
+
+ if (vma) {
+ if (vma->vm_ops && vma->vm_ops->get_policy)
+ pol = vma->vm_ops->get_policy(vma, addr);
+ else if (vma->vm_policy &&
+ vma->vm_policy->policy != MPOL_DEFAULT)
+ pol = vma->vm_policy;
+ }
+ if (!pol)
+ pol = &default_policy;
+ return pol;
+}
+
/* Do sanity checking on a policy */
static int mpol_check_policy(int mode, nodemask_t *nodes)
{
@@ -309,11 +332,17 @@ static inline int check_pgd_range(struct
}
/* Check if a vma is migratable */
-static inline int vma_migratable(struct vm_area_struct *vma)
+static inline int vma_migratable(struct vm_area_struct *vma, int flags)
{
if (vma->vm_flags & (
VM_LOCKED|VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED))
return 0;
+ if (flags & MPOL_MF_AUTOMIGRATE) {
+ struct mempolicy *pol =
+ get_vma_policy(current, vma, vma->vm_start);
+ if (pol->policy != MPOL_DEFAULT)
+ return 0;
+ }
return 1;
}
@@ -350,7 +379,7 @@ check_range(struct mm_struct *mm, unsign
if (!is_vm_hugetlb_page(vma) &&
((flags & MPOL_MF_STRICT) ||
((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
- vma_migratable(vma)))) {
+ vma_migratable(vma, flags)))) {
unsigned long endvma = vma->vm_end;
if (endvma > end)
@@ -695,6 +724,33 @@ int do_migrate_pages(struct mm_struct *m
}
+/**
+ * auto_migrate_task_memory()
+ *
+ * Called just before returning to user state when a task has been
+ * migrated to a new node by the scheduler and auto_migrate_enable
+ * is enabled.
+ */
+void auto_migrate_task_memory(void)
+{
+ struct mm_struct *mm = NULL;
+ int dest = cpu_to_node(task_cpu(current));
+ int flags = MPOL_MF_MOVE | MPOL_MF_INVERT | MPOL_MF_AUTOMIGRATE;
+
+ mm = current->mm;
+ /*
+ * we're returning to user space, so mm must be non-NULL
+ */
+ BUG_ON(!mm);
+
+ /*
+ * Pass destination node as source node plus 'INVERT flag:
+ * Migrate all pages NOT on destination node.
+ * 'AUTOMIGRATE flag selects only VMAs with default policy
+ */
+ migrate_to_node(mm, dest, dest, flags);
+}
+
#else
static void migrate_page_add(struct page *page, struct list_head *pagelist,
@@ -1049,24 +1105,6 @@ asmlinkage long compat_sys_mbind(compat_
#endif
-/* Return effective policy for a VMA */
-static struct mempolicy * get_vma_policy(struct task_struct *task,
- struct vm_area_struct *vma, unsigned long addr)
-{
- struct mempolicy *pol = task->mempolicy;
-
- if (vma) {
- if (vma->vm_ops && vma->vm_ops->get_policy)
- pol = vma->vm_ops->get_policy(vma, addr);
- else if (vma->vm_policy &&
- vma->vm_policy->policy != MPOL_DEFAULT)
- pol = vma->vm_policy;
- }
- if (!pol)
- pol = &default_policy;
- return pol;
-}
-
/* Return a zonelist representing a mempolicy */
static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy)
{
Index: linux-2.6.16-mm1/include/linux/auto-migrate.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.16-mm1/include/linux/auto-migrate.h 2006-03-23 16:49:34.000000000 -0500
@@ -0,0 +1,20 @@
+#ifndef _LINUX_AUTO_MIGRATE_H
+#define _LINUX_AUTO_MIGRATE_H
+
+/*
+ * minimal memory migration definitions needed by scheduler,
+ * sysctl, ..., so that they don't need to drag in the entire
+ * migrate.h and all that it depends on.
+ */
+
+#include <linux/config.h>
+
+#ifdef CONFIG_MIGRATION
+
+extern void auto_migrate_task_memory(void);
+
+#else /* !CONFIG_MIGRATION */
+
+#endif /* CONFIG_MIGRATION */
+
+#endif
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 2.6.17-rc1-mm1 2/9] AutoPage Migration - V0.2 - add auto_migrate_enable sysctl
2006-04-07 20:32 [PATCH 2.6.17-rc1-mm1 0/9] AutoPage Migration - V0.2 - Overview Lee Schermerhorn
2006-04-07 20:37 ` [PATCH 2.6.17-rc1-mm1 1/9] AutoPage Migration - V0.2 - migrate task memory with default policy Lee Schermerhorn
@ 2006-04-07 20:37 ` Lee Schermerhorn
2006-04-07 20:38 ` [PATCH 2.6.17-rc1-mm1 3/9] AutoPage Migration - V0.2 - generic check/notify internode migration Lee Schermerhorn
` (6 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Lee Schermerhorn @ 2006-04-07 20:37 UTC (permalink / raw)
To: linux-mm
AutoPage Migration - V0.2 - 2/9 add auto_migrate_enable sysctl
V0.2: moved controls to mm/migrate.c
renamed "sched_migrate_memory" to "auto_migrate_enable"
This patch adds the infrastructure for "migration controls" under
/sys/kernel/migration. It also adds a single such control--
auto_migrate_enable--to enable/disable automatic, scheduler driven
task memory migration. May also be initialized from boot command
line option.
Default is disabled!
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Index: linux-2.6.16-mm1/mm/migrate.c
===================================================================
--- linux-2.6.16-mm1.orig/mm/migrate.c 2006-03-23 16:49:16.000000000 -0500
+++ linux-2.6.16-mm1/mm/migrate.c 2006-03-23 16:49:40.000000000 -0500
@@ -25,8 +25,7 @@
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/swapops.h>
-
-#include "internal.h"
+#include <linux/sysfs.h>
#include "internal.h"
@@ -36,6 +35,76 @@
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
/*
+ * System Controls for [auto] migration
+ */
+#define MIGRATION_ATTR_RW(_name) \
+static struct subsys_attribute _name##_attr = \
+ __ATTR(_name, 0644, _name##_show, _name##_store)
+
+/*
+ * auto_migrate_enable: boot option and sysctl to enable/disable
+ * memory migration on inter-node task migration due to scheduler
+ * load balancing or change in cpu affinity.
+ */
+int auto_migrate_enable = 0;
+
+static int __init set_auto_migrate_enable(char *str)
+{
+ get_option(&str, &auto_migrate_enable);
+ return 1;
+}
+
+__setup("auto_migrate_enable", set_auto_migrate_enable);
+
+static ssize_t auto_migrate_enable_show(struct subsystem *subsys, char *page)
+{
+ return sprintf(page, "auto_migrate_enable %s\n",
+ auto_migrate_enable ? "on" : "off");
+}
+static ssize_t auto_migrate_enable_store(struct subsystem *subsys,
+ const char *page, size_t count)
+{
+ unsigned long n = simple_strtoul(page, NULL, 10);
+ if (n)
+ auto_migrate_enable = 1;
+ else
+ auto_migrate_enable = 0;
+ return count;
+}
+MIGRATION_ATTR_RW(auto_migrate_enable);
+
+decl_subsys(migration, NULL, NULL);
+EXPORT_SYMBOL(migration_subsys);
+
+static struct attribute *migration_attrs[] = {
+ &auto_migrate_enable_attr.attr,
+ NULL
+};
+
+static struct attribute_group migration_attr_group = {
+ .attrs = migration_attrs,
+};
+
+static int __init migration_control_init(void)
+{
+ int error;
+
+ /*
+ * child of kernel subsys
+ */
+ kset_set_kset_s(&migration_subsys, kernel_subsys);
+ error = subsystem_register(&migration_subsys);
+ if (!error)
+ error = sysfs_create_group(&migration_subsys.kset.kobj,
+ &migration_attr_group);
+ return error;
+}
+subsys_initcall(migration_control_init);
+/*
+ * end Migration System Controls
+ */
+
+/*
* Isolate one page from the LRU lists. If successful put it onto
* the indicated list with elevated page count.
*
Index: linux-2.6.16-mm1/include/linux/auto-migrate.h
===================================================================
--- linux-2.6.16-mm1.orig/include/linux/auto-migrate.h 2006-03-23 16:49:34.000000000 -0500
+++ linux-2.6.16-mm1/include/linux/auto-migrate.h 2006-03-23 16:49:40.000000000 -0500
@@ -13,6 +13,8 @@
extern void auto_migrate_task_memory(void);
+extern int auto_migrate_enable;
+
#else /* !CONFIG_MIGRATION */
#endif /* CONFIG_MIGRATION */
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 2.6.17-rc1-mm1 3/9] AutoPage Migration - V0.2 - generic check/notify internode migration
2006-04-07 20:32 [PATCH 2.6.17-rc1-mm1 0/9] AutoPage Migration - V0.2 - Overview Lee Schermerhorn
2006-04-07 20:37 ` [PATCH 2.6.17-rc1-mm1 1/9] AutoPage Migration - V0.2 - migrate task memory with default policy Lee Schermerhorn
2006-04-07 20:37 ` [PATCH 2.6.17-rc1-mm1 2/9] AutoPage Migration - V0.2 - add auto_migrate_enable sysctl Lee Schermerhorn
@ 2006-04-07 20:38 ` Lee Schermerhorn
2006-04-07 20:39 ` [PATCH 2.6.17-rc1-mm1 4/9] AutoPage Migration - V0.2 - ia64 " Lee Schermerhorn
` (5 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Lee Schermerhorn @ 2006-04-07 20:38 UTC (permalink / raw)
To: linux-mm
AutoPage Migration - V0.2 - 3/9 generic check/notify internode migration
V02: renamed migrate_task_memory() to auto_migrate_task_memory().
renamed auto-migration enable control.
This patch adds the check for internode migration to be called
from scheduler load balancing, and the check for migration pending
to be called when a task returning to user space notices TIF_NOTIFY_RESUME.
Check for internode migration: if automatic memory migration
is enabled [auto_migrate_enable != 0] and this is a user task and the
destination cpu is on a different node from the task's current cpu,
the task will be marked for migration pending via a member added to task
struct. The TIF_NOTIFY_RESUME thread_info flag is set to cause the task
to enter do_notify_resume[_user]() to check for migration pending.
When a task is rescheduled to user space with TIF_NOTIFY_RESUME,
it will check for migration pending, unless SIGKILL is pending.
If the task notices migration pending, it will call
auto_migrate_task_memory() to migrate pages in vma's with default
policy. Only default policy is affected by migration to a new node.
Note that we can't call auto_migrate_task_memory() with interrupts
disabled. Temporarily enable interrupts around the call.
These checks become empty macros when 'MIGRATION' is not configured.
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Index: linux-2.6.17-rc1-mm1/include/linux/sched.h
===================================================================
--- linux-2.6.17-rc1-mm1.orig/include/linux/sched.h 2006-04-05 10:14:36.000000000 -0400
+++ linux-2.6.17-rc1-mm1/include/linux/sched.h 2006-04-05 10:15:00.000000000 -0400
@@ -908,6 +908,9 @@ struct task_struct {
#ifdef CONFIG_NUMA
struct mempolicy *mempolicy;
short il_next;
+#ifdef CONFIG_MIGRATION
+ int migrate_pending; /* internode mem migration pending */
+#endif
#endif
#ifdef CONFIG_CPUSETS
struct cpuset *cpuset;
Index: linux-2.6.17-rc1-mm1/include/linux/auto-migrate.h
===================================================================
--- linux-2.6.17-rc1-mm1.orig/include/linux/auto-migrate.h 2006-04-05 10:14:58.000000000 -0400
+++ linux-2.6.17-rc1-mm1/include/linux/auto-migrate.h 2006-04-05 10:15:00.000000000 -0400
@@ -15,8 +15,64 @@ extern void auto_migrate_task_memory(voi
extern int auto_migrate_enable;
+#ifdef _LINUX_SCHED_H /* only used where this is defined */
+static inline void check_internode_migration(task_t *task, int dest_cpu)
+{
+ if (auto_migrate_enable &&
+ task->mm && !(task->flags & PF_BORROWED_MM)) {
+ int node = cpu_to_node(task_cpu(task));
+ if ((node != cpu_to_node(dest_cpu))) {
+ /*
+ * migrating a user task to a new node.
+ * mark for memory migration on return to user space.
+ */
+ struct thread_info *info = task->thread_info;
+ task->migrate_pending = 1;
+ set_bit(TIF_NOTIFY_RESUME, &info->flags);
+ }
+ }
+}
+
+static inline void check_migrate_pending(void)
+{
+ if (!auto_migrate_enable)
+ goto out;
+
+ /*
+ * Don't bother with memory migration prep if 'KILL pending
+ */
+ if (test_thread_flag(TIF_SIGPENDING) &&
+ (sigismember(&current->pending.signal, SIGKILL) ||
+ sigismember(&current->signal->shared_pending.signal, SIGKILL)))
+ goto out;
+
+ if (unlikely(current->migrate_pending)) {
+ int disable_irqs = 0;
+
+ if (likely(irqs_disabled())) {
+ disable_irqs = 1;
+ local_irq_enable();
+ }
+
+ auto_migrate_task_memory();
+
+ if (likely(disable_irqs))
+ local_irq_disable();
+ }
+
+out:
+ current->migrate_pending = 0;
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ return;
+}
+#endif /* _LINUX_SCHED_H */
+
#else /* !CONFIG_MIGRATION */
+#define check_internode_migration(t,c) /* NOTHING */
+
+#define check_migrate_pending() /* NOTHING */
+
#endif /* CONFIG_MIGRATION */
#endif
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 2.6.17-rc1-mm1 4/9] AutoPage Migration - V0.2 - ia64 check/notify internode migration
2006-04-07 20:32 [PATCH 2.6.17-rc1-mm1 0/9] AutoPage Migration - V0.2 - Overview Lee Schermerhorn
` (2 preceding siblings ...)
2006-04-07 20:38 ` [PATCH 2.6.17-rc1-mm1 3/9] AutoPage Migration - V0.2 - generic check/notify internode migration Lee Schermerhorn
@ 2006-04-07 20:39 ` Lee Schermerhorn
2006-04-07 20:40 ` [PATCH 2.6.17-rc1-mm1 5/9] AutoPage Migration - V0.2 - x64_64 " Lee Schermerhorn
` (4 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Lee Schermerhorn @ 2006-04-07 20:39 UTC (permalink / raw)
To: linux-mm
AutoPage Migration - V0.2 - 4/9 ia64 check/notify internode migration
V0.2 - refresh only
This patch hooks the check for task memory migration pending
into the ia64 do_notify_resume() function.
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Index: linux-2.6.16-mm1/arch/ia64/kernel/process.c
===================================================================
--- linux-2.6.16-mm1.orig/arch/ia64/kernel/process.c 2006-03-23 11:00:43.000000000 -0500
+++ linux-2.6.16-mm1/arch/ia64/kernel/process.c 2006-03-23 16:49:58.000000000 -0500
@@ -30,6 +30,7 @@
#include <linux/efi.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
+#include <linux/auto-migrate.h>
#include <asm/cpu.h>
#include <asm/delay.h>
@@ -172,6 +173,12 @@ do_notify_resume_user (sigset_t *oldset,
pfm_handle_work();
#endif
+ /*
+ * check for task memory migration before delivering
+ * signals so that handler[s] use memory in new node.
+ */
+ check_migrate_pending();
+
/* deal with pending signal delivery */
if (test_thread_flag(TIF_SIGPENDING))
ia64_do_signal(oldset, scr, in_syscall);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 2.6.17-rc1-mm1 5/9] AutoPage Migration - V0.2 - x64_64 check/notify internode migration
2006-04-07 20:32 [PATCH 2.6.17-rc1-mm1 0/9] AutoPage Migration - V0.2 - Overview Lee Schermerhorn
` (3 preceding siblings ...)
2006-04-07 20:39 ` [PATCH 2.6.17-rc1-mm1 4/9] AutoPage Migration - V0.2 - ia64 " Lee Schermerhorn
@ 2006-04-07 20:40 ` Lee Schermerhorn
2006-04-07 20:41 ` [PATCH 2.6.17-rc1-mm1 6/9] AutoPage Migration - V0.2 - hook sched migrate to memory migration Lee Schermerhorn
` (3 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Lee Schermerhorn @ 2006-04-07 20:40 UTC (permalink / raw)
To: linux-mm
AutoPage Migration - V0.2 - 5/9 x64_64 check/notify internode migration
Hook check for task memory migration for x86_64.
V0.1 -> V0.2: fix typo in auto-migrate.h include.
tested on quad-opteron platform
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Index: linux-2.6.16-mm1/arch/x86_64/kernel/signal.c
===================================================================
--- linux-2.6.16-mm1.orig/arch/x86_64/kernel/signal.c 2006-03-23 11:00:44.000000000 -0500
+++ linux-2.6.16-mm1/arch/x86_64/kernel/signal.c 2006-03-23 16:50:04.000000000 -0500
@@ -24,6 +24,8 @@
#include <linux/stddef.h>
#include <linux/personality.h>
#include <linux/compiler.h>
+#include <linux/auto-migrate.h>
+
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
@@ -493,6 +495,12 @@ void do_notify_resume(struct pt_regs *re
clear_thread_flag(TIF_SINGLESTEP);
}
+ /*
+ * check for task memory migration before delivering
+ * signals so that handler[s] use memory in new node.
+ */
+ check_migrate_pending();
+
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs,oldset);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 2.6.17-rc1-mm1 6/9] AutoPage Migration - V0.2 - hook sched migrate to memory migration
2006-04-07 20:32 [PATCH 2.6.17-rc1-mm1 0/9] AutoPage Migration - V0.2 - Overview Lee Schermerhorn
` (4 preceding siblings ...)
2006-04-07 20:40 ` [PATCH 2.6.17-rc1-mm1 5/9] AutoPage Migration - V0.2 - x64_64 " Lee Schermerhorn
@ 2006-04-07 20:41 ` Lee Schermerhorn
2006-04-07 20:42 ` [PATCH 2.6.17-rc1-mm1 7/9] AutoPage Migration - V0.2 - add hysteresis to internode migration Lee Schermerhorn
` (2 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Lee Schermerhorn @ 2006-04-07 20:41 UTC (permalink / raw)
To: linux-mm
AutoPage Migration - V0.2 - 6/9 hook sched migrate to memory migration
Add check for internode migration to scheduler -- in most places
where a new cpu is assigned via set_task_cpu(). If MIGRATION is
configured, and auto-migration is enabled [and this is a
user space task], the check will set "migration pending" for the
task if the destination cpu is on a different node from the last
cpu to which the task was assigned. Migration of affected pages
[those with default policy] will occur when the task returns to
user space.
V0.2:
only check/notify task of internode migration in migrate_task()
if not in exec() path. Walking task address space and unmapping
pages is probably a waste of time in this case. Note, however,
that we won't give the task a chance to pull any resident text
or library pages local to itself. If we ever support replication
or more aggressive migration, we can fix this.
Thanks to Kamezawa Hiroyuki for pointing out this potential
optimization.
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Index: linux-2.6.17-rc1-mm1/kernel/sched.c
===================================================================
--- linux-2.6.17-rc1-mm1.orig/kernel/sched.c 2006-04-05 10:14:36.000000000 -0400
+++ linux-2.6.17-rc1-mm1/kernel/sched.c 2006-04-05 10:16:13.000000000 -0400
@@ -52,8 +52,9 @@
#include <linux/acct.h>
#include <linux/kprobes.h>
#include <linux/kgdb.h>
-#include <asm/tlb.h>
+#include <linux/auto-migrate.h>
+#include <asm/tlb.h>
#include <asm/unistd.h>
/*
@@ -1028,7 +1029,8 @@ typedef struct {
* The task's runqueue lock must be held.
* Returns true if you have to wait for migration thread.
*/
-static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
+static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req,
+ int execing)
{
runqueue_t *rq = task_rq(p);
@@ -1037,6 +1039,8 @@ static int migrate_task(task_t *p, int d
* it is sufficient to simply update the task's cpu field.
*/
if (!p->array && !task_running(rq, p)) {
+ if (!execing)
+ check_internode_migration(p, dest_cpu);
set_task_cpu(p, dest_cpu);
return 0;
}
@@ -1432,6 +1436,7 @@ static int try_to_wake_up(task_t *p, uns
out_set_cpu:
new_cpu = wake_idle(new_cpu, p);
if (new_cpu != cpu) {
+ check_internode_migration(p, new_cpu);
set_task_cpu(p, new_cpu);
task_rq_unlock(rq, &flags);
/* might preempt at this point */
@@ -1944,7 +1949,7 @@ static void sched_migrate_task(task_t *p
goto out;
/* force the process onto the specified CPU */
- if (migrate_task(p, dest_cpu, &req)) {
+ if (migrate_task(p, dest_cpu, &req, 1)) {
/* Need to wait for migration thread (might exit: take ref). */
struct task_struct *mt = rq->migration_thread;
get_task_struct(mt);
@@ -1981,6 +1986,7 @@ void pull_task(runqueue_t *src_rq, prio_
{
dequeue_task(p, src_array);
dec_nr_running(p, src_rq);
+ check_internode_migration(p, this_cpu);
set_task_cpu(p, this_cpu);
inc_nr_running(p, this_rq);
enqueue_task(p, this_array);
@@ -4721,7 +4727,7 @@ int set_cpus_allowed(task_t *p, cpumask_
if (cpu_isset(task_cpu(p), new_mask))
goto out;
- if (migrate_task(p, any_online_cpu(new_mask), &req)) {
+ if (migrate_task(p, any_online_cpu(new_mask), &req, 0)) {
/* Need help from migration thread: drop lock and wait. */
task_rq_unlock(rq, &flags);
wake_up_process(rq->migration_thread);
@@ -4763,6 +4769,7 @@ static void __migrate_task(struct task_s
if (!cpu_isset(dest_cpu, p->cpus_allowed))
goto out;
+ check_internode_migration(p, dest_cpu);
set_task_cpu(p, dest_cpu);
if (p->array) {
/*
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 2.6.17-rc1-mm1 7/9] AutoPage Migration - V0.2 - add hysteresis to internode migration
2006-04-07 20:32 [PATCH 2.6.17-rc1-mm1 0/9] AutoPage Migration - V0.2 - Overview Lee Schermerhorn
` (5 preceding siblings ...)
2006-04-07 20:41 ` [PATCH 2.6.17-rc1-mm1 6/9] AutoPage Migration - V0.2 - hook sched migrate to memory migration Lee Schermerhorn
@ 2006-04-07 20:42 ` Lee Schermerhorn
2006-04-07 20:43 ` [PATCH 2.6.17-rc1-mm1 8/9] AutoPage Migration - V0.2 - add max mapcount migration threshold Lee Schermerhorn
2006-04-07 20:45 ` [PATCH 2.6.17-rc1-mm1 9/9] AutoPage Migration - V0.2 - hook automigration to migrate-on-fault Lee Schermerhorn
8 siblings, 0 replies; 10+ messages in thread
From: Lee Schermerhorn @ 2006-04-07 20:42 UTC (permalink / raw)
To: linux-mm
AutoPage Migration - V0.2 - 7/9 add hysteresis to internode migration
V0.2: moved to mm/migrate.c; renamed to "auto_migrate_interval"
This patch adds hysteresis to the internode migration to prevent
page migration thrashing when automatic scheduler driven page migration
is enabled.
Add static in-line function "too_soon_for_internode_migration"
[macro => 0 if !CONFIG_MIGRATION] to check for attempts to move
task to a new node sooner than auto_migrate_interval jiffies
after previous migration.
Modify try_to_wake_up() to leave task on its current cpu if too
soon to move it to a different node.
Modify can_migrate_task() to "just say no!" if the load balancer
proposes an internode migration too soon after previous internode
migration.
Added a control variable--auto_migrate_interval--to /sys/kernel/migration
to query/set the interval. Provide some fairly arbitrary min, max and
default values.
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Index: linux-2.6.17-rc1-mm1/include/linux/sched.h
===================================================================
--- linux-2.6.17-rc1-mm1.orig/include/linux/sched.h 2006-04-05 10:15:00.000000000 -0400
+++ linux-2.6.17-rc1-mm1/include/linux/sched.h 2006-04-05 10:16:26.000000000 -0400
@@ -909,6 +909,7 @@ struct task_struct {
struct mempolicy *mempolicy;
short il_next;
#ifdef CONFIG_MIGRATION
+ unsigned long next_migrate; /* internode migration hysteresis */
int migrate_pending; /* internode mem migration pending */
#endif
#endif
Index: linux-2.6.17-rc1-mm1/mm/migrate.c
===================================================================
--- linux-2.6.17-rc1-mm1.orig/mm/migrate.c 2006-04-05 10:14:58.000000000 -0400
+++ linux-2.6.17-rc1-mm1/mm/migrate.c 2006-04-05 10:16:26.000000000 -0400
@@ -26,6 +26,7 @@
#include <linux/cpuset.h>
#include <linux/swapops.h>
#include <linux/sysfs.h>
+#include <linux/auto-migrate.h>
#include "internal.h"
@@ -73,11 +74,45 @@ static ssize_t auto_migrate_enable_store
}
MIGRATION_ATTR_RW(auto_migrate_enable);
+/*
+ * auto_migrate_interval: minimum interval between internode
+ * task migration when auto-migration enabled.
+ * units: jiffies
+ */
+unsigned long auto_migrate_interval = AUTO_MIGRATE_INTERVAL_DFLT;
+
+//TODO: __setup function for boot command option
+
+static ssize_t auto_migrate_interval_show(struct subsystem *subsys,
+ char *page)
+{
+ return sprintf(page, "auto_migrate_interval %ld\n",
+ auto_migrate_interval/HZ );
+}
+static ssize_t auto_migrate_interval_store(struct subsystem *subsys,
+ const char *page, size_t count)
+{
+ unsigned long n = simple_strtoul(page, NULL, 10) * HZ;
+
+ /*
+ * silently clip to min/max
+ */
+ if (n < AUTO_MIGRATE_INTERVAL_MIN)
+ auto_migrate_interval = AUTO_MIGRATE_INTERVAL_MIN;
+ else if (n > AUTO_MIGRATE_INTERVAL_MAX)
+ auto_migrate_interval = AUTO_MIGRATE_INTERVAL_MAX;
+ else
+ auto_migrate_interval = n;
+ return count;
+}
+MIGRATION_ATTR_RW(auto_migrate_interval);
+
decl_subsys(migration, NULL, NULL);
EXPORT_SYMBOL(migration_subsys);
static struct attribute *migration_attrs[] = {
&auto_migrate_enable_attr.attr,
+ &auto_migrate_interval_attr.attr,
NULL
};
Index: linux-2.6.17-rc1-mm1/include/linux/auto-migrate.h
===================================================================
--- linux-2.6.17-rc1-mm1.orig/include/linux/auto-migrate.h 2006-04-05 10:15:00.000000000 -0400
+++ linux-2.6.17-rc1-mm1/include/linux/auto-migrate.h 2006-04-05 10:16:26.000000000 -0400
@@ -15,6 +15,11 @@ extern void auto_migrate_task_memory(voi
extern int auto_migrate_enable;
+extern unsigned long auto_migrate_interval; /* seconds <=> jiffies */
+#define AUTO_MIGRATE_INTERVAL_DFLT (30*HZ)
+#define AUTO_MIGRATE_INTERVAL_MIN (5*HZ)
+#define AUTO_MIGRATE_INTERVAL_MAX (300*HZ)
+
#ifdef _LINUX_SCHED_H /* only used where this is defined */
static inline void check_internode_migration(task_t *task, int dest_cpu)
{
@@ -33,6 +38,25 @@ static inline void check_internode_migra
}
}
+/*
+ * To avoid page migration thrashing when auto memory migration is enabled,
+ * check user task for too recent internode migration.
+ */
+static inline int too_soon_for_internode_migration(task_t *task,
+ int this_cpu)
+{
+ if (auto_migrate_enable &&
+ task->mm && !(task->flags & PF_BORROWED_MM) &&
+ cpu_to_node(task_cpu(task)) != cpu_to_node(this_cpu)) {
+
+ if (task->migrate_pending ||
+ time_before(jiffies, task->next_migrate))
+ return 1;
+ }
+
+ return 0;
+}
+
static inline void check_migrate_pending(void)
{
if (!auto_migrate_enable)
@@ -55,6 +79,7 @@ static inline void check_migrate_pending
}
auto_migrate_task_memory();
+ current->next_migrate = jiffies + auto_migrate_interval;
if (likely(disable_irqs))
local_irq_disable();
@@ -70,6 +95,7 @@ out:
#else /* !CONFIG_MIGRATION */
#define check_internode_migration(t,c) /* NOTHING */
+#define too_soon_for_internode_migration(t,c) 0
#define check_migrate_pending() /* NOTHING */
Index: linux-2.6.17-rc1-mm1/kernel/sched.c
===================================================================
--- linux-2.6.17-rc1-mm1.orig/kernel/sched.c 2006-04-05 10:16:13.000000000 -0400
+++ linux-2.6.17-rc1-mm1/kernel/sched.c 2006-04-05 10:16:26.000000000 -0400
@@ -1378,7 +1378,8 @@ static int try_to_wake_up(task_t *p, uns
}
}
- if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
+ if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)
+ || too_soon_for_internode_migration(p, this_cpu)))
goto out_set_cpu;
/*
@@ -2013,6 +2014,7 @@ int can_migrate_task(task_t *p, runqueue
* 1) running (obviously), or
* 2) cannot be migrated to this CPU due to cpus_allowed, or
* 3) are cache-hot on their current CPU.
+ * 4) too soon since last internode migration
*/
if (!cpu_isset(this_cpu, p->cpus_allowed))
return 0;
@@ -2021,6 +2023,10 @@ int can_migrate_task(task_t *p, runqueue
if (task_running(rq, p))
return 0;
+// TODO: should this be under Aggressive migration?
+ if (too_soon_for_internode_migration(p, this_cpu))
+ return 0;
+
/*
* Aggressive migration if:
* 1) task is cache cold, or
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 2.6.17-rc1-mm1 8/9] AutoPage Migration - V0.2 - add max mapcount migration threshold
2006-04-07 20:32 [PATCH 2.6.17-rc1-mm1 0/9] AutoPage Migration - V0.2 - Overview Lee Schermerhorn
` (6 preceding siblings ...)
2006-04-07 20:42 ` [PATCH 2.6.17-rc1-mm1 7/9] AutoPage Migration - V0.2 - add hysteresis to internode migration Lee Schermerhorn
@ 2006-04-07 20:43 ` Lee Schermerhorn
2006-04-07 20:45 ` [PATCH 2.6.17-rc1-mm1 9/9] AutoPage Migration - V0.2 - hook automigration to migrate-on-fault Lee Schermerhorn
8 siblings, 0 replies; 10+ messages in thread
From: Lee Schermerhorn @ 2006-04-07 20:43 UTC (permalink / raw)
To: linux-mm
AutoPage Migration - V0.2 - 8/9 add max mapcount migration threshold
This patch adds an additional migration control that allows one
to vary the page mapcount threshold above which pages will not
be migrated by MPOL_MF_MOVE. The default value is 1, which yields
the same behavior as before this patch.
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Index: linux-2.6.16-mm1/include/linux/auto-migrate.h
===================================================================
--- linux-2.6.16-mm1.orig/include/linux/auto-migrate.h 2006-03-23 16:50:24.000000000 -0500
+++ linux-2.6.16-mm1/include/linux/auto-migrate.h 2006-03-23 16:50:30.000000000 -0500
@@ -20,6 +20,8 @@ extern unsigned long auto_migrate_interv
#define AUTO_MIGRATE_INTERVAL_MIN (5*HZ)
#define AUTO_MIGRATE_INTERVAL_MAX (300*HZ)
+extern unsigned int migrate_max_mapcount;
+
#ifdef _LINUX_SCHED_H /* only used where this is defined */
static inline void check_internode_migration(task_t *task, int dest_cpu)
{
@@ -98,6 +100,7 @@ out:
#define too_soon_for_internode_migration(t,c) 0
#define check_migrate_pending() /* NOTHING */
+#define migrate_max_mapcount (1)
#endif /* CONFIG_MIGRATION */
Index: linux-2.6.16-mm1/mm/migrate.c
===================================================================
--- linux-2.6.16-mm1.orig/mm/migrate.c 2006-03-23 16:50:24.000000000 -0500
+++ linux-2.6.16-mm1/mm/migrate.c 2006-03-23 16:50:30.000000000 -0500
@@ -107,12 +107,35 @@ static ssize_t auto_migrate_interval_sto
}
MIGRATION_ATTR_RW(auto_migrate_interval);
+/*
+ * migrate_max_mapcount: specifies how many mappers are allowed
+ * before we won't migrate a page via MPOL_MF_MOVE.
+ */
+unsigned int migrate_max_mapcount = 1; /* default == minimum */
+
+static ssize_t migrate_max_mapcount_show(struct subsystem *subsys, char *page)
+{
+ return sprintf(page, "migrate_max_mapcount %d\n", migrate_max_mapcount);
+}
+static ssize_t migrate_max_mapcount_store(struct subsystem *subsys,
+ const char *page, size_t count)
+{
+ unsigned int n = simple_strtoul(page, NULL, 10);
+ if (n < 1)
+ migrate_max_mapcount = 1;
+ else
+ migrate_max_mapcount = n;
+ return count;
+}
+MIGRATION_ATTR_RW(migrate_max_mapcount);
+
decl_subsys(migration, NULL, NULL);
EXPORT_SYMBOL(migration_subsys);
static struct attribute *migration_attrs[] = {
&auto_migrate_enable_attr.attr,
&auto_migrate_interval_attr.attr,
+ &migrate_max_mapcount_attr.attr,
NULL
};
Index: linux-2.6.16-mm1/mm/mempolicy.c
===================================================================
--- linux-2.6.16-mm1.orig/mm/mempolicy.c 2006-03-23 16:49:34.000000000 -0500
+++ linux-2.6.16-mm1/mm/mempolicy.c 2006-03-23 16:50:30.000000000 -0500
@@ -87,6 +87,7 @@
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/migrate.h>
+#include <linux/auto-migrate.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
@@ -452,7 +453,6 @@ static int contextualize_policy(int mode
return mpol_check_policy(mode, nodes);
}
-
/*
* Update task->flags PF_MEMPOLICY bit: set iff non-default
* mempolicy. Allows more rapid checking of this (combined perhaps
@@ -611,9 +611,10 @@ static void migrate_page_add(struct page
unsigned long flags)
{
/*
- * Avoid migrating a page that is shared with others.
+ * Avoid migrating a page that is shared with [too many] others.
*/
- if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1)
+ if ((flags & MPOL_MF_MOVE_ALL) ||
+ page_mapcount(page) <= migrate_max_mapcount)
isolate_lru_page(page, pagelist);
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 2.6.17-rc1-mm1 9/9] AutoPage Migration - V0.2 - hook automigration to migrate-on-fault
2006-04-07 20:32 [PATCH 2.6.17-rc1-mm1 0/9] AutoPage Migration - V0.2 - Overview Lee Schermerhorn
` (7 preceding siblings ...)
2006-04-07 20:43 ` [PATCH 2.6.17-rc1-mm1 8/9] AutoPage Migration - V0.2 - add max mapcount migration threshold Lee Schermerhorn
@ 2006-04-07 20:45 ` Lee Schermerhorn
8 siblings, 0 replies; 10+ messages in thread
From: Lee Schermerhorn @ 2006-04-07 20:45 UTC (permalink / raw)
To: linux-mm
AutoPage Migration - V0.2 - 9/9 hook automigration to migrate-on-fault
Add a /sys/kernel/migration control--auto_migrate_lazy--to use
migrate-on-fault for auto-migration.
Modify migrate_to_node() to just unmap the eligible pages
via migrate_pages_unmap_only() when MPOL_MF_LAZY flag is set.
This patch depends on the "migrate-on-fault" patch series that
defines the MPOL_MF_LAZY flag and the migrate_pages_unmap_only()
function.
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Index: linux-2.6.16-mm1/mm/mempolicy.c
===================================================================
--- linux-2.6.16-mm1.orig/mm/mempolicy.c 2006-03-23 16:50:30.000000000 -0500
+++ linux-2.6.16-mm1/mm/mempolicy.c 2006-03-23 16:50:36.000000000 -0500
@@ -635,7 +635,11 @@ int migrate_to_node(struct mm_struct *mm
flags | MPOL_MF_DISCONTIG_OK, &pagelist);
if (!list_empty(&pagelist)) {
- err = migrate_pages_to(&pagelist, NULL, dest);
+ if (flags & MPOL_MF_LAZY)
+ err = migrate_pages_unmap_only(&pagelist);
+ else
+ err = migrate_pages_to(&pagelist, NULL, dest);
+
if (!list_empty(&pagelist))
putback_lru_pages(&pagelist);
}
@@ -744,6 +748,9 @@ void auto_migrate_task_memory(void)
*/
BUG_ON(!mm);
+ if (auto_migrate_lazy)
+ flags |= MPOL_MF_LAZY;
+
/*
* Pass destination node as source node plus 'INVERT flag:
* Migrate all pages NOT on destination node.
@@ -1000,7 +1007,6 @@ out:
return err;
}
-
/* Retrieve NUMA policy */
asmlinkage long sys_get_mempolicy(int __user *policy,
unsigned long __user *nmask,
Index: linux-2.6.16-mm1/mm/migrate.c
===================================================================
--- linux-2.6.16-mm1.orig/mm/migrate.c 2006-03-23 16:50:30.000000000 -0500
+++ linux-2.6.16-mm1/mm/migrate.c 2006-03-23 16:50:36.000000000 -0500
@@ -129,6 +129,37 @@ static ssize_t migrate_max_mapcount_stor
}
MIGRATION_ATTR_RW(migrate_max_mapcount);
+/*
+ * auto_migrate_lazy: use "lazy migration"--i.e., migration-on-fault--
+ * for scheduler driven task memory migration.
+ */
+int auto_migrate_lazy = 0;
+
+static int __init set_auto_migrate_lazy(char *str)
+{
+ get_option(&str, &auto_migrate_lazy);
+ return 1;
+}
+
+__setup("auto_migrate_lazy", set_auto_migrate_lazy);
+
+static ssize_t auto_migrate_lazy_show(struct subsystem *subsys, char *page)
+{
+ return sprintf(page, "auto_migrate_lazy %s\n",
+ auto_migrate_lazy ? "on" : "off");
+}
+static ssize_t auto_migrate_lazy_store(struct subsystem *subsys,
+ const char *page, size_t count)
+{
+ unsigned long n = simple_strtoul(page, NULL, 10);
+ if (n)
+ auto_migrate_lazy = 1;
+ else
+ auto_migrate_lazy = 0;
+ return count;
+}
+MIGRATION_ATTR_RW(auto_migrate_lazy);
+
decl_subsys(migration, NULL, NULL);
EXPORT_SYMBOL(migration_subsys);
@@ -136,6 +167,7 @@ static struct attribute *migration_attrs
&auto_migrate_enable_attr.attr,
&auto_migrate_interval_attr.attr,
&migrate_max_mapcount_attr.attr,
+ &auto_migrate_lazy_attr.attr,
NULL
};
Index: linux-2.6.16-mm1/include/linux/auto-migrate.h
===================================================================
--- linux-2.6.16-mm1.orig/include/linux/auto-migrate.h 2006-03-23 16:50:30.000000000 -0500
+++ linux-2.6.16-mm1/include/linux/auto-migrate.h 2006-03-23 16:50:36.000000000 -0500
@@ -21,6 +21,7 @@ extern unsigned long auto_migrate_interv
#define AUTO_MIGRATE_INTERVAL_MAX (300*HZ)
extern unsigned int migrate_max_mapcount;
+extern int auto_migrate_lazy;
#ifdef _LINUX_SCHED_H /* only used where this is defined */
static inline void check_internode_migration(task_t *task, int dest_cpu)
@@ -101,6 +102,7 @@ out:
#define check_migrate_pending() /* NOTHING */
#define migrate_max_mapcount (1)
+#define auto_migrate_lazy (0)
#endif /* CONFIG_MIGRATION */
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread