* [PATCH/RFC] AutoPage Migration - V0.1 - 1/8 migrate task memory with default policy
@ 2006-03-10 19:37 Lee Schermerhorn
2006-03-13 23:52 ` Christoph Lameter
0 siblings, 1 reply; 4+ messages in thread
From: Lee Schermerhorn @ 2006-03-10 19:37 UTC (permalink / raw)
To: linux-mm
AutoPage Migration - V0.1 - 1/8 migrate task memory with default policy
This patch introduces the mm/mempolicy.c "migrate_task_memory()" function.
When called, this function will migrate all possible task pages with default
policy that are not located on the node that contains the current task's cpu.
migrate_task_memory() operates on one vma at a time, filtering out those
that don't have default policy and that have no access. Added helper
function migrate_vma_to_node()--a slight variant of migrate_to_node()--that
takes a vma instead of an mm struct. Changed comment on migrate_to_node()
to indicate that it operates on entire mm.
I had to move get_vma_policy() up in mempolicy.c so that I could reference
it from migrate_task_memory(). Should I have just added a forward ref
declaration?
Subsequent patches will arrange for this function to be called when a task
returns to user space after the scheduler migrates it to a cpu on a node
different from the node where it last executed.
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Index: linux-2.6.16-rc5-git6/include/linux/mempolicy.h
===================================================================
--- linux-2.6.16-rc5-git6.orig/include/linux/mempolicy.h 2006-03-02 16:40:38.000000000 -0500
+++ linux-2.6.16-rc5-git6/include/linux/mempolicy.h 2006-03-02 16:48:02.000000000 -0500
@@ -172,6 +172,8 @@ static inline void check_highest_zone(in
int do_migrate_pages(struct mm_struct *mm,
const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags);
+extern void migrate_task_memory(void);
+
extern void *cpuset_being_rebound; /* Trigger mpol_copy vma rebind */
#else
@@ -263,6 +265,8 @@ static inline int do_migrate_pages(struc
return 0;
}
+static inline void migrate_task_memory(void) { }
+
static inline void check_highest_zone(int k)
{
}
Index: linux-2.6.16-rc5-git6/mm/mempolicy.c
===================================================================
--- linux-2.6.16-rc5-git6.orig/mm/mempolicy.c 2006-03-02 16:40:44.000000000 -0500
+++ linux-2.6.16-rc5-git6/mm/mempolicy.c 2006-03-06 12:55:27.000000000 -0500
@@ -112,6 +112,24 @@ struct mempolicy default_policy = {
.policy = MPOL_DEFAULT,
};
+/* Return effective policy for a VMA */
+static struct mempolicy * get_vma_policy(struct task_struct *task,
+ struct vm_area_struct *vma, unsigned long addr)
+{
+ struct mempolicy *pol = task->mempolicy;
+
+ if (vma) {
+ if (vma->vm_ops && vma->vm_ops->get_policy)
+ pol = vma->vm_ops->get_policy(vma, addr);
+ else if (vma->vm_policy &&
+ vma->vm_policy->policy != MPOL_DEFAULT)
+ pol = vma->vm_policy;
+ }
+ if (!pol)
+ pol = &default_policy;
+ return pol;
+}
+
/* Do sanity checking on a policy */
static int mpol_check_policy(int mode, nodemask_t *nodes)
{
@@ -629,7 +647,7 @@ out:
}
/*
- * Migrate pages from one node to a target node.
+ * Migrate all eligible pages mapped in mm from source node to destination node.
* Returns error or the number of pages not migrated.
*/
int migrate_to_node(struct mm_struct *mm, int source, int dest, int flags)
@@ -734,6 +752,97 @@ int do_migrate_pages(struct mm_struct *m
return busy;
}
+
+/*
+ * Migrate all eligible pages mapped in vma NOT on destination node to
+ * the destination node.
+ * Returns error or the number of pages not migrated.
+ */
+static int migrate_vma_to_node(struct vm_area_struct *vma, int dest, int flags)
+{
+ nodemask_t nmask;
+ LIST_HEAD(pagelist);
+ int err = 0;
+
+ nodes_clear(nmask);
+ node_set(dest, nmask);
+
+ vma = check_range(vma->vm_mm, vma->vm_start, vma->vm_end, &nmask,
+ flags | MPOL_MF_INVERT, /* pages NOT on dest */
+ &pagelist);
+
+ if (IS_ERR(vma))
+ err = PTR_ERR(vma);
+ else if (!list_empty(&pagelist))
+ err = migrate_pages_to(&pagelist, NULL, dest);
+
+ if (!list_empty(&pagelist))
+ putback_lru_pages(&pagelist);
+ return err;
+}
+
+/*
+ * for filtering 'no access' segments
+TODO: what are these?
+ */
+static inline int vma_no_access(struct vm_area_struct *vma)
+{
+ const int VM_RWX = VM_READ|VM_WRITE|VM_EXEC;
+
+ return (vma->vm_flags & VM_RWX) == 0;
+}
+
+/**
+ * migrate_task_memory()
+ *
+ * Called just before returning to user state when a task has been
+ * migrated to a new node by the schedule and sched_migrate_memory
+ * is enabled. Walks the current task's mm_struct's vma list and
+ * migrates pages of eligible vmas to the new node. Eligible
+ * vmas are those with null or default memory policy, because
+ * default policy depends on local/home node.
+ */
+
+void migrate_task_memory(void)
+{
+ struct mm_struct *mm = NULL;
+ struct vm_area_struct *vma;
+ int dest;
+
+ BUG_ON(irqs_disabled());
+
+ mm = current->mm;
+ /*
+ * we're returning to user space, so mm must be non-NULL
+ */
+ BUG_ON(!mm);
+
+ /*
+ * migrate eligible vma's pages
+ */
+ dest = cpu_to_node(task_cpu(current));
+ down_read(&mm->mmap_sem);
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ struct mempolicy *pol = get_vma_policy(current, vma,
+ vma->vm_start);
+ int err;
+
+ if (pol->policy != MPOL_DEFAULT)
+ continue;
+ if (vma_no_access(vma))
+ continue;
+
+ // TODO: more eligibility filtering?
+
+ // TODO: more agressive migration ['MOVE_ALL] ?
+ // via sysctl?
+ err = migrate_vma_to_node(vma, dest, MPOL_MF_MOVE);
+
+ }
+ up_read(&mm->mmap_sem);
+
+}
+
long do_mbind(unsigned long start, unsigned long len,
unsigned long mode, nodemask_t *nmask, unsigned long flags)
{
@@ -1067,24 +1176,6 @@ asmlinkage long compat_sys_mbind(compat_
#endif
-/* Return effective policy for a VMA */
-static struct mempolicy * get_vma_policy(struct task_struct *task,
- struct vm_area_struct *vma, unsigned long addr)
-{
- struct mempolicy *pol = task->mempolicy;
-
- if (vma) {
- if (vma->vm_ops && vma->vm_ops->get_policy)
- pol = vma->vm_ops->get_policy(vma, addr);
- else if (vma->vm_policy &&
- vma->vm_policy->policy != MPOL_DEFAULT)
- pol = vma->vm_policy;
- }
- if (!pol)
- pol = &default_policy;
- return pol;
-}
-
/* Return a zonelist representing a mempolicy */
static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy)
{
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH/RFC] AutoPage Migration - V0.1 - 1/8 migrate task memory with default policy
2006-03-10 19:37 [PATCH/RFC] AutoPage Migration - V0.1 - 1/8 migrate task memory with default policy Lee Schermerhorn
@ 2006-03-13 23:52 ` Christoph Lameter
2006-03-14 14:46 ` Lee Schermerhorn
0 siblings, 1 reply; 4+ messages in thread
From: Christoph Lameter @ 2006-03-13 23:52 UTC (permalink / raw)
To: Lee Schermerhorn; +Cc: linux-mm
On Fri, 10 Mar 2006, Lee Schermerhorn wrote:
> +/*
> + * Migrate all eligible pages mapped in vma NOT on destination node to
> + * the destination node.
> + * Returns error or the number of pages not migrated.
> + */
> +static int migrate_vma_to_node(struct vm_area_struct *vma, int dest, int flags)
> +{
This duplicates code in migrate_to_node().
> +/*
> + * for filtering 'no access' segments
> +TODO: what are these?
??
> + down_read(&mm->mmap_sem);
> + for (vma = mm->mmap; vma; vma = vma->vm_next) {
> + struct mempolicy *pol = get_vma_policy(current, vma,
> + vma->vm_start);
> + int err;
> +
> + if (pol->policy != MPOL_DEFAULT)
> + continue;
> + if (vma_no_access(vma))
> + continue;
> +
> + // TODO: more eligibility filtering?
> +
> + // TODO: more agressive migration ['MOVE_ALL] ?
> + // via sysctl?
> + err = migrate_vma_to_node(vma, dest, MPOL_MF_MOVE);
> +
> + }
Duplicates code in migrate_to_node().
Could you add some special casing instead to migrate_to_node and/or
check_range?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH/RFC] AutoPage Migration - V0.1 - 1/8 migrate task memory with default policy
2006-03-13 23:52 ` Christoph Lameter
@ 2006-03-14 14:46 ` Lee Schermerhorn
2006-03-14 15:36 ` Christoph Lameter
0 siblings, 1 reply; 4+ messages in thread
From: Lee Schermerhorn @ 2006-03-14 14:46 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-mm
On Mon, 2006-03-13 at 15:52 -0800, Christoph Lameter wrote:
> On Fri, 10 Mar 2006, Lee Schermerhorn wrote:
>
> > +/*
> > + * Migrate all eligible pages mapped in vma NOT on destination node to
> > + * the destination node.
> > + * Returns error or the number of pages not migrated.
> > + */
> > +static int migrate_vma_to_node(struct vm_area_struct *vma, int dest, int flags)
> > +{
>
> This duplicates code in migrate_to_node().
Yes. At this point, this is intentional. I wanted to be able to see
what I'm doing.
More below...
>
> > +/*
> > + * for filtering 'no access' segments
> > +TODO: what are these?
>
> ??
>
> > + down_read(&mm->mmap_sem);
> > + for (vma = mm->mmap; vma; vma = vma->vm_next) {
> > + struct mempolicy *pol = get_vma_policy(current, vma,
> > + vma->vm_start);
> > + int err;
> > +
> > + if (pol->policy != MPOL_DEFAULT)
> > + continue;
> > + if (vma_no_access(vma))
> > + continue;
> > +
> > + // TODO: more eligibility filtering?
> > +
> > + // TODO: more agressive migration ['MOVE_ALL] ?
> > + // via sysctl?
> > + err = migrate_vma_to_node(vma, dest, MPOL_MF_MOVE);
> > +
> > + }
>
> Duplicates code in migrate_to_node().
Again, yes...
>
> Could you add some special casing instead to migrate_to_node and/or
> check_range?
I think this could be done. Don't know whether the results would be
"pretty" or not.
Currently, you'll note that I'm calling check_range for one vma at a
time. I'm not sure this is a good idea. It probably adds overhead
revisiting upper level page table pages many times. But, I want to
compare different approaches. If I use migrate_to_node() and its call
to check_range(), I would have to have something like the above logic to
do the per vma stuff. But, why per vma? I agree it doesn't make a lot
of sense for the kernel build workload. I find very few eligible pages
to migrate, so even if I scanned the entire mm at once, the resulting
page list would be very small. However, I was concerned about tying up
a large number of pages, isolated from the LRU, for applications with
larger footprints. I'm also going to experiment with more aggressive
migration--i.e., selecting pages with >1 map counts. This may result in
larger numbers of pages migrating.
But, I have thought about adding internal flags to steer different paths
through migrate_to_node() and check_range(). If we ever get serious
about including an automigration mechanism like this, I'll go ahead and
take a look at it.
Lee
Lee
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH/RFC] AutoPage Migration - V0.1 - 1/8 migrate task memory with default policy
2006-03-14 14:46 ` Lee Schermerhorn
@ 2006-03-14 15:36 ` Christoph Lameter
0 siblings, 0 replies; 4+ messages in thread
From: Christoph Lameter @ 2006-03-14 15:36 UTC (permalink / raw)
To: Lee Schermerhorn; +Cc: linux-mm
On Tue, 14 Mar 2006, Lee Schermerhorn wrote:
> > Could you add some special casing instead to migrate_to_node and/or
> > check_range?
>
> I think this could be done. Don't know whether the results would be
> "pretty" or not.
Make it as pretty as possible.
> Currently, you'll note that I'm calling check_range for one vma at a
> time. I'm not sure this is a good idea. It probably adds overhead
> revisiting upper level page table pages many times. But, I want to
> compare different approaches. If I use migrate_to_node() and its call
> to check_range(), I would have to have something like the above logic to
> do the per vma stuff. But, why per vma? I agree it doesn't make a lot
> of sense for the kernel build workload. I find very few eligible pages
> to migrate, so even if I scanned the entire mm at once, the resulting
> page list would be very small. However, I was concerned about tying up
> a large number of pages, isolated from the LRU, for applications with
> larger footprints. I'm also going to experiment with more aggressive
Well if you just find a few pages to migrate then the pages isolated from
the LRU will also be few.
> migration--i.e., selecting pages with >1 map counts. This may result in
> larger numbers of pages migrating.
Yes and doing so may stall the concurrent compiler passes.
> But, I have thought about adding internal flags to steer different paths
> through migrate_to_node() and check_range(). If we ever get serious
> about including an automigration mechanism like this, I'll go ahead and
> take a look at it.
Ok.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2006-03-14 15:36 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-03-10 19:37 [PATCH/RFC] AutoPage Migration - V0.1 - 1/8 migrate task memory with default policy Lee Schermerhorn
2006-03-13 23:52 ` Christoph Lameter
2006-03-14 14:46 ` Lee Schermerhorn
2006-03-14 15:36 ` Christoph Lameter
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox