* [0/5] memrlimit fixes
From: Balbir Singh @ 2008-06-26 9:28 UTC
To: Andrew Morton
Cc: Hugh Dickins, YAMAMOTO Takashi, Paul Menage, linux-kernel,
linux-mm, Balbir Singh, KAMEZAWA Hiroyuki
Hi, Andrew,
These are fixes for the memrlimit cgroup controller. Patch 1 (improve
error handling) has been redone. A detailed changelog can be found in each
patch. I've tested the patches by running kernbench in a memrlimit-controlled
cgroup.
series
------
memrlimit-cgroup-add-better-error-handling
memrlimit-cgroup-fix-attach-task
memrlimit-fix-sleep-in-spinlock-bug
memrlimit-improve-fork-error-handling
memrlimit-fix-move-vma-accounting
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
* [1/5] memrlimit improve error handling
From: Balbir Singh @ 2008-06-26 9:28 UTC
To: Andrew Morton
Cc: Hugh Dickins, YAMAMOTO Takashi, Paul Menage, linux-kernel,
linux-mm, Balbir Singh, KAMEZAWA Hiroyuki
Andrew,
This is a newer version of the patch; could you please drop the older one
and pick this up instead. Apologies for the inconvenience.
Changelog v2->v1
1. Address some of Hugh's comments: reuse the existing error variable (don't add a new ret variable).
TODO: Merge the ret & ~PAGE_MASK check with vm_expanded.
The memrlimit cgroup does not handle error cases after may_expand_vm(), so a
charge taken there is not undone when a later check fails. This bug was
reported by Kamezawa, with the test case below to reproduce it:
[root@iridium kamezawa]# cat /opt/cgroup/test/memrlimit.usage_in_bytes
71921664
[root@iridium kamezawa]# ulimit -s 3
[root@iridium kamezawa]# ls
Killed
[root@iridium kamezawa]# ls
Killed
[root@iridium kamezawa]# ls
Killed
[root@iridium kamezawa]# ls
Killed
[root@iridium kamezawa]# ls
Killed
[root@iridium kamezawa]# ulimit -s unlimited
[root@iridium kamezawa]# cat /opt/cgroup/test/memrlimit.usage_in_bytes
72368128
[root@iridium kamezawa]#
This patch adds better error handling to fix the reported problem.
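For context, the charge itself is taken inside may_expand_vm(); a minimal
sketch of the memrlimit version (simplified, and not necessarily the exact
code in the tree) looks roughly like this, which is why every failure path
after the call must uncharge with memrlimit_cgroup_uncharge_as():

/*
 * Simplified sketch, for illustration only -- the real hook in the
 * memrlimit series may differ in detail.  The cgroup charge is taken
 * inside this check, so callers that fail afterwards must undo it.
 */
int may_expand_vm(struct mm_struct *mm, unsigned long npages)
{
	unsigned long cur = mm->total_vm;	/* pages */
	unsigned long lim;

	lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;

	if (cur + npages > lim)
		return 0;

	if (memrlimit_cgroup_charge_as(mm, npages))
		return 0;	/* over the cgroup's address space limit */

	return 1;
}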
Reported-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---
mm/mmap.c | 36 +++++++++++++++++++++++++-----------
mm/mremap.c | 6 ++++++
2 files changed, 31 insertions(+), 11 deletions(-)
diff -puN mm/mmap.c~memrlimit-cgroup-add-better-error-handling mm/mmap.c
--- linux-2.6.26-rc5/mm/mmap.c~memrlimit-cgroup-add-better-error-handling 2008-06-26 14:38:55.000000000 +0530
+++ linux-2.6.26-rc5-balbir/mm/mmap.c 2008-06-26 14:38:55.000000000 +0530
@@ -1123,7 +1123,7 @@ munmap_back:
*/
charged = len >> PAGE_SHIFT;
if (security_vm_enough_memory(charged))
- return -ENOMEM;
+ goto undo_charge;
vm_flags |= VM_ACCOUNT;
}
}
@@ -1245,6 +1245,8 @@ free_vma:
unacct_error:
if (charged)
vm_unacct_memory(charged);
+undo_charge:
+ memrlimit_cgroup_uncharge_as(mm, len >> PAGE_SHIFT);
return error;
}
@@ -1540,14 +1542,15 @@ static int acct_stack_growth(struct vm_a
struct mm_struct *mm = vma->vm_mm;
struct rlimit *rlim = current->signal->rlim;
unsigned long new_start;
+ int ret = -ENOMEM;
/* address space limit tests */
if (!may_expand_vm(mm, grow))
- return -ENOMEM;
+ goto out;
/* Stack limit test */
if (size > rlim[RLIMIT_STACK].rlim_cur)
- return -ENOMEM;
+ goto undo_charge;
/* mlock limit tests */
if (vma->vm_flags & VM_LOCKED) {
@@ -1556,21 +1559,23 @@ static int acct_stack_growth(struct vm_a
locked = mm->locked_vm + grow;
limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
if (locked > limit && !capable(CAP_IPC_LOCK))
- return -ENOMEM;
+ goto undo_charge;
}
/* Check to ensure the stack will not grow into a hugetlb-only region */
new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
vma->vm_end - size;
- if (is_hugepage_only_range(vma->vm_mm, new_start, size))
- return -EFAULT;
+ if (is_hugepage_only_range(vma->vm_mm, new_start, size)) {
+ ret = -EFAULT;
+ goto undo_charge;
+ }
/*
* Overcommit.. This must be the final test, as it will
* update security statistics.
*/
if (security_vm_enough_memory(grow))
- return -ENOMEM;
+ goto undo_charge;
/* Ok, everything looks good - let it rip */
mm->total_vm += grow;
@@ -1578,6 +1583,11 @@ static int acct_stack_growth(struct vm_a
mm->locked_vm += grow;
vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
return 0;
+undo_charge:
+ /* Undo memrlimit charge */
+ memrlimit_cgroup_uncharge_as(mm, grow);
+out:
+ return ret;
}
#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
@@ -2033,15 +2043,16 @@ unsigned long do_brk(unsigned long addr,
goto munmap_back;
}
+ error = -ENOMEM;
/* Check against address space limits *after* clearing old maps... */
if (!may_expand_vm(mm, len >> PAGE_SHIFT))
- return -ENOMEM;
+ return error;
if (mm->map_count > sysctl_max_map_count)
- return -ENOMEM;
+ goto undo_charge;
if (security_vm_enough_memory(len >> PAGE_SHIFT))
- return -ENOMEM;
+ goto undo_charge;
/* Can we just expand an old private anonymous mapping? */
vma = vma_merge(mm, prev, addr, addr + len, flags,
@@ -2055,7 +2066,7 @@ unsigned long do_brk(unsigned long addr,
vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (!vma) {
vm_unacct_memory(len >> PAGE_SHIFT);
- return -ENOMEM;
+ goto undo_charge;
}
vma->vm_mm = mm;
@@ -2073,6 +2084,9 @@ out:
mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
}
return addr;
+undo_charge:
+ memrlimit_cgroup_uncharge_as(mm, len >> PAGE_SHIFT);
+ return error;
}
EXPORT_SYMBOL(do_brk);
diff -puN mm/mremap.c~memrlimit-cgroup-add-better-error-handling mm/mremap.c
--- linux-2.6.26-rc5/mm/mremap.c~memrlimit-cgroup-add-better-error-handling 2008-06-26 14:38:55.000000000 +0530
+++ linux-2.6.26-rc5-balbir/mm/mremap.c 2008-06-26 14:38:55.000000000 +0530
@@ -18,6 +18,7 @@
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/syscalls.h>
+#include <linux/memrlimitcgroup.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@@ -256,6 +257,7 @@ unsigned long do_mremap(unsigned long ad
struct vm_area_struct *vma;
unsigned long ret = -EINVAL;
unsigned long charged = 0;
+ int vm_expanded = 0;
if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
goto out;
@@ -349,6 +351,7 @@ unsigned long do_mremap(unsigned long ad
goto out;
}
+ vm_expanded = 1;
if (vma->vm_flags & VM_ACCOUNT) {
charged = (new_len - old_len) >> PAGE_SHIFT;
if (security_vm_enough_memory(charged))
@@ -411,6 +414,9 @@ out:
if (ret & ~PAGE_MASK)
vm_unacct_memory(charged);
out_nc:
+ if ((ret & ~PAGE_MASK) && vm_expanded)
+ memrlimit_cgroup_uncharge_as(mm,
+ (new_len - old_len) >> PAGE_SHIFT);
return ret;
}
_
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
* [2/5] memrlimit handle attach_task() failure, add can_attach() callback
From: Balbir Singh @ 2008-06-26 9:28 UTC
To: Andrew Morton
Cc: Hugh Dickins, YAMAMOTO Takashi, Paul Menage, linux-kernel,
linux-mm, Balbir Singh, KAMEZAWA Hiroyuki
Changelog v2->v1
1. Rename res_counter_add_check() to res_counter_can_add().
Note: making the first argument (struct res_counter *) a const pointer causes
the compiler to spew warnings from the spin_(un)lock_irq* routines, since we
would then be passing the address of a lock inside a const object to the lock
routines.
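For illustration only (this variant is not part of the patch): with a
const-qualified first argument, handing &cnt->lock to the spinlock routines
makes gcc warn that the const qualifier is being discarded, because the lock
has to be modified:

/* Hypothetical const-qualified variant, shown only to illustrate the
 * warning described above; the patch keeps the argument non-const.
 */
static inline bool res_counter_can_add(const struct res_counter *cnt,
				       unsigned long val)
{
	bool ret = false;
	unsigned long flags;

	/* gcc warns here: &cnt->lock is a pointer to const, but the
	 * spinlock implementation needs to write to the lock.
	 */
	spin_lock_irqsave(&cnt->lock, flags);
	if (cnt->usage + val <= cnt->limit)
		ret = true;
	spin_unlock_irqrestore(&cnt->lock, flags);
	return ret;
}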
This patch fixes a task migration problem reported by Kamezawa-san. It should
fix all issues with migration, except for a rare condition documented in
memrlimit_cgroup_move_task(); fixing that would require adding transaction
properties to cgroups.
The problem reported was that migrating a task into a group whose limit was
too small to accept it caused a kernel warning.
Steps to reproduce:
% mkdir /dev/cgroup/memrlimit/group_01
% mkdir /dev/cgroup/memrlimit/group_02
% echo 1G > /dev/cgroup/memrlimit/group_01/memrlimit.limit_in_bytes
% echo 0 > /dev/cgroup/memrlimit/group_02/memrlimit.limit_in_bytes
% echo $$ > /dev/cgroup/memrlimit/group_01/tasks
% echo $$ > /dev/cgroup/memrlimit/group_02/tasks
% exit
memrlimit does the right thing by not moving the charges to group_02,
but the task is still placed in group_02 (since we did not use can_attach()
to fail the migration). Once there, when we echo the task back to the root
cgroup, the controller tries to uncharge the cost of the task from group_02.
group_02 has no charge associated with the task, hence the warning.
Reported-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---
include/linux/res_counter.h | 18 ++++++++++++++++++
mm/memrlimitcgroup.c | 43 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 61 insertions(+)
diff -puN mm/memrlimitcgroup.c~memrlimit-cgroup-fix-attach-task mm/memrlimitcgroup.c
--- linux-2.6.26-rc5/mm/memrlimitcgroup.c~memrlimit-cgroup-fix-attach-task 2008-06-26 14:42:21.000000000 +0530
+++ linux-2.6.26-rc5-balbir/mm/memrlimitcgroup.c 2008-06-26 14:42:21.000000000 +0530
@@ -166,6 +166,38 @@ static int memrlimit_cgroup_populate(str
ARRAY_SIZE(memrlimit_cgroup_files));
}
+static int memrlimit_cgroup_can_move_task(struct cgroup_subsys *ss,
+ struct cgroup *cgrp,
+ struct task_struct *p)
+{
+ struct mm_struct *mm;
+ struct memrlimit_cgroup *memrcg;
+ int ret = 0;
+
+ mm = get_task_mm(p);
+ if (mm == NULL)
+ return -EINVAL;
+
+ /*
+ * Hold mmap_sem, so that total_vm does not change underneath us
+ */
+ down_read(&mm->mmap_sem);
+
+ rcu_read_lock();
+ if (p != rcu_dereference(mm->owner))
+ goto out;
+
+ memrcg = memrlimit_cgroup_from_cgrp(cgrp);
+
+ if (!res_counter_can_add(&memrcg->as_res, (mm->total_vm << PAGE_SHIFT)))
+ ret = -ENOMEM;
+out:
+ rcu_read_unlock();
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ return ret;
+}
+
static void memrlimit_cgroup_move_task(struct cgroup_subsys *ss,
struct cgroup *cgrp,
struct cgroup *old_cgrp,
@@ -193,6 +225,16 @@ static void memrlimit_cgroup_move_task(s
if (memrcg == old_memrcg)
goto out;
+ /*
+ * TBD: Even though we do the necessary checks in can_attach(),
+ * by the time we come here, there is a chance that we still
+ * fail (the memrlimit cgroup has grown its usage, and the
+ * addition of total_vm will no longer fit into its limit)
+ *
+ * We need transactional support in cgroups to let us know
+ * if can_attach() has failed and call attach_failed() on
+ * cgroups for which can_attach() succeeded.
+ */
if (res_counter_charge(&memrcg->as_res, (mm->total_vm << PAGE_SHIFT)))
goto out;
res_counter_uncharge(&old_memrcg->as_res, (mm->total_vm << PAGE_SHIFT));
@@ -231,6 +273,7 @@ struct cgroup_subsys memrlimit_cgroup_su
.destroy = memrlimit_cgroup_destroy,
.populate = memrlimit_cgroup_populate,
.attach = memrlimit_cgroup_move_task,
+ .can_attach = memrlimit_cgroup_can_move_task,
.mm_owner_changed = memrlimit_cgroup_mm_owner_changed,
.early_init = 0,
};
diff -puN kernel/res_counter.c~memrlimit-cgroup-fix-attach-task kernel/res_counter.c
diff -puN include/linux/res_counter.h~memrlimit-cgroup-fix-attach-task include/linux/res_counter.h
--- linux-2.6.26-rc5/include/linux/res_counter.h~memrlimit-cgroup-fix-attach-task 2008-06-26 14:42:21.000000000 +0530
+++ linux-2.6.26-rc5-balbir/include/linux/res_counter.h 2008-06-26 14:44:39.000000000 +0530
@@ -153,4 +153,22 @@ static inline void res_counter_reset_fai
cnt->failcnt = 0;
spin_unlock_irqrestore(&cnt->lock, flags);
}
+
+/*
+ * Add the value val to the resource counter and check if we are
+ * still under the limit.
+ */
+static inline bool res_counter_can_add(struct res_counter *cnt,
+ unsigned long val)
+{
+ bool ret = false;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cnt->lock, flags);
+ if (cnt->usage + val <= cnt->limit)
+ ret = true;
+ spin_unlock_irqrestore(&cnt->lock, flags);
+ return ret;
+}
+
#endif
_
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
* [3/5] memrlimit fix sleep inside sleeplock in mm_update_next_owner()
From: Balbir Singh @ 2008-06-26 9:28 UTC
To: Andrew Morton
Cc: Hugh Dickins, YAMAMOTO Takashi, Paul Menage, linux-kernel,
linux-mm, Balbir Singh, KAMEZAWA Hiroyuki
We have a sleep inside a spinlock: mm_update_next_owner() tries to acquire
mmap_sem (down_write() can sleep) while still holding the read side of
tasklist_lock. Since we take a reference on the task_struct of the new owner,
we can release the read_lock before acquiring mmap_sem and the task_lock of
the chosen task.
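In sketch form, the ordering the patch establishes (a trimmed excerpt, not
the full mm_update_next_owner(); see the diff below for the real change):

	/*
	 * tasklist_lock is an rwlock, i.e. a spinning lock, so nothing that
	 * can sleep may run while it is read-held.  down_write() on mmap_sem
	 * can sleep, so the read_unlock() must come first; the reference from
	 * get_task_struct() keeps c from going away in the window.
	 */
	get_task_struct(c);
	read_unlock(&tasklist_lock);	/* drop the spinning lock ...     */
	down_write(&mm->mmap_sem);	/* ... before this possible sleep */
	task_lock(c);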
Reported-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---
kernel/exit.c | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
diff -puN kernel/exit.c~memrlimit-fix-sleep-in-spinlock-bug kernel/exit.c
--- linux-2.6.26-rc5/kernel/exit.c~memrlimit-fix-sleep-in-spinlock-bug 2008-06-26 14:48:21.000000000 +0530
+++ linux-2.6.26-rc5-balbir/kernel/exit.c 2008-06-26 14:48:21.000000000 +0530
@@ -636,28 +636,24 @@ retry:
assign_new_owner:
BUG_ON(c == p);
get_task_struct(c);
+ read_unlock(&tasklist_lock);
down_write(&mm->mmap_sem);
/*
* The task_lock protects c->mm from changing.
* We always want mm->owner->mm == mm
*/
task_lock(c);
- /*
- * Delay read_unlock() till we have the task_lock()
- * to ensure that c does not slip away underneath us
- */
- read_unlock(&tasklist_lock);
if (c->mm != mm) {
task_unlock(c);
- put_task_struct(c);
up_write(&mm->mmap_sem);
+ put_task_struct(c);
goto retry;
}
cgroup_mm_owner_callbacks(mm->owner, c);
mm->owner = c;
task_unlock(c);
- put_task_struct(c);
up_write(&mm->mmap_sem);
+ put_task_struct(c);
}
#endif /* CONFIG_MM_OWNER */
_
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
* [4/5] memrlimit improve fork and error handling
From: Balbir Singh @ 2008-06-26 9:29 UTC
To: Andrew Morton
Cc: Hugh Dickins, YAMAMOTO Takashi, Paul Menage, linux-kernel,
linux-mm, Balbir Singh, KAMEZAWA Hiroyuki
The fork path of the memrlimit patches added an additional down_write() of
mmap_sem. Ideally, memrlimit should be zero overhead for non-users, and the
error handling path also needed improvement. This patch fixes both problems:
the accounting has now been moved from copy_mm() to dup_mmap(), which already
takes mmap_sem, and a failed charge is unwound via mmput() in dup_mm().
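The error flow this relies on, in sketch form (heavily simplified and with a
hypothetical name -- dup_mm_sketch() is not the real dup_mm(), and the copy
and init of the new mm are elided): when dup_mmap() fails after charging,
dropping the half-built mm with mmput() is what unwinds the charge, so
copy_mm() no longer needs its own charge/uncharge dance:

static struct mm_struct *dup_mm_sketch(void)
{
	struct mm_struct *mm, *oldmm = current->mm;

	mm = allocate_mm();		/* copy of oldmm and mm_init() elided */
	if (!mm)
		return NULL;

	if (dup_mmap(mm, oldmm)) {	/* memrlimit charge taken in here     */
		mmput(mm);		/* teardown drops the charge again    */
		return NULL;
	}
	return mm;
}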
Reported-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---
kernel/fork.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff -puN kernel/fork.c~memrlimit-improve-fork-error-handling kernel/fork.c
--- linux-2.6.26-rc5/kernel/fork.c~memrlimit-improve-fork-error-handling 2008-06-26 14:48:23.000000000 +0530
+++ linux-2.6.26-rc5-balbir/kernel/fork.c 2008-06-26 14:48:23.000000000 +0530
@@ -261,7 +261,7 @@ static int dup_mmap(struct mm_struct *mm
struct vm_area_struct *mpnt, *tmp, **pprev;
struct rb_node **rb_link, *rb_parent;
int retval;
- unsigned long charge;
+ unsigned long charge, uncharged = 0;
struct mempolicy *pol;
down_write(&oldmm->mmap_sem);
@@ -271,6 +271,15 @@ static int dup_mmap(struct mm_struct *mm
*/
down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
+ /*
+ * Uncharging as a result of failure is done by mmput()
+ * in dup_mm()
+ */
+ if (memrlimit_cgroup_charge_as(oldmm, oldmm->total_vm)) {
+ retval = -ENOMEM;
+ goto out;
+ }
+
mm->locked_vm = 0;
mm->mmap = NULL;
mm->mmap_cache = NULL;
@@ -292,6 +301,7 @@ static int dup_mmap(struct mm_struct *mm
vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
-pages);
memrlimit_cgroup_uncharge_as(mm, pages);
+ uncharged += pages;
continue;
}
charge = 0;
@@ -629,12 +639,6 @@ static int copy_mm(unsigned long clone_f
atomic_inc(&oldmm->mm_users);
mm = oldmm;
goto good_mm;
- } else {
- down_write(&oldmm->mmap_sem);
- retval = memrlimit_cgroup_charge_as(oldmm, oldmm->total_vm);
- up_write(&oldmm->mmap_sem);
- if (retval)
- goto fail_nomem;
}
retval = -ENOMEM;
diff -puN kernel/exit.c~memrlimit-improve-fork-error-handling kernel/exit.c
_
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
* [5/5] memrlimit correct mremap and move_vma accounting
From: Balbir Singh @ 2008-06-26 9:29 UTC
To: Andrew Morton
Cc: Hugh Dickins, YAMAMOTO Takashi, Paul Menage, linux-kernel,
linux-mm, Balbir Singh, KAMEZAWA Hiroyuki
The memrlimit patches did not account for move_vma(), since we already
account for address space usage in do_mremap(). The existing code flow,
however, increments total_vm twice (once in do_mremap() and once in
move_vma()); the excess is later removed in remove_vma_list() via
do_munmap(). Because we did not mirror that duplicate accounting, the
controller saw the extra uncharge, which broke our accounting. This patch
fixes the problem by charging in move_vma() and dropping the charge taken in
do_mremap() before falling through to the move path.
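In sketch form, the charge pairing this patch sets up (annotated fragments of
the calls added below, with the assumed net effect spelled out; see the diff
for the real context):

	/* do_mremap(), before falling through to the move path: give back
	 * the (new_len - old_len) charged when the request was validated.
	 */
	memrlimit_cgroup_uncharge_as(mm, (new_len - old_len) >> PAGE_SHIFT);

	/* move_vma(): charge for the whole new mapping, mirroring its own
	 * total_vm increment ...
	 */
	if (memrlimit_cgroup_charge_as(mm, new_len >> PAGE_SHIFT))
		return -ENOMEM;

	/* ... and the later do_munmap() of the old range uncharges old_len,
	 * leaving a net charge of (new_len - old_len) for the move.
	 */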
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---
mm/mremap.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff -puN mm/mremap.c~memrlimit-fix-move-vma-accounting mm/mremap.c
--- linux-2.6.26-rc5/mm/mremap.c~memrlimit-fix-move-vma-accounting 2008-06-26 14:48:25.000000000 +0530
+++ linux-2.6.26-rc5-balbir/mm/mremap.c 2008-06-26 14:48:25.000000000 +0530
@@ -177,10 +177,15 @@ static unsigned long move_vma(struct vm_
if (mm->map_count >= sysctl_max_map_count - 3)
return -ENOMEM;
+ if (memrlimit_cgroup_charge_as(mm, new_len >> PAGE_SHIFT))
+ return -ENOMEM;
+
new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
- if (!new_vma)
+ if (!new_vma) {
+ memrlimit_cgroup_uncharge_as(mm, new_len >> PAGE_SHIFT);
return -ENOMEM;
+ }
moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
if (moved_len < old_len) {
@@ -386,6 +391,8 @@ unsigned long do_mremap(unsigned long ad
}
}
+ memrlimit_cgroup_uncharge_as(mm, (new_len - old_len) >> PAGE_SHIFT);
+
/*
* We weren't able to just expand or shrink the area,
* we need to create a new one and move it..
_
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL