From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from d01relay02.pok.ibm.com (d01relay02.pok.ibm.com [9.56.227.234]) by e3.ny.us.ibm.com (8.13.8/8.13.8) with ESMTP id m5Q9SYgS009571 for ; Thu, 26 Jun 2008 05:28:34 -0400 Received: from d01av02.pok.ibm.com (d01av02.pok.ibm.com [9.56.224.216]) by d01relay02.pok.ibm.com (8.13.8/8.13.8/NCO v9.0) with ESMTP id m5Q9SYUD232998 for ; Thu, 26 Jun 2008 05:28:34 -0400 Received: from d01av02.pok.ibm.com (loopback [127.0.0.1]) by d01av02.pok.ibm.com (8.12.11.20060308/8.13.3) with ESMTP id m5Q9SX4u012140 for ; Thu, 26 Jun 2008 05:28:34 -0400 From: Balbir Singh Date: Thu, 26 Jun 2008 14:58:29 +0530 Message-Id: <20080626092829.16841.8616.sendpatchset@balbir-laptop> In-Reply-To: <20080626092815.16841.54817.sendpatchset@balbir-laptop> References: <20080626092815.16841.54817.sendpatchset@balbir-laptop> Subject: [1/5] memrlimit improve error handling Sender: owner-linux-mm@kvack.org Return-Path: To: Andrew Morton Cc: Hugh Dickins , YAMAMOTO Takashi , Paul Menage , linux-kernel@vger.kernel.org, linux-mm@kvack.org, Balbir Singh , KAMEZAWA Hiroyuki List-ID: Andrew, This is a newer version of the patch; could you please drop the older one and pick this one up instead. Apologies for the inconvenience. Changelog v1->v2 1. Address some of Hugh's comments. Reuse error (don't add new ret variable) TODO: Merge ret & ~PAGE_MASK and vm_expanded. memrlimit cgroup does not handle error cases after may_expand_vm(). 
This BUG was reported by Kamezawa, with the test case below to reproduce it [root@iridium kamezawa]# cat /opt/cgroup/test/memrlimit.usage_in_bytes 71921664 [root@iridium kamezawa]# ulimit -s 3 [root@iridium kamezawa]# ls Killed [root@iridium kamezawa]# ls Killed [root@iridium kamezawa]# ls Killed [root@iridium kamezawa]# ls Killed [root@iridium kamezawa]# ls Killed [root@iridium kamezawa]# ulimit -s unlimited [root@iridium kamezawa]# cat /opt/cgroup/test/memrlimit.usage_in_bytes 72368128 [root@iridium kamezawa]# This patch adds better handling support to fix the reported problem. Reported-By: kamezawa.hiroyu@jp.fujitsu.com Signed-off-by: Balbir Singh --- mm/mmap.c | 36 +++++++++++++++++++++++++----------- mm/mremap.c | 6 ++++++ 2 files changed, 31 insertions(+), 11 deletions(-) diff -puN mm/mmap.c~memrlimit-cgroup-add-better-error-handling mm/mmap.c --- linux-2.6.26-rc5/mm/mmap.c~memrlimit-cgroup-add-better-error-handling 2008-06-26 14:38:55.000000000 +0530 +++ linux-2.6.26-rc5-balbir/mm/mmap.c 2008-06-26 14:38:55.000000000 +0530 @@ -1123,7 +1123,7 @@ munmap_back: */ charged = len >> PAGE_SHIFT; if (security_vm_enough_memory(charged)) - return -ENOMEM; + goto undo_charge; vm_flags |= VM_ACCOUNT; } } @@ -1245,6 +1245,8 @@ free_vma: unacct_error: if (charged) vm_unacct_memory(charged); +undo_charge: + memrlimit_cgroup_uncharge_as(mm, len >> PAGE_SHIFT); return error; } @@ -1540,14 +1542,15 @@ static int acct_stack_growth(struct vm_a struct mm_struct *mm = vma->vm_mm; struct rlimit *rlim = current->signal->rlim; unsigned long new_start; + int ret = -ENOMEM; /* address space limit tests */ if (!may_expand_vm(mm, grow)) - return -ENOMEM; + goto out; /* Stack limit test */ if (size > rlim[RLIMIT_STACK].rlim_cur) - return -ENOMEM; + goto undo_charge; /* mlock limit tests */ if (vma->vm_flags & VM_LOCKED) { @@ -1556,21 +1559,23 @@ static int acct_stack_growth(struct vm_a locked = mm->locked_vm + grow; limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; if (locked > limit 
&& !capable(CAP_IPC_LOCK)) - return -ENOMEM; + goto undo_charge; } /* Check to ensure the stack will not grow into a hugetlb-only region */ new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start : vma->vm_end - size; - if (is_hugepage_only_range(vma->vm_mm, new_start, size)) - return -EFAULT; + if (is_hugepage_only_range(vma->vm_mm, new_start, size)) { + ret = -EFAULT; + goto undo_charge; + } /* * Overcommit.. This must be the final test, as it will * update security statistics. */ if (security_vm_enough_memory(grow)) - return -ENOMEM; + goto undo_charge; /* Ok, everything looks good - let it rip */ mm->total_vm += grow; @@ -1578,6 +1583,11 @@ static int acct_stack_growth(struct vm_a mm->locked_vm += grow; vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow); return 0; +undo_charge: + /* Undo memrlimit charge */ + memrlimit_cgroup_uncharge_as(mm, grow); +out: + return ret; } #if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64) @@ -2033,15 +2043,16 @@ unsigned long do_brk(unsigned long addr, goto munmap_back; } + error = -ENOMEM; /* Check against address space limits *after* clearing old maps... */ if (!may_expand_vm(mm, len >> PAGE_SHIFT)) - return -ENOMEM; + return error; if (mm->map_count > sysctl_max_map_count) - return -ENOMEM; + goto undo_charge; if (security_vm_enough_memory(len >> PAGE_SHIFT)) - return -ENOMEM; + goto undo_charge; /* Can we just expand an old private anonymous mapping? 
*/ vma = vma_merge(mm, prev, addr, addr + len, flags, @@ -2055,7 +2066,7 @@ unsigned long do_brk(unsigned long addr, vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (!vma) { vm_unacct_memory(len >> PAGE_SHIFT); - return -ENOMEM; + goto undo_charge; } vma->vm_mm = mm; @@ -2073,6 +2084,9 @@ out: mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages; } return addr; +undo_charge: + memrlimit_cgroup_uncharge_as(mm, len >> PAGE_SHIFT); + return error; } EXPORT_SYMBOL(do_brk); diff -puN mm/mremap.c~memrlimit-cgroup-add-better-error-handling mm/mremap.c --- linux-2.6.26-rc5/mm/mremap.c~memrlimit-cgroup-add-better-error-handling 2008-06-26 14:38:55.000000000 +0530 +++ linux-2.6.26-rc5-balbir/mm/mremap.c 2008-06-26 14:38:55.000000000 +0530 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -256,6 +257,7 @@ unsigned long do_mremap(unsigned long ad struct vm_area_struct *vma; unsigned long ret = -EINVAL; unsigned long charged = 0; + int vm_expanded = 0; if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) goto out; @@ -349,6 +351,7 @@ unsigned long do_mremap(unsigned long ad goto out; } + vm_expanded = 1; if (vma->vm_flags & VM_ACCOUNT) { charged = (new_len - old_len) >> PAGE_SHIFT; if (security_vm_enough_memory(charged)) @@ -411,6 +414,9 @@ out: if (ret & ~PAGE_MASK) vm_unacct_memory(charged); out_nc: + if ((ret & ~PAGE_MASK) && vm_expanded) + memrlimit_cgroup_uncharge_as(mm, + (new_len - old_len) >> PAGE_SHIFT); return ret; } _ -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org