* [PATCH] RSS limit enforcement for 2.6
@ 2004-03-15 23:21 Rik van Riel
2004-03-16 6:08 ` Nick Piggin
0 siblings, 1 reply; 8+ messages in thread
From: Rik van Riel @ 2004-03-15 23:21 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-mm, linux-kernel, Hugh Dickins, Pavel Machek, Nick Piggin
Hi,
Hugh Dickins found a bug in the 2.4-rmap RSS limit enforcing
code that may well explain why the previous port of the code
to 2.6 resulted in bad performance. The split active lists
in 2.4-rmap probably masked the largest damages, but in 2.6
it was very much visible.
The patch below should work. Pavel, Nick, still interested
in testing the performance ? ;)
===== fs/exec.c 1.105 vs edited =====
--- 1.105/fs/exec.c Wed Feb 25 05:34:47 2004
+++ edited/fs/exec.c Mon Mar 15 17:27:06 2004
@@ -1119,6 +1119,11 @@
if (retval < 0)
goto out_mm;
+ if (likely(current->mm))
+ bprm.mm->rlimit_rss = current->mm->rlimit_rss;
+ else
+ bprm.mm->rlimit_rss = init_mm.rlimit_rss;
+
bprm.argc = count(argv, bprm.p / sizeof(void *));
if ((retval = bprm.argc) < 0)
goto out_mm;
===== include/linux/init_task.h 1.29 vs edited =====
--- 1.29/include/linux/init_task.h Wed Feb 18 22:42:38 2004
+++ edited/include/linux/init_task.h Mon Mar 15 17:27:57 2004
@@ -2,6 +2,7 @@
#define _LINUX__INIT_TASK_H
#include <linux/file.h>
+#include <asm/resource.h>
#define INIT_FILES \
{ \
@@ -42,6 +43,7 @@
.mmlist = LIST_HEAD_INIT(name.mmlist), \
.cpu_vm_mask = CPU_MASK_ALL, \
.default_kioctx = INIT_KIOCTX(name.default_kioctx, name), \
+ .rlimit_rss = RLIM_INFINITY, \
}
#define INIT_SIGNALS(sig) { \
===== include/linux/sched.h 1.185 vs edited =====
--- 1.185/include/linux/sched.h Sun Mar 7 02:05:01 2004
+++ edited/include/linux/sched.h Mon Mar 15 17:28:38 2004
@@ -205,6 +205,7 @@
unsigned long arg_start, arg_end, env_start, env_end;
unsigned long rss, total_vm, locked_vm;
unsigned long def_flags;
+ unsigned long rlimit_rss;
unsigned long saved_auxv[40]; /* for /proc/PID/auxv */
===== include/linux/swap.h 1.80 vs edited =====
--- 1.80/include/linux/swap.h Mon Jan 19 01:28:35 2004
+++ edited/include/linux/swap.h Mon Mar 15 17:29:00 2004
@@ -179,7 +179,7 @@
/* linux/mm/rmap.c */
#ifdef CONFIG_MMU
-int FASTCALL(page_referenced(struct page *));
+int FASTCALL(page_referenced(struct page *, int *));
struct pte_chain *FASTCALL(page_add_rmap(struct page *, pte_t *,
struct pte_chain *));
void FASTCALL(page_remove_rmap(struct page *, pte_t *));
@@ -188,7 +188,7 @@
/* linux/mm/shmem.c */
extern int shmem_unuse(swp_entry_t entry, struct page *page);
#else
-#define page_referenced(page) TestClearPageReferenced(page)
+#define page_referenced(page, _x) TestClearPageReferenced(page)
#define try_to_unmap(page) SWAP_FAIL
#endif /* CONFIG_MMU */
===== kernel/sys.c 1.73 vs edited =====
--- 1.73/kernel/sys.c Mon Feb 23 14:46:54 2004
+++ edited/kernel/sys.c Mon Mar 15 17:30:13 2004
@@ -1489,6 +1489,14 @@
if (retval)
return retval;
+ /* The rlimit is specified in bytes, convert to pages for mm. */
+ if (resource == RLIMIT_RSS && current->mm) {
+ unsigned long pages = RLIM_INFINITY;
+ if (new_rlim.rlim_cur != RLIM_INFINITY)
+ pages = new_rlim.rlim_cur >> PAGE_SHIFT;
+ current->mm->rlimit_rss = pages;
+ }
+
*old_rlim = new_rlim;
return 0;
}
===== mm/rmap.c 1.36 vs edited =====
--- 1.36/mm/rmap.c Sun Mar 7 02:04:57 2004
+++ edited/mm/rmap.c Mon Mar 15 17:30:45 2004
@@ -104,6 +104,7 @@
/**
* page_referenced - test if the page was referenced
* @page: the page to test
+ * @rsslimit: set if the process(es) using the page is(are) over RSS limit
*
* Quick test_and_clear_referenced for all mappings to a page,
* returns the number of processes which referenced the page.
@@ -112,10 +113,11 @@
* If the page has a single-entry pte_chain, collapse that back to a PageDirect
* representation. This way, it's only done under memory pressure.
*/
-int fastcall page_referenced(struct page * page)
+int fastcall page_referenced(struct page * page, int * rsslimit)
{
struct pte_chain *pc;
- int referenced = 0;
+ int referenced = 0, over_rsslimit = 0;
+ struct mm_struct * mm;
if (page_test_and_clear_young(page))
mark_page_accessed(page);
@@ -125,11 +127,15 @@
if (PageDirect(page)) {
pte_t *pte = rmap_ptep_map(page->pte.direct);
+ mm = ptep_to_mm(pte);
if (ptep_test_and_clear_young(pte))
referenced++;
+ if (mm->rss > mm->rlimit_rss)
+ over_rsslimit = 1;
rmap_ptep_unmap(pte);
- } else {
+ } else if (page->pte.chain) {
int nr_chains = 0;
+ int over_rsslimit = 1;
/* Check all the page tables mapping this page. */
for (pc = page->pte.chain; pc; pc = pte_chain_next(pc)) {
@@ -142,6 +148,9 @@
p = rmap_ptep_map(pte_paddr);
if (ptep_test_and_clear_young(p))
referenced++;
+ mm = ptep_to_mm(p);
+ if (mm->rss <= mm->rlimit_rss)
+ over_rsslimit = 0;
rmap_ptep_unmap(p);
nr_chains++;
}
@@ -154,6 +163,8 @@
__pte_chain_free(pc);
}
}
+ *rsslimit = over_rsslimit;
+
return referenced;
}
===== mm/vmscan.c 1.198 vs edited =====
--- 1.198/mm/vmscan.c Fri Mar 12 04:33:10 2004
+++ edited/mm/vmscan.c Mon Mar 15 18:07:32 2004
@@ -250,6 +250,7 @@
LIST_HEAD(ret_pages);
struct pagevec freed_pvec;
int pgactivate = 0;
+ int over_rsslimit;
int ret = 0;
cond_resched();
@@ -276,8 +277,8 @@
goto keep_locked;
pte_chain_lock(page);
- referenced = page_referenced(page);
- if (referenced && page_mapping_inuse(page)) {
+ referenced = page_referenced(page, &over_rsslimit);
+ if (referenced && page_mapping_inuse(page) && !over_rsslimit) {
/* In active use or really unfreeable. Activate it. */
pte_chain_unlock(page);
goto activate_locked;
@@ -593,6 +594,7 @@
long mapped_ratio;
long distress;
long swap_tendency;
+ int over_rsslimit;
lru_add_drain();
pgmoved = 0;
@@ -657,7 +659,7 @@
continue;
}
pte_chain_lock(page);
- if (page_referenced(page)) {
+ if (page_referenced(page, &over_rsslimit) && !over_rsslimit) {
pte_chain_unlock(page);
list_add(&page->lru, &l_active);
continue;
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] RSS limit enforcement for 2.6
2004-03-15 23:21 [PATCH] RSS limit enforcement for 2.6 Rik van Riel
@ 2004-03-16 6:08 ` Nick Piggin
2004-03-18 22:04 ` Pavel Machek
0 siblings, 1 reply; 8+ messages in thread
From: Nick Piggin @ 2004-03-16 6:08 UTC (permalink / raw)
To: Rik van Riel
Cc: Andrew Morton, linux-mm, linux-kernel, Hugh Dickins, Pavel Machek
Rik van Riel wrote:
>Hi,
>
>Hugh Dickins found a bug in the 2.4-rmap RSS limit enforcing
>code that may well explain why the previous port of the code
>to 2.6 resulted in bad performance. The split active lists
>in 2.4-rmap probably masked the largest damages, but in 2.6
>it was very much visible.
>
>
Hi Rik,
What was the problem by the way?
>The patch below should work. Pavel, Nick, still interested
>in testing the performance ? ;)
>
I could do that.
>@@ -593,6 +594,7 @@
> long mapped_ratio;
> long distress;
> long swap_tendency;
>+ int over_rsslimit;
>
> lru_add_drain();
> pgmoved = 0;
>@@ -657,7 +659,7 @@
> continue;
> }
> pte_chain_lock(page);
>- if (page_referenced(page)) {
>+ if (page_referenced(page, &over_rsslimit) && !over_rsslimit) {
> pte_chain_unlock(page);
> list_add(&page->lru, &l_active);
> continue;
>
This still has a problem that !reclaim_mapped scans will not
shrink a runaway process before putting a lot of pressure on
the rest of the pagecache.
You could do a page_gather_pte_info type thing that doesn't
actually clear all the referenced bits (would probably
SetPageReferenced). Unfortunately this has the downside that
you also need to walk the pte chains for all mapped pages even
in the !reclaim_mapped case.
But it is a good start. We advertise the functionality, so we
should be trying to do something with rss limits.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] RSS limit enforcement for 2.6
2004-03-16 6:08 ` Nick Piggin
@ 2004-03-18 22:04 ` Pavel Machek
2004-03-25 14:44 ` Rik van Riel
0 siblings, 1 reply; 8+ messages in thread
From: Pavel Machek @ 2004-03-18 22:04 UTC (permalink / raw)
To: Nick Piggin
Cc: Rik van Riel, Andrew Morton, linux-mm, linux-kernel,
Hugh Dickins, Pavel Machek
Hi!
> >Hugh Dickins found a bug in the 2.4-rmap RSS limit enforcing
> >code that may well explain why the previous port of the code
> >to 2.6 resulted in bad performance. The split active lists
> >in 2.4-rmap probably masked the largest damages, but in 2.6
> >it was very much visible.
> >
> >
>
> Hi Rik,
> What was the problem by the way?
When running linguistics computation, machine got completely
unusable due to bad memory pressure. nice -n 19 was
useless. Memory limit should help.
--
64 bytes from 195.113.31.123: icmp_seq=28 ttl=51 time=448769.1 ms
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] RSS limit enforcement for 2.6
2004-03-18 22:04 ` Pavel Machek
@ 2004-03-25 14:44 ` Rik van Riel
2004-03-25 22:23 ` Pavel Machek
0 siblings, 1 reply; 8+ messages in thread
From: Rik van Riel @ 2004-03-25 14:44 UTC (permalink / raw)
To: Pavel Machek
Cc: Nick Piggin, Andrew Morton, linux-mm, linux-kernel, Hugh Dickins
On Thu, 18 Mar 2004, Pavel Machek wrote:
> When running linguistics computation, machine got completely
> unusable due to bad memory pressure. nice -n 19 was
> useless. Memory limit should help.
Is this with the new patch, with the old patch or
without any RSS limiting patch ?
--
"Debugging is twice as hard as writing the code in the first place.
Therefore, if you write the code as cleverly as possible, you are,
by definition, not smart enough to debug it." - Brian W. Kernighan
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] RSS limit enforcement for 2.6
2004-03-25 14:44 ` Rik van Riel
@ 2004-03-25 22:23 ` Pavel Machek
0 siblings, 0 replies; 8+ messages in thread
From: Pavel Machek @ 2004-03-25 22:23 UTC (permalink / raw)
To: Rik van Riel
Cc: Nick Piggin, Andrew Morton, linux-mm, linux-kernel, Hugh Dickins
Hi!
> > When running linguistics computation, machine got completely
> > unusable due to bad memory pressure. nice -n 19 was
> > useless. Memory limit should help.
>
> Is this with the new patch, with the old patch or
> without any RSS limiting patch ?
That was without any RSS limiting patch. I'm sorry, I have no time for
linguistics just now.
Pavel
--
When do you have a heart between your knees?
[Johanka's followup: and *two* hearts?]
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] RSS limit enforcement for 2.6
2004-02-05 7:18 ` Andrew Morton
@ 2004-02-05 13:11 ` Nick Piggin
0 siblings, 0 replies; 8+ messages in thread
From: Nick Piggin @ 2004-02-05 13:11 UTC (permalink / raw)
To: Andrew Morton; +Cc: Rik van Riel, pavel, linux-mm
Andrew Morton wrote:
Snip [RSS not effective]
>
>Note that there is still a problem in refill_inactive_zone():
>
> if (page_mapped(page)) {
>
> /*
> * Don't clear page referenced if we're not going
> * to use it.
> */
> if (!reclaim_mapped && !over_rsslimit) {
> list_add(&page->lru, &l_ignore);
> continue;
> }
>
> /*
> * probably it would be useful to transfer dirty bit
> * from pte to the @page here.
> */
> pte_chain_lock(page);
> if (page_mapped(page) &&
> page_referenced(page, &over_rsslimit) &&
> !over_rsslimit) {
> pte_chain_unlock(page);
> list_add(&page->lru, &l_active);
> continue;
> }
> pte_chain_unlock(page);
> }
>
>That first test of over_rsslimit is kinda bogus: we haven't run
>
Probably why it isn't reclaiming your mapped pages
>page_referenced() yet! But the recent change of moving that little chunk
>of code to before the page_referenced() check was correct.
>
>So to get this right, we may need to split the over-limit stuff apart from
>the page_referenced() processing.
>
>
This is one thing I was worried about with my change, and I
thought the same thing.
We could have a function that checks the rss limit and also moves the
referenced bits into the page's flags; then page_referenced could
just return TestClearPageReferenced.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] RSS limit enforcement for 2.6
2004-01-27 17:51 Rik van Riel
@ 2004-02-05 7:18 ` Andrew Morton
2004-02-05 13:11 ` Nick Piggin
0 siblings, 1 reply; 8+ messages in thread
From: Andrew Morton @ 2004-02-05 7:18 UTC (permalink / raw)
To: Rik van Riel; +Cc: pavel, linux-mm
Rik van Riel <riel@redhat.com> wrote:
>
> the patch below (softly) enforces RLIMIT_RSS in the 2.6 kernel,
I fixed another problem in this. page_referenced() was going into the
second leg of that big if() statement for pagecache and swapcache pages
which are not mapped into anyone's pagetables. Due to the logic in there
we end up never setting *rsslimit again and we think all pagecache pages
are "rss over limit".
mm/rmap.c | 4 ++++
1 files changed, 4 insertions(+)
diff -puN mm/rmap.c~vm-rss-limit-fix-fix mm/rmap.c
--- 25/mm/rmap.c~vm-rss-limit-fix-fix 2004-02-04 23:01:30.000000000 -0800
+++ 25-akpm/mm/rmap.c 2004-02-04 23:01:46.000000000 -0800
@@ -130,6 +130,9 @@ int page_referenced(struct page *page, i
if (TestClearPageReferenced(page))
referenced++;
+ if (!page_mapped(page))
+ goto out;
+
if (PageDirect(page)) {
pte_t *pte = rmap_ptep_map(page->pte.direct);
if (ptep_test_and_clear_young(pte))
@@ -172,6 +175,7 @@ int page_referenced(struct page *page, i
__pte_chain_free(pc);
}
}
+out:
return referenced;
}
With this patch, everything seems to be doing what it's supposed to do.
But it doesn't seem to be effective. On a 256M box I started a process
which allocated 100M of anon memory and just went to sleep. Then I set
`ulimit -m 4000' (4 megs) and ran 4-thread qsbench under that. Debug code
told me that page_referenced() was returning non-zero *rsslimit.
But after 20 seconds of qsbenching I killed it and found that all of the
innocent 100M had been swapped out and reclaimed.
Note that there is still a problem in refill_inactive_zone():
if (page_mapped(page)) {
/*
* Don't clear page referenced if we're not going
* to use it.
*/
if (!reclaim_mapped && !over_rsslimit) {
list_add(&page->lru, &l_ignore);
continue;
}
/*
* probably it would be useful to transfer dirty bit
* from pte to the @page here.
*/
pte_chain_lock(page);
if (page_mapped(page) &&
page_referenced(page, &over_rsslimit) &&
!over_rsslimit) {
pte_chain_unlock(page);
list_add(&page->lru, &l_active);
continue;
}
pte_chain_unlock(page);
}
That first test of over_rsslimit is kinda bogus: we haven't run
page_referenced() yet! But the recent change of moving that little chunk
of code to before the page_referenced() check was correct.
So to get this right, we may need to split the over-limit stuff apart from
the page_referenced() processing.
Anyway, needs more work. I'll drop the patch out. Here's what I currently
have, against next -mm.
fs/exec.c | 5 +++++
include/linux/init_task.h | 2 ++
include/linux/sched.h | 1 +
include/linux/swap.h | 4 ++--
kernel/sys.c | 8 ++++++++
mm/rmap.c | 24 +++++++++++++++++++++++-
mm/vmscan.c | 12 ++++++++----
7 files changed, 49 insertions(+), 7 deletions(-)
diff -puN include/linux/init_task.h~vm-rss-limit-enforcement include/linux/init_task.h
--- 25/include/linux/init_task.h~vm-rss-limit-enforcement 2004-02-04 22:28:38.000000000 -0800
+++ 25-akpm/include/linux/init_task.h 2004-02-04 22:28:38.000000000 -0800
@@ -2,6 +2,7 @@
#define _LINUX__INIT_TASK_H
#include <linux/file.h>
+#include <linux/resource.h>
#define INIT_FILES \
{ \
@@ -42,6 +43,7 @@
.mmlist = LIST_HEAD_INIT(name.mmlist), \
.cpu_vm_mask = CPU_MASK_ALL, \
.default_kioctx = INIT_KIOCTX(name.default_kioctx, name), \
+ .rlimit_rss = RLIM_INFINITY \
}
#define INIT_SIGNALS(sig) { \
diff -puN include/linux/sched.h~vm-rss-limit-enforcement include/linux/sched.h
--- 25/include/linux/sched.h~vm-rss-limit-enforcement 2004-02-04 22:28:38.000000000 -0800
+++ 25-akpm/include/linux/sched.h 2004-02-04 22:28:38.000000000 -0800
@@ -205,6 +205,7 @@ struct mm_struct {
unsigned long arg_start, arg_end, env_start, env_end;
unsigned long rss, total_vm, locked_vm;
unsigned long def_flags;
+ unsigned long rlimit_rss;
unsigned long saved_auxv[40]; /* for /proc/PID/auxv */
diff -puN include/linux/swap.h~vm-rss-limit-enforcement include/linux/swap.h
--- 25/include/linux/swap.h~vm-rss-limit-enforcement 2004-02-04 22:28:38.000000000 -0800
+++ 25-akpm/include/linux/swap.h 2004-02-04 22:28:38.000000000 -0800
@@ -179,7 +179,7 @@ extern int vm_swappiness;
/* linux/mm/rmap.c */
#ifdef CONFIG_MMU
-int FASTCALL(page_referenced(struct page *));
+int FASTCALL(page_referenced(struct page *, int *));
struct pte_chain *FASTCALL(page_add_rmap(struct page *, pte_t *,
struct pte_chain *));
void FASTCALL(page_remove_rmap(struct page *, pte_t *));
@@ -188,7 +188,7 @@ int FASTCALL(try_to_unmap(struct page *)
/* linux/mm/shmem.c */
extern int shmem_unuse(swp_entry_t entry, struct page *page);
#else
-#define page_referenced(page) TestClearPageReferenced(page)
+#define page_referenced(page, _x) TestClearPageReferenced(page)
#define try_to_unmap(page) SWAP_FAIL
#endif /* CONFIG_MMU */
diff -puN kernel/sys.c~vm-rss-limit-enforcement kernel/sys.c
--- 25/kernel/sys.c~vm-rss-limit-enforcement 2004-02-04 22:28:38.000000000 -0800
+++ 25-akpm/kernel/sys.c 2004-02-04 22:28:38.000000000 -0800
@@ -1306,6 +1306,14 @@ asmlinkage long sys_setrlimit(unsigned i
if (retval)
return retval;
+ /* The rlimit is specified in bytes, convert to pages for mm. */
+ if (resource == RLIMIT_RSS && current->mm) {
+ unsigned long pages = RLIM_INFINITY;
+ if (new_rlim.rlim_cur != RLIM_INFINITY)
+ pages = new_rlim.rlim_cur >> PAGE_SHIFT;
+ current->mm->rlimit_rss = pages;
+ }
+
*old_rlim = new_rlim;
return 0;
}
diff -puN mm/rmap.c~vm-rss-limit-enforcement mm/rmap.c
--- 25/mm/rmap.c~vm-rss-limit-enforcement 2004-02-04 22:28:38.000000000 -0800
+++ 25-akpm/mm/rmap.c 2004-02-04 23:13:14.000000000 -0800
@@ -104,6 +104,7 @@ pte_chain_encode(struct pte_chain *pte_c
/**
* page_referenced - test if the page was referenced
* @page: the page to test
+ * @rsslimit: set if the process(es) using the page is(are) over RSS limit.
*
* Quick test_and_clear_referenced for all mappings to a page,
* returns the number of processes which referenced the page.
@@ -111,26 +112,42 @@ pte_chain_encode(struct pte_chain *pte_c
*
* If the page has a single-entry pte_chain, collapse that back to a PageDirect
* representation. This way, it's only done under memory pressure.
+ *
+ * The pte_chain_lock() is sufficient to pin down mm_structs while we examine
+ * them.
*/
-int page_referenced(struct page * page)
+int page_referenced(struct page *page, int *rsslimit)
{
+ struct mm_struct * mm;
struct pte_chain *pc;
int referenced = 0;
+ *rsslimit = 0;
+
if (page_test_and_clear_young(page))
mark_page_accessed(page);
if (TestClearPageReferenced(page))
referenced++;
+ if (!page_mapped(page))
+ goto out;
+
if (PageDirect(page)) {
pte_t *pte = rmap_ptep_map(page->pte.direct);
if (ptep_test_and_clear_young(pte))
referenced++;
+
+ mm = ptep_to_mm(pte);
+ if (mm->rss > mm->rlimit_rss)
+ *rsslimit = 1;
rmap_ptep_unmap(pte);
} else {
int nr_chains = 0;
+ /* We clear it if any task using the page is under its limit. */
+ *rsslimit = 1;
+
/* Check all the page tables mapping this page. */
for (pc = page->pte.chain; pc; pc = pte_chain_next(pc)) {
int i;
@@ -142,6 +159,10 @@ int page_referenced(struct page * page)
p = rmap_ptep_map(pte_paddr);
if (ptep_test_and_clear_young(p))
referenced++;
+
+ mm = ptep_to_mm(p);
+ if (mm->rss < mm->rlimit_rss)
+ *rsslimit = 0;
rmap_ptep_unmap(p);
nr_chains++;
}
@@ -154,6 +175,7 @@ int page_referenced(struct page * page)
__pte_chain_free(pc);
}
}
+out:
return referenced;
}
diff -puN mm/vmscan.c~vm-rss-limit-enforcement mm/vmscan.c
--- 25/mm/vmscan.c~vm-rss-limit-enforcement 2004-02-04 22:28:38.000000000 -0800
+++ 25-akpm/mm/vmscan.c 2004-02-04 22:28:38.000000000 -0800
@@ -249,6 +249,7 @@ shrink_list(struct list_head *page_list,
LIST_HEAD(ret_pages);
struct pagevec freed_pvec;
int pgactivate = 0;
+ int over_rsslimit;
int ret = 0;
cond_resched();
@@ -275,8 +276,8 @@ shrink_list(struct list_head *page_list,
goto keep_locked;
pte_chain_lock(page);
- referenced = page_referenced(page);
- if (referenced && page_mapping_inuse(page)) {
+ referenced = page_referenced(page, &over_rsslimit);
+ if (referenced && page_mapping_inuse(page) && !over_rsslimit) {
/* In active use or really unfreeable. Activate it. */
pte_chain_unlock(page);
goto activate_locked;
@@ -635,6 +636,7 @@ refill_inactive_zone(struct zone *zone,
long mapped_ratio;
long distress;
long swap_tendency;
+ int over_rsslimit;
/*
* `distress' is a measure of how much trouble we're having reclaiming
@@ -715,7 +717,7 @@ refill_inactive_zone(struct zone *zone,
* Don't clear page referenced if we're not going
* to use it.
*/
- if (!reclaim_mapped) {
+ if (!reclaim_mapped && !over_rsslimit) {
list_add(&page->lru, &l_ignore);
continue;
}
@@ -725,7 +727,9 @@ refill_inactive_zone(struct zone *zone,
* from pte to the @page here.
*/
pte_chain_lock(page);
- if (page_mapped(page) && page_referenced(page)) {
+ if (page_mapped(page) &&
+ page_referenced(page, &over_rsslimit) &&
+ !over_rsslimit) {
pte_chain_unlock(page);
list_add(&page->lru, &l_active);
continue;
diff -puN fs/exec.c~vm-rss-limit-enforcement fs/exec.c
--- 25/fs/exec.c~vm-rss-limit-enforcement 2004-02-04 23:13:03.000000000 -0800
+++ 25-akpm/fs/exec.c 2004-02-04 23:13:03.000000000 -0800
@@ -1117,6 +1117,11 @@ int do_execve(char * filename,
retval = init_new_context(current, bprm.mm);
if (retval < 0)
goto out_mm;
+ if (likely(current->mm)) {
+ bprm.mm->rlimit_rss = current->mm->rlimit_rss;
+ } else {
+ bprm.mm->rlimit_rss = init_mm.rlimit_rss;
+ }
bprm.argc = count(argv, bprm.p / sizeof(void *));
if ((retval = bprm.argc) < 0)
_
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH] RSS limit enforcement for 2.6
@ 2004-01-27 17:51 Rik van Riel
2004-02-05 7:18 ` Andrew Morton
0 siblings, 1 reply; 8+ messages in thread
From: Rik van Riel @ 2004-01-27 17:51 UTC (permalink / raw)
To: Andrew Morton; +Cc: Pavel Machek, Linus Torvalds, linux-mm, linux-kernel
Hi Andrew, Linus,
the patch below (softly) enforces RLIMIT_RSS in the 2.6 kernel,
it has been tested by Pavel and seems to work ok for his workload.
Please place it in -mm for more extensive testing.
thanks,
Rik
===== include/linux/init_task.h 1.27 vs edited =====
--- 1.27/include/linux/init_task.h Mon Aug 18 22:46:23 2003
+++ edited/include/linux/init_task.h Tue Jan 20 17:34:40 2004
@@ -2,6 +2,7 @@
#define _LINUX__INIT_TASK_H
#include <linux/file.h>
+#include <asm/resource.h>
#define INIT_FILES \
{ \
@@ -41,6 +42,7 @@
.page_table_lock = SPIN_LOCK_UNLOCKED, \
.mmlist = LIST_HEAD_INIT(name.mmlist), \
.default_kioctx = INIT_KIOCTX(name.default_kioctx, name), \
+ .rlimit_rss = RLIM_INFINITY \
}
#define INIT_SIGNALS(sig) { \
===== include/linux/sched.h 1.178 vs edited =====
--- 1.178/include/linux/sched.h Mon Jan 19 18:38:15 2004
+++ edited/include/linux/sched.h Tue Jan 20 17:32:56 2004
@@ -204,6 +204,7 @@
unsigned long arg_start, arg_end, env_start, env_end;
unsigned long rss, total_vm, locked_vm;
unsigned long def_flags;
+ unsigned long rlimit_rss;
cpumask_t cpu_vm_mask;
unsigned long saved_auxv[40]; /* for /proc/PID/auxv */
===== include/linux/swap.h 1.80 vs edited =====
--- 1.80/include/linux/swap.h Mon Jan 19 01:28:35 2004
+++ edited/include/linux/swap.h Tue Jan 20 18:16:28 2004
@@ -179,7 +179,7 @@
/* linux/mm/rmap.c */
#ifdef CONFIG_MMU
-int FASTCALL(page_referenced(struct page *));
+int FASTCALL(page_referenced(struct page *, int *));
struct pte_chain *FASTCALL(page_add_rmap(struct page *, pte_t *,
struct pte_chain *));
void FASTCALL(page_remove_rmap(struct page *, pte_t *));
@@ -188,7 +188,7 @@
/* linux/mm/shmem.c */
extern int shmem_unuse(swp_entry_t entry, struct page *page);
#else
-#define page_referenced(page) TestClearPageReferenced(page)
+#define page_referenced(page, _x) TestClearPageReferenced(page)
#define try_to_unmap(page) SWAP_FAIL
#endif /* CONFIG_MMU */
===== kernel/sys.c 1.69 vs edited =====
--- 1.69/kernel/sys.c Mon Jan 19 18:38:13 2004
+++ edited/kernel/sys.c Tue Jan 20 18:02:19 2004
@@ -1308,6 +1308,14 @@
if (retval)
return retval;
+ /* The rlimit is specified in bytes, convert to pages for mm. */
+ if (resource == RLIMIT_RSS && current->mm) {
+ unsigned long pages = RLIM_INFINITY;
+ if (new_rlim.rlim_cur != RLIM_INFINITY)
+ pages = new_rlim.rlim_cur >> PAGE_SHIFT;
+ current->mm->rlimit_rss = pages;
+ }
+
*old_rlim = new_rlim;
return 0;
}
===== mm/rmap.c 1.34 vs edited =====
--- 1.34/mm/rmap.c Mon Jan 19 01:36:00 2004
+++ edited/mm/rmap.c Tue Jan 20 18:26:03 2004
@@ -104,6 +104,7 @@
/**
* page_referenced - test if the page was referenced
* @page: the page to test
+ * rsslimit: set if the process(es) using the page is(are) over RSS limit
*
* Quick test_and_clear_referenced for all mappings to a page,
* returns the number of processes which referenced the page.
@@ -112,8 +113,9 @@
* If the page has a single-entry pte_chain, collapse that back to a PageDirect
* representation. This way, it's only done under memory pressure.
*/
-int page_referenced(struct page * page)
+int page_referenced(struct page * page, int * rsslimit)
{
+ struct mm_struct * mm;
struct pte_chain *pc;
int referenced = 0;
@@ -127,10 +129,17 @@
pte_t *pte = rmap_ptep_map(page->pte.direct);
if (ptep_test_and_clear_young(pte))
referenced++;
+
+ mm = ptep_to_mm(pte);
+ if (mm->rss > mm->rlimit_rss)
+ *rsslimit = 1;
rmap_ptep_unmap(pte);
} else {
int nr_chains = 0;
+ /* We clear it if any task using the page is under its limit. */
+ *rsslimit = 1;
+
/* Check all the page tables mapping this page. */
for (pc = page->pte.chain; pc; pc = pte_chain_next(pc)) {
int i;
@@ -142,6 +151,10 @@
p = rmap_ptep_map(pte_paddr);
if (ptep_test_and_clear_young(p))
referenced++;
+
+ mm = ptep_to_mm(p);
+ if (mm->rss < mm->rlimit_rss)
+ *rsslimit = 0;
rmap_ptep_unmap(p);
nr_chains++;
}
===== mm/vmscan.c 1.177 vs edited =====
--- 1.177/mm/vmscan.c Mon Jan 19 18:38:07 2004
+++ edited/mm/vmscan.c Fri Jan 23 14:00:48 2004
@@ -250,6 +250,7 @@
LIST_HEAD(ret_pages);
struct pagevec freed_pvec;
int pgactivate = 0;
+ int over_rsslimit;
int ret = 0;
cond_resched();
@@ -278,8 +279,8 @@
goto keep_locked;
pte_chain_lock(page);
- referenced = page_referenced(page);
- if (referenced && page_mapping_inuse(page)) {
+ referenced = page_referenced(page, &over_rsslimit);
+ if (referenced && page_mapping_inuse(page) && !over_rsslimit) {
/* In active use or really unfreeable. Activate it. */
pte_chain_unlock(page);
goto activate_locked;
@@ -597,6 +598,7 @@
long mapped_ratio;
long distress;
long swap_tendency;
+ int over_rsslimit;
lru_add_drain();
pgmoved = 0;
@@ -657,7 +659,7 @@
list_del(&page->lru);
if (page_mapped(page)) {
pte_chain_lock(page);
- if (page_mapped(page) && page_referenced(page)) {
+ if (page_mapped(page) && page_referenced(page, &over_rsslimit) && !over_rsslimit) {
pte_chain_unlock(page);
list_add(&page->lru, &l_active);
continue;
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2004-03-25 22:23 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-03-15 23:21 [PATCH] RSS limit enforcement for 2.6 Rik van Riel
2004-03-16 6:08 ` Nick Piggin
2004-03-18 22:04 ` Pavel Machek
2004-03-25 14:44 ` Rik van Riel
2004-03-25 22:23 ` Pavel Machek
-- strict thread matches above, loose matches on Subject: below --
2004-01-27 17:51 Rik van Riel
2004-02-05 7:18 ` Andrew Morton
2004-02-05 13:11 ` Nick Piggin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox