From: Rik van Riel <riel@redhat.com>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-mm@kvack.org, sjiang@cs.wm.edu, linux-kernel@vger.kernel.org
Subject: Re: [PATCH] token based thrashing control
Date: Sun, 1 Aug 2004 21:36:15 -0400 (EDT) [thread overview]
Message-ID: <Pine.LNX.4.58.0408012132030.13053@dhcp030.home.surriel.com> (raw)
In-Reply-To: <20040801175618.711a3aac.akpm@osdl.org>
On Sun, 1 Aug 2004, Andrew Morton wrote:
> Rik van Riel <riel@redhat.com> wrote:
> > However, for make -j 60 there's a dramatic difference between
> > a kernel with the token based swapout and a kernel without.
> >
> > normal 2.6.8-rc2: 1h20m runtime / ~26% CPU use average
> > 2.6.8-rc2 + token: 42m runtime / ~52% CPU use average
>
> OK. My test is usually around 50-60% CPU occupancy so we're not gaining
> in the moderate swapping range.
I wonder if measuring minor faults too would help here ...
Btw, here's a slightly updated patch. It's got the definition
for put_swap_token fixed for !CONFIG_SWAP and calls put_swap_token
before mmput.
I also cut the 4G/4G split line out of the mm/Makefile patch chunk,
so that should now apply better.
It doesn't have any functional changes I'm aware of.
--- linux-2.6.7/include/linux/swap.h.token 2004-07-30 13:22:17.000000000 -0400
+++ linux-2.6.7/include/linux/swap.h 2004-08-01 21:28:29.411274311 -0400
@@ -204,6 +204,27 @@
extern struct page * lookup_swap_cache(swp_entry_t);
extern struct page * read_swap_cache_async(swp_entry_t, struct vm_area_struct *vma,
unsigned long addr);
+/* linux/mm/thrash.c */
+#ifdef CONFIG_SWAP
+extern struct mm_struct * swap_token_mm; /* mm that currently holds the swap token */
+extern void grab_swap_token(void); /* try to take the token for current->mm */
+extern void __put_swap_token(struct mm_struct *); /* locked release; callers use put_swap_token() */
+
+/* Nonzero iff @mm is the mm currently recorded in swap_token_mm. */
+static inline int has_swap_token(struct mm_struct *mm)
+{
+	return swap_token_mm == mm;
+}
+
+/*
+ * Give the swap token back if @mm holds it.  An mm that does not hold
+ * the token returns here without calling __put_swap_token().
+ */
+static inline void put_swap_token(struct mm_struct *mm)
+{
+	if (!has_swap_token(mm))
+		return;
+
+	__put_swap_token(mm);
+}
+#else /* CONFIG_SWAP */
+/*
+ * Swap is compiled out: the token API collapses to no-ops.  All three
+ * stubs are function-like macros because every caller uses call syntax
+ * (put_swap_token(mm), grab_swap_token(), has_swap_token(mm)); an
+ * object-like macro would leave the caller's parentheses behind and
+ * break the build.
+ */
+#define put_swap_token(x) do { } while(0)
+#define grab_swap_token() do { } while(0)
+#define has_swap_token(x) 0
+#endif /* CONFIG_SWAP */
/* linux/mm/swapfile.c */
extern long total_swap_pages;
--- linux-2.6.7/include/linux/sched.h.token 2004-07-30 13:22:28.000000000 -0400
+++ linux-2.6.7/include/linux/sched.h 2004-07-30 13:22:29.000000000 -0400
@@ -239,6 +239,10 @@
/* Architecture-specific MM context */
mm_context_t context;
+ /* Token based thrashing protection. */
+ unsigned long swap_token_time;
+ char recent_pagein;
+
/* coredumping support */
int core_waiters;
struct completion *core_startup_done, core_done;
--- linux-2.6.7/kernel/fork.c.token 2004-07-30 13:22:27.000000000 -0400
+++ linux-2.6.7/kernel/fork.c 2004-08-01 20:44:50.000000000 -0400
@@ -36,6 +36,7 @@
#include <linux/mount.h>
#include <linux/audit.h>
#include <linux/rmap.h>
+#include <linux/swap.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -462,6 +463,7 @@
spin_unlock(&mmlist_lock);
exit_aio(mm);
exit_mmap(mm);
+ put_swap_token(mm);
mmdrop(mm);
}
}
--- linux-2.6.7/mm/memory.c.token 2004-07-30 13:22:28.000000000 -0400
+++ linux-2.6.7/mm/memory.c 2004-07-30 13:22:29.000000000 -0400
@@ -1433,6 +1433,7 @@
/* Had to read the page from swap area: Major fault */
ret = VM_FAULT_MAJOR;
inc_page_state(pgmajfault);
+ grab_swap_token();
}
mark_page_accessed(page);
--- linux-2.6.7/mm/filemap.c.token 2004-07-30 13:22:28.000000000 -0400
+++ linux-2.6.7/mm/filemap.c 2004-07-30 13:22:29.000000000 -0400
@@ -1195,6 +1195,7 @@
* effect.
*/
error = page_cache_read(file, pgoff);
+ grab_swap_token();
/*
* The page we want has now been added to the page cache.
--- /dev/null 2003-09-15 09:40:47.000000000 -0400
+++ linux-2.6.7/mm/thrash.c 2004-07-31 01:54:26.000000000 -0400
@@ -0,0 +1,100 @@
+/*
+ * mm/thrash.c
+ *
+ * Copyright (C) 2004, Red Hat, Inc.
+ * Copyright (C) 2004, Rik van Riel <riel@redhat.com>
+ * Released under the GPL, see the file COPYING for details.
+ *
+ * Simple token based thrashing protection, using the algorithm
+ * described in: http://www.cs.wm.edu/~sjiang/token.pdf
+ */
+#include <linux/jiffies.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/swap.h>
+
+static spinlock_t swap_token_lock = SPIN_LOCK_UNLOCKED; /* held while swap_token_mm is changed */
+static unsigned long swap_token_timeout; /* jiffies value after which the holder counts as timed out */
+unsigned long swap_token_check; /* grab attempts are skipped until jiffies passes this value */
+struct mm_struct * swap_token_mm = &init_mm; /* current token holder; &init_mm after a release */
+
+#define SWAP_TOKEN_CHECK_INTERVAL (HZ * 2) /* added to jiffies to schedule the next grab attempt */
+#define SWAP_TOKEN_TIMEOUT (HZ * 300) /* added to jiffies to bound how long one mm keeps the token */
+
+/*
+ * Decide whether the mm holding the token has to give it up.
+ *
+ * Returns SWAP_TOKEN_ENOUGH_RSS when the holder recorded no pagein since
+ * the previous call, SWAP_TOKEN_TIMED_OUT when it did but jiffies is past
+ * swap_token_timeout, and 0 when it may keep the token.  The holder's
+ * recent_pagein flag is cleared on every call, whatever the outcome.
+ */
+#define SWAP_TOKEN_ENOUGH_RSS 1
+#define SWAP_TOKEN_TIMED_OUT 2
+static int should_release_swap_token(struct mm_struct *mm)
+{
+	int verdict = SWAP_TOKEN_ENOUGH_RSS;
+
+	if (mm->recent_pagein)
+		verdict = time_after(jiffies, swap_token_timeout) ?
+				SWAP_TOKEN_TIMED_OUT : 0;
+
+	mm->recent_pagein = 0;
+	return verdict;
+}
+
+/*
+ * Try to take the swapout protection token for the current task's mm.
+ *
+ * A task that already holds the token only flags that it faulted again.
+ * Otherwise an attempt is made at most once per SWAP_TOKEN_CHECK_INTERVAL,
+ * and only with spin_trylock(), both to prevent SMP lock contention and
+ * to check that the mm that held the token before is no longer thrashing.
+ * The token changes hands only when should_release_swap_token() reports
+ * that the present holder must give it up.
+ */
+void grab_swap_token(void)
+{
+	struct mm_struct *self = current->mm;
+	struct mm_struct *holder;
+	unsigned long eligible;
+	int reason;
+
+	/* We have the token. Let others know we still need it. */
+	if (has_swap_token(self)) {
+		self->recent_pagein = 1;
+		return;
+	}
+
+	/* The last check was too recent. */
+	if (!time_after(jiffies, swap_token_check))
+		return;
+
+	/*
+	 * Not enabled: deny swapout protection to an mm that exceeds its
+	 * RSS limit (current->mm->rss > current->mm->rlimit_rss).
+	 */
+
+	/* No token for an mm that held it recently. */
+	if (time_before(jiffies, self->swap_token_time))
+		return;
+
+	if (!spin_trylock(&swap_token_lock))
+		return;
+
+	swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL;
+
+	holder = swap_token_mm;
+	reason = should_release_swap_token(holder);
+	if (reason) {
+		/* A holder that timed out stays ineligible for one timeout. */
+		eligible = jiffies;
+		if (reason == SWAP_TOKEN_TIMED_OUT)
+			eligible += SWAP_TOKEN_TIMEOUT;
+		holder->swap_token_time = eligible;
+
+		swap_token_timeout = jiffies + SWAP_TOKEN_TIMEOUT;
+		swap_token_mm = self;
+		printk("Took swap token, pid %d (%s)\n",
+			current->pid, current->comm);
+	}
+	spin_unlock(&swap_token_lock);
+}
+
+/*
+ * Called on process exit.  If @mm still owns the token, hand it back to
+ * init_mm and set swap_token_check to the current jiffies value, which
+ * ends any check interval that was still pending.
+ */
+void __put_swap_token(struct mm_struct *mm)
+{
+	spin_lock(&swap_token_lock);
+	if (unlikely(swap_token_mm != mm))
+		goto out_unlock;
+
+	swap_token_mm = &init_mm;
+	swap_token_check = jiffies;
+out_unlock:
+	spin_unlock(&swap_token_lock);
+}
--- linux-2.6.7/mm/rmap.c.token 2004-07-30 13:22:24.000000000 -0400
+++ linux-2.6.7/mm/rmap.c 2004-08-01 21:15:29.861020222 -0400
@@ -230,6 +230,9 @@
if (ptep_clear_flush_young(vma, address, pte))
referenced++;
+ if (mm != current->mm && has_swap_token(mm))
+ referenced++;
+
(*mapcount)--;
out_unmap:
--- linux-2.6.7/mm/Makefile.token 2004-07-30 13:22:27.000000000 -0400
+++ linux-2.6.7/mm/Makefile 2004-07-30 13:22:29.000000000 -0400
@@ -12,6 +12,6 @@
readahead.o slab.o swap.o truncate.o vmscan.o \
$(mmu-y)
-obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
+obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
obj-$(CONFIG_HUGETLBFS) += hugetlb.o
obj-$(CONFIG_NUMA) += mempolicy.o
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
next prev parent reply other threads:[~2004-08-02 1:36 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-07-30 21:37 Rik van Riel
2004-07-31 11:34 ` Nikita Danilov
2004-07-31 11:43 ` Rik van Riel
2004-08-01 11:05 ` Andrew Morton
2004-08-01 11:13 ` Arjan van de Ven
2004-08-01 21:52 ` Rik van Riel
2004-08-01 13:02 ` Rik van Riel
2004-08-02 0:56 ` Andrew Morton
2004-08-02 1:36 ` Rik van Riel [this message]
2004-08-02 2:52 ` Con Kolivas
2004-08-02 3:33 ` Rik van Riel
2004-08-02 5:13 ` Con Kolivas
2004-08-02 5:18 ` Con Kolivas
2004-08-03 0:34 ` Song Jiang
2004-08-03 1:20 ` Rik van Riel
2004-08-04 4:51 ` Song Jiang
2004-08-04 11:30 ` Rik van Riel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Pine.LNX.4.58.0408012132030.13053@dhcp030.home.surriel.com \
--to=riel@redhat.com \
--cc=akpm@osdl.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=sjiang@cs.wm.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox