From: "Dr. Werner Fink" <werner@suse.de>
To: "Stephen C. Tweedie" <sct@redhat.com>,
	Linus Torvalds <torvalds@transmeta.com>,
	Rik van Riel <H.H.vanRiel@phys.uu.nl>
Cc: linux-mm <linux-mm@kvack.org>,
	Kernel Mailing List <linux-kernel@vger.rutgers.edu>
Subject: Running 2.1.129 at extreme load [patch] (Was: Linux-2.1.129..)
Date: Mon, 23 Nov 1998 21:53:59 +0100
Message-ID: <19981123215359.45625@boole.suse.de>
In-Reply-To: <199811231713.RAA17361@dax.scot.redhat.com>; from Stephen C. Tweedie on Mon, Nov 23, 1998 at 05:13:34PM +0000

[...]

> > Maybe we even want to keep a 3:1 ratio or something like that for
> > mapped:swap_cached pages and a semi-FIFO reclamation of swap cached
> > pages so we can simulate a bit of (very cheap) page aging.
> 
> I will just restate my profound conviction that any VM balancing which
> works by imposing precalculated limits on resources is fundamentally
> wrong.
> 
> Cheers,
>   Stephen

I've done some simple tests and worked out some changes (patch enclosed).
Starting with a plain 2.1.129, I ran a simple stress
scenario:

       * 64 MB RAM + 128 MB swap
       * Under X11 (fvwm2)
       * xload
       * xosview
       * xterm running top
       * xterm running tail -f /var/log/warn /var/log/messages
       * xterm compiling 2.0.36 sources with:
             while true; do make clean; make -j || break ; done
       * xterm compiling 2.1.129 sources with:
             while true; do make clean; make MAKE='make -j5' || break ; done


... all running together.  The load goes up to 30 and higher, and random
SIGBUS signals hit random processes (in the best case the X server was
signaled, which makes the system usable again).

I've made the following changes:

       * changed the position at which pages are deleted from
         the swap cache in mm/filemap.c::shrink_one_page()
       * added a simple retry in
         mm/page_alloc.c::__get_free_pages() if we wait
         on low-priority allocations (aka GFP_USER)
       * don't let mm/vmscan.c::try_to_free_pages()
         scan too much
       * added a simple aging scheme for recently swapped-in
         pages; the condition (e.g. a bigger rss window) is
         tunable.  A small sketch of the mechanics follows
         this list.
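
Since the aging scheme is the interesting part, here is a minimal
userspace sketch of just its mechanics.  This is not the kernel code:
PAGE_ADVANCE and PAGE_DECLINE really come from <linux/swapctl.h>, but
the numeric values below are only assumptions for illustration.

/*
 * Minimal sketch of the page aging added by the patch below.
 * The numeric values of PAGE_ADVANCE/PAGE_DECLINE are assumed
 * here; in the kernel they are the swapctl tunables.
 */
#include <stdio.h>

#define PAGE_ADVANCE	3	/* assumed stand-in for the swapctl tunable */
#define PAGE_DECLINE	1	/* assumed stand-in for the swapctl tunable */

struct page_sim {
	unsigned int lifetime;	/* the field added to struct page */
};

/*
 * At swap-in: a task that keeps faulting pages into an empty rss
 * earns its pages a lifetime credit (cf. the swap_in() hunk).
 */
static void age_on_swapin(struct page_sim *page, int shared,
			  unsigned int *dec_flt, int old_rss)
{
	page->lifetime = 0;
	if (old_rss == 0)
		(*dec_flt)++;
	if (*dec_flt > 3) {
		/* shared pages get a shorter reprieve than private ones */
		page->lifetime = (shared ? 2 : 5) * PAGE_ADVANCE;
		*dec_flt = 0;
	}
}

/*
 * At the swap-out scan: decay the lifetime and veto the swap-out
 * while any of it is left (cf. the try_to_swap_out() hunk).
 */
static int may_swap_out(struct page_sim *page)
{
	if (page->lifetime > PAGE_DECLINE)
		page->lifetime -= PAGE_DECLINE;
	else
		page->lifetime = 0;
	return page->lifetime == 0;
}

int main(void)
{
	struct page_sim page = { 0 };
	unsigned int dec_flt = 3;	/* the next first-fault trips the aging */
	int survived = 0;

	age_on_swapin(&page, 0 /* private */, &dec_flt, 0 /* rss was 0 */);
	printf("lifetime after swap-in: %u\n", page.lifetime);

	while (!may_swap_out(&page))
		survived++;
	printf("page survives %d scan passes\n", survived);
	return 0;
}

With these numbers a private page enters with a lifetime of
5 * PAGE_ADVANCE = 15 and survives 14 scan passes before the swap-out
code may touch it again; shared pages enter at 2 * PAGE_ADVANCE and
fall correspondingly earlier.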

The random SIGBUS signals disappear and the system seems more usable,
which means that only loads of 35 and higher make the system
temporarily unusable.


            Werner

--------------------------------------------------------------------
diff -urN linux-2.1.129/include/linux/mm.h linux/include/linux/mm.h
--- linux-2.1.129/include/linux/mm.h	Thu Nov 19 20:49:37 1998
+++ linux/include/linux/mm.h	Mon Nov 23 14:53:14 1998
@@ -117,7 +117,7 @@
 	unsigned long offset;
 	struct page *next_hash;
 	atomic_t count;
-	unsigned int unused;
+	unsigned int lifetime;
 	unsigned long flags;	/* atomic flags, some possibly updated asynchronously */
 	struct wait_queue *wait;
 	struct page **pprev_hash;
diff -urN linux-2.1.129/ipc/shm.c linux/ipc/shm.c
--- linux-2.1.129/ipc/shm.c	Sun Oct 18 00:52:18 1998
+++ linux/ipc/shm.c	Mon Nov 23 15:14:00 1998
@@ -15,6 +15,7 @@
 #include <linux/stat.h>
 #include <linux/malloc.h>
 #include <linux/swap.h>
+#include <linux/swapctl.h>
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/init.h>
@@ -656,6 +657,7 @@
 
 	pte = __pte(shp->shm_pages[idx]);
 	if (!pte_present(pte)) {
+		int old_rss = shm_rss;
 		unsigned long page = get_free_page(GFP_KERNEL);
 		if (!page) {
 			oom(current);
@@ -677,6 +679,16 @@
 			shm_swp--;
 		}
 		shm_rss++;
+
+		/* Increase life time of the page */
+		mem_map[MAP_NR(page)].lifetime = 0;
+		if (old_rss == 0) 
+			current->dec_flt++;
+		if (current->dec_flt > 3) {
+			mem_map[MAP_NR(page)].lifetime = 3 * PAGE_ADVANCE;
+			current->dec_flt = 0;
+		}
+
 		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
 		shp->shm_pages[idx] = pte_val(pte);
 	} else
diff -urN linux-2.1.129/mm/filemap.c linux/mm/filemap.c
--- linux-2.1.129/mm/filemap.c	Thu Nov 19 20:44:18 1998
+++ linux/mm/filemap.c	Mon Nov 23 13:38:47 1998
@@ -167,15 +167,14 @@
 	case 1:
 		/* is it a swap-cache or page-cache page? */
 		if (page->inode) {
-			/* Throw swap-cache pages away more aggressively */
-			if (PageSwapCache(page)) {
-				delete_from_swap_cache(page);
-				return 1;
-			}
 			if (test_and_clear_bit(PG_referenced, &page->flags))
 				break;
 			if (pgcache_under_min())
 				break;
+			if (PageSwapCache(page)) {
+				delete_from_swap_cache(page);
+				return 1;
+			}
 			remove_inode_page(page);
 			return 1;
 		}
diff -urN linux-2.1.129/mm/page_alloc.c linux/mm/page_alloc.c
--- linux-2.1.129/mm/page_alloc.c	Thu Nov 19 20:44:18 1998
+++ linux/mm/page_alloc.c	Mon Nov 23 19:31:10 1998
@@ -236,6 +236,7 @@
 unsigned long __get_free_pages(int gfp_mask, unsigned long order)
 {
 	unsigned long flags;
+	int loop = 0;
 
 	if (order >= NR_MEM_LISTS)
 		goto nopage;
@@ -262,6 +263,7 @@
 				goto nopage;
 		}
 	}
+repeat:
 	spin_lock_irqsave(&page_alloc_lock, flags);
 	RMQUEUE(order, (gfp_mask & GFP_DMA));
 	spin_unlock_irqrestore(&page_alloc_lock, flags);
@@ -274,6 +276,8 @@
 	if (gfp_mask & __GFP_WAIT) {
 		current->policy |= SCHED_YIELD;
 		schedule();
+		if (!loop++ && nr_free_pages > freepages.low)
+			goto repeat;
 	}
 
 nopage:
@@ -380,6 +384,7 @@
 {
 	unsigned long page;
 	struct page *page_map;
+	int shared, old_rss = vma->vm_mm->rss;
 	
 	page_map = read_swap_cache(entry);
 
@@ -399,8 +404,18 @@
 	vma->vm_mm->rss++;
 	tsk->min_flt++;
 	swap_free(entry);
+	shared = is_page_shared(page_map);
 
-	if (!write_access || is_page_shared(page_map)) {
+	/* Increase life time of the page */
+	page_map->lifetime = 0;
+	if (old_rss == 0)
+		tsk->dec_flt++;
+	if (tsk->dec_flt > 3) {
+		page_map->lifetime = (shared ? 2 : 5) * PAGE_ADVANCE;
+		tsk->dec_flt = 0;
+	}
+
+	if (!write_access || shared) {
 		set_pte(page_table, mk_pte(page, vma->vm_page_prot));
 		return;
 	}
diff -urN linux-2.1.129/mm/vmscan.c linux/mm/vmscan.c
--- linux-2.1.129/mm/vmscan.c	Thu Nov 19 20:44:18 1998
+++ linux/mm/vmscan.c	Mon Nov 23 19:34:21 1998
@@ -131,12 +131,21 @@
 		return 0;
 	}
 
+	/* life time decay */
+	if (page_map->lifetime > PAGE_DECLINE)
+		page_map->lifetime -= PAGE_DECLINE;
+	else
+		page_map->lifetime = 0;
+	if (page_map->lifetime)
+		return 0;
+
 	if (pte_dirty(pte)) {
 		if (vma->vm_ops && vma->vm_ops->swapout) {
 			pid_t pid = tsk->pid;
 			vma->vm_mm->rss--;
-			if (vma->vm_ops->swapout(vma, address - vma->vm_start + vma->vm_offset, page_table))
+			if (vma->vm_ops->swapout(vma, address - vma->vm_start + vma->vm_offset, page_table)) {
 				kill_proc(pid, SIGBUS, 1);
+			}
 		} else {
 			/*
 			 * This is a dirty, swappable page.  First of all,
@@ -561,6 +570,7 @@
 int try_to_free_pages(unsigned int gfp_mask, int count)
 {
 	int retval = 1;
+	int is_dma = (gfp_mask & __GFP_DMA);
 
 	lock_kernel();
 	if (!(current->flags & PF_MEMALLOC)) {
@@ -568,6 +578,8 @@
 		do {
 			retval = do_try_to_free_page(gfp_mask);
 			if (!retval)
+				break;
+			if (!is_dma && nr_free_pages > freepages.high + SWAP_CLUSTER_MAX)
 				break;
 			count--;
 		} while (count > 0);
