From: Molnar Ingo <mingo@debella.aszi.sztaki.hu>
To: Rik van Riel <riel@conectiva.com.br>
Cc: Molnar Ingo <mingo@debella.ikk.sztaki.hu>,
"David S. Miller" <davem@redhat.com>,
torvalds@transmeta.com, linux-kernel@vger.kernel.org,
linux-mm@kvack.org
Subject: Re: test9-pre5+t9p2-vmpatch VM deadlock during write-intensive workload
Date: Fri, 22 Sep 2000 11:34:11 +0200 (CEST) [thread overview]
Message-ID: <Pine.LNX.4.21.0009221131110.12532-200000@debella.aszi.sztaki.hu> (raw)
In-Reply-To: <Pine.LNX.4.21.0009221113130.12532-100000@debella.aszi.sztaki.hu>
[-- Attachment #1: Type: TEXT/PLAIN, Size: 145 bytes --]
Yep, this has done the trick; the deadlock is gone. I've attached the full
VM-fixes patch (this fix included) against vanilla test9-pre5.
Ingo
[-- Attachment #2: Type: TEXT/PLAIN, Size: 6536 bytes --]
--- linux/fs/buffer.c.orig Fri Sep 22 02:31:07 2000
+++ linux/fs/buffer.c Fri Sep 22 02:31:13 2000
@@ -706,9 +706,7 @@
static void refill_freelist(int size)
{
if (!grow_buffers(size)) {
- balance_dirty(NODEV);
- wakeup_kswapd(0); /* We can't wait because of __GFP_IO */
- schedule();
+ try_to_free_pages(GFP_BUFFER);
}
}
--- linux/mm/filemap.c.orig Fri Sep 22 02:31:07 2000
+++ linux/mm/filemap.c Fri Sep 22 02:31:13 2000
@@ -255,7 +255,7 @@
* up kswapd.
*/
age_page_up(page);
- if (inactive_shortage() > (inactive_target * 3) / 4)
+ if (inactive_shortage() > inactive_target / 2 && free_shortage())
wakeup_kswapd(0);
not_found:
return page;
--- linux/mm/page_alloc.c.orig Fri Sep 22 02:31:07 2000
+++ linux/mm/page_alloc.c Fri Sep 22 02:31:13 2000
@@ -444,7 +444,8 @@
* processes, etc).
*/
if (gfp_mask & __GFP_WAIT) {
- wakeup_kswapd(1);
+ try_to_free_pages(gfp_mask);
+ memory_pressure++;
goto try_again;
}
}
--- linux/mm/swap.c.orig Fri Sep 22 02:31:07 2000
+++ linux/mm/swap.c Fri Sep 22 02:31:13 2000
@@ -233,27 +233,11 @@
spin_lock(&pagemap_lru_lock);
if (!PageLocked(page))
BUG();
- /*
- * Heisenbug Compensator(tm)
- * This bug shouldn't trigger, but for unknown reasons it
- * sometimes does. If there are no signs of list corruption,
- * we ignore the problem. Else we BUG()...
- */
- if (PageActive(page) || PageInactiveDirty(page) ||
- PageInactiveClean(page)) {
- struct list_head * page_lru = &page->lru;
- if (page_lru->next->prev != page_lru) {
- printk("VM: lru_cache_add, bit or list corruption..\n");
- BUG();
- }
- printk("VM: lru_cache_add, page already in list!\n");
- goto page_already_on_list;
- }
+ DEBUG_ADD_PAGE
add_page_to_active_list(page);
/* This should be relatively rare */
if (!page->age)
deactivate_page_nolock(page);
-page_already_on_list:
spin_unlock(&pagemap_lru_lock);
}
--- linux/mm/vmscan.c.orig Fri Sep 22 02:31:07 2000
+++ linux/mm/vmscan.c Fri Sep 22 02:31:27 2000
@@ -377,7 +377,7 @@
#define SWAP_SHIFT 5
#define SWAP_MIN 8
-static int swap_out(unsigned int priority, int gfp_mask)
+static int swap_out(unsigned int priority, int gfp_mask, unsigned long idle_time)
{
struct task_struct * p;
int counter;
@@ -407,6 +407,7 @@
struct mm_struct *best = NULL;
int pid = 0;
int assign = 0;
+ int found_task = 0;
select:
read_lock(&tasklist_lock);
p = init_task.next_task;
@@ -416,6 +417,11 @@
continue;
if (mm->rss <= 0)
continue;
+ /* Skip tasks which haven't slept long enough yet when idle-swapping. */
+ if (idle_time && !assign && (!(p->state & TASK_INTERRUPTIBLE) ||
+ time_before(p->sleep_time + idle_time * HZ, jiffies)))
+ continue;
+ found_task++;
/* Refresh swap_cnt? */
if (assign == 1) {
mm->swap_cnt = (mm->rss >> SWAP_SHIFT);
@@ -430,7 +436,7 @@
}
read_unlock(&tasklist_lock);
if (!best) {
- if (!assign) {
+ if (!assign && found_task > 0) {
assign = 1;
goto select;
}
@@ -691,9 +697,9 @@
* Now the page is really freeable, so we
* move it to the inactive_clean list.
*/
- UnlockPage(page);
del_page_from_inactive_dirty_list(page);
add_page_to_inactive_clean_list(page);
+ UnlockPage(page);
cleaned_pages++;
} else {
/*
@@ -701,9 +707,9 @@
* It's no use keeping it here, so we move it to
* the active list.
*/
- UnlockPage(page);
del_page_from_inactive_dirty_list(page);
add_page_to_active_list(page);
+ UnlockPage(page);
}
}
spin_unlock(&pagemap_lru_lock);
@@ -860,6 +866,7 @@
static int refill_inactive(unsigned int gfp_mask, int user)
{
int priority, count, start_count, made_progress;
+ unsigned long idle_time;
count = inactive_shortage() + free_shortage();
if (user)
@@ -869,16 +876,28 @@
/* Always trim SLAB caches when memory gets low. */
kmem_cache_reap(gfp_mask);
+ /*
+ * Calculate the minimum time (in seconds) a process must
+ * have slept before we consider it for idle swapping.
+ * This must be the number of seconds it takes to go through
+ * all of the cache. Doing this idle swapping makes the VM
+ * smoother once we start hitting swap.
+ */
+ idle_time = atomic_read(&page_cache_size);
+ idle_time += atomic_read(&buffermem_pages);
+ idle_time /= (inactive_target + 1);
+
priority = 6;
do {
made_progress = 0;
- if (current->need_resched) {
+ if (current->need_resched && (gfp_mask & __GFP_IO)) {
__set_current_state(TASK_RUNNING);
schedule();
}
- while (refill_inactive_scan(priority, 1)) {
+ while (refill_inactive_scan(priority, 1) ||
+ swap_out(priority, gfp_mask, idle_time)) {
made_progress = 1;
if (!--count)
goto done;
@@ -913,7 +932,7 @@
/*
* Then, try to page stuff out..
*/
- while (swap_out(priority, gfp_mask)) {
+ while (swap_out(priority, gfp_mask, 0)) {
made_progress = 1;
if (!--count)
goto done;
@@ -963,7 +982,8 @@
* before we get around to moving them to the other
* list, so this is a relatively cheap operation.
*/
- if (free_shortage())
+ if (free_shortage() || nr_inactive_dirty_pages > nr_free_pages() +
+ nr_inactive_clean_pages())
ret += page_launder(gfp_mask, user);
/*
@@ -1070,9 +1090,12 @@
run_task_queue(&tq_disk);
/*
- * If we've either completely gotten rid of the
- * free page shortage or the inactive page shortage
- * is getting low, then stop eating CPU time.
+ * We go to sleep if either the free page shortage
+ * or the inactive page shortage is gone. We do this
+ * because:
+ * 1) we need no more free pages or
+ * 2) the inactive pages need to be flushed to disk,
+ * it wouldn't help to eat CPU time now ...
*
* We go to sleep for one second, but if it's needed
* we'll be woken up earlier...
--- linux/include/linux/sched.h.orig Fri Sep 22 02:31:04 2000
+++ linux/include/linux/sched.h Fri Sep 22 02:31:13 2000
@@ -298,6 +298,7 @@
* that's just fine.)
*/
struct list_head run_list;
+ unsigned long sleep_time;
struct task_struct *next_task, *prev_task;
struct mm_struct *active_mm;
@@ -818,6 +819,7 @@
static inline void del_from_runqueue(struct task_struct * p)
{
nr_running--;
+ p->sleep_time = jiffies;
list_del(&p->run_list);
p->run_list.next = NULL;
}
next prev parent reply other threads:[~2000-09-22 9:34 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2000-09-21 16:44 [patch *] VM deadlock fix Rik van Riel
2000-09-21 20:28 ` Roger Larsson
2000-09-21 23:31 ` Problem remains - page_launder? (Was: Re: [patch *] VM deadlock fix) Roger Larsson
2000-09-21 22:23 ` [patch *] VM deadlock fix David S. Miller
2000-09-22 0:18 ` Andrea Arcangeli
2000-09-21 23:57 ` David S. Miller
2000-09-22 8:39 ` Rik van Riel
2000-09-22 8:54 ` test9-pre5+t9p2-vmpatch VM deadlock during write-intensive workload Molnar Ingo
2000-09-22 9:00 ` Molnar Ingo
2000-09-22 9:08 ` Rik van Riel
2000-09-22 9:14 ` Molnar Ingo
2000-09-22 9:34 ` Molnar Ingo [this message]
2000-09-22 10:27 ` Rik van Riel
2000-09-22 13:10 ` André Dahlqvist
2000-09-22 14:10 ` André Dahlqvist
2000-09-22 16:38 ` test9-pre3+t9p2-vmpatch VM deadlock during socket I/O Yuri Pudgorodsky
2000-09-22 16:20 ` test9-pre5+t9p2-vmpatch VM deadlock during write-intensive workload Mohammad A. Haque
2000-09-22 17:39 ` Linus Torvalds
2000-09-25 13:47 ` Rik van Riel
2000-09-22 12:16 ` [patch *] VM deadlock fix Martin Diehl
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Pine.LNX.4.21.0009221131110.12532-200000@debella.aszi.sztaki.hu \
--to=mingo@debella.aszi.sztaki.hu \
--cc=davem@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mingo@debella.ikk.sztaki.hu \
--cc=riel@conectiva.com.br \
--cc=torvalds@transmeta.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox