linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Rik van Riel <riel@conectiva.com.br>
To: Molnar Ingo <mingo@debella.ikk.sztaki.hu>
Cc: "David S. Miller" <davem@redhat.com>,
	torvalds@transmeta.com, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org
Subject: Re: test9-pre5+t9p2-vmpatch VM deadlock during write-intensive workload
Date: Fri, 22 Sep 2000 07:27:30 -0300 (BRST)	[thread overview]
Message-ID: <Pine.LNX.4.21.0009220725590.4442-200000@duckman.distro.conectiva> (raw)
In-Reply-To: <Pine.LNX.4.21.0009221131110.12532-200000@debella.aszi.sztaki.hu>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 543 bytes --]

On Fri, 22 Sep 2000, Molnar Ingo wrote:

> yep this has done the trick, the deadlock is gone. I've attached the full
> VM-fixes patch (this fix included) against vanilla test9-pre5.

Linus,

could you please include this patch in the next
pre patch?

(in the meantime, I'll go back to looking at the
balancing thing with shared memory ... which is
unrelated to this deadlock problem)

thanks,

Rik
--
"What you're running that piece of shit Gnome?!?!"
       -- Miguel de Icaza, UKUUG 2000

http://www.conectiva.com/		http://www.surriel.com/

[-- Attachment #2: Type: TEXT/PLAIN, Size: 6536 bytes --]

--- linux/fs/buffer.c.orig	Fri Sep 22 02:31:07 2000
+++ linux/fs/buffer.c	Fri Sep 22 02:31:13 2000
@@ -706,9 +706,7 @@
 static void refill_freelist(int size)
 {
 	if (!grow_buffers(size)) {
-		balance_dirty(NODEV);
-		wakeup_kswapd(0); /* We can't wait because of __GFP_IO */
-		schedule();
+		try_to_free_pages(GFP_BUFFER);
 	}
 }
 
--- linux/mm/filemap.c.orig	Fri Sep 22 02:31:07 2000
+++ linux/mm/filemap.c	Fri Sep 22 02:31:13 2000
@@ -255,7 +255,7 @@
 	 * up kswapd.
 	 */
 	age_page_up(page);
-	if (inactive_shortage() > (inactive_target * 3) / 4)
+	if (inactive_shortage() > inactive_target / 2 && free_shortage())
 			wakeup_kswapd(0);
 not_found:
 	return page;
--- linux/mm/page_alloc.c.orig	Fri Sep 22 02:31:07 2000
+++ linux/mm/page_alloc.c	Fri Sep 22 02:31:13 2000
@@ -444,7 +444,8 @@
 		 * processes, etc).
 		 */
 		if (gfp_mask & __GFP_WAIT) {
-			wakeup_kswapd(1);
+			try_to_free_pages(gfp_mask);
+			memory_pressure++;
 			goto try_again;
 		}
 	}
--- linux/mm/swap.c.orig	Fri Sep 22 02:31:07 2000
+++ linux/mm/swap.c	Fri Sep 22 02:31:13 2000
@@ -233,27 +233,11 @@
 	spin_lock(&pagemap_lru_lock);
 	if (!PageLocked(page))
 		BUG();
-	/*
-	 * Heisenbug Compensator(tm)
-	 * This bug shouldn't trigger, but for unknown reasons it
-	 * sometimes does. If there are no signs of list corruption,
-	 * we ignore the problem. Else we BUG()...
-	 */
-	if (PageActive(page) || PageInactiveDirty(page) ||
-					PageInactiveClean(page)) {
-		struct list_head * page_lru = &page->lru;
-		if (page_lru->next->prev != page_lru) {
-			printk("VM: lru_cache_add, bit or list corruption..\n");
-			BUG();
-		}
-		printk("VM: lru_cache_add, page already in list!\n");
-		goto page_already_on_list;
-	}
+	DEBUG_ADD_PAGE
 	add_page_to_active_list(page);
 	/* This should be relatively rare */
 	if (!page->age)
 		deactivate_page_nolock(page);
-page_already_on_list:
 	spin_unlock(&pagemap_lru_lock);
 }
 
--- linux/mm/vmscan.c.orig	Fri Sep 22 02:31:07 2000
+++ linux/mm/vmscan.c	Fri Sep 22 02:31:27 2000
@@ -377,7 +377,7 @@
 #define SWAP_SHIFT 5
 #define SWAP_MIN 8
 
-static int swap_out(unsigned int priority, int gfp_mask)
+static int swap_out(unsigned int priority, int gfp_mask, unsigned long idle_time)
 {
 	struct task_struct * p;
 	int counter;
@@ -407,6 +407,7 @@
 		struct mm_struct *best = NULL;
 		int pid = 0;
 		int assign = 0;
+		int found_task = 0;
 	select:
 		read_lock(&tasklist_lock);
 		p = init_task.next_task;
@@ -416,6 +417,11 @@
 				continue;
 	 		if (mm->rss <= 0)
 				continue;
+			/* Skip tasks which haven't slept long enough yet when idle-swapping. */
+			if (idle_time && !assign && (!(p->state & TASK_INTERRUPTIBLE) ||
+					time_before(p->sleep_time + idle_time * HZ, jiffies)))
+				continue;
+			found_task++;
 			/* Refresh swap_cnt? */
 			if (assign == 1) {
 				mm->swap_cnt = (mm->rss >> SWAP_SHIFT);
@@ -430,7 +436,7 @@
 		}
 		read_unlock(&tasklist_lock);
 		if (!best) {
-			if (!assign) {
+			if (!assign && found_task > 0) {
 				assign = 1;
 				goto select;
 			}
@@ -691,9 +697,9 @@
 			 * Now the page is really freeable, so we
 			 * move it to the inactive_clean list.
 			 */
-			UnlockPage(page);
 			del_page_from_inactive_dirty_list(page);
 			add_page_to_inactive_clean_list(page);
+			UnlockPage(page);
 			cleaned_pages++;
 		} else {
 			/*
@@ -701,9 +707,9 @@
 			 * It's no use keeping it here, so we move it to
 			 * the active list.
 			 */
-			UnlockPage(page);
 			del_page_from_inactive_dirty_list(page);
 			add_page_to_active_list(page);
+			UnlockPage(page);
 		}
 	}
 	spin_unlock(&pagemap_lru_lock);
@@ -860,6 +866,7 @@
 static int refill_inactive(unsigned int gfp_mask, int user)
 {
 	int priority, count, start_count, made_progress;
+	unsigned long idle_time;
 
 	count = inactive_shortage() + free_shortage();
 	if (user)
@@ -869,16 +876,28 @@
 	/* Always trim SLAB caches when memory gets low. */
 	kmem_cache_reap(gfp_mask);
 
+	/*
+	 * Calculate the minimum time (in seconds) a process must
+	 * have slept before we consider it for idle swapping.
+	 * This must be the number of seconds it takes to go through
+	 * all of the cache. Doing this idle swapping makes the VM
+	 * smoother once we start hitting swap.
+	 */
+	idle_time = atomic_read(&page_cache_size);
+	idle_time += atomic_read(&buffermem_pages);
+	idle_time /= (inactive_target + 1);
+
 	priority = 6;
 	do {
 		made_progress = 0;
 
-		if (current->need_resched) {
+		if (current->need_resched && (gfp_mask & __GFP_IO)) {
 			__set_current_state(TASK_RUNNING);
 			schedule();
 		}
 
-		while (refill_inactive_scan(priority, 1)) {
+		while (refill_inactive_scan(priority, 1) ||
+				swap_out(priority, gfp_mask, idle_time)) {
 			made_progress = 1;
 			if (!--count)
 				goto done;
@@ -913,7 +932,7 @@
 		/*
 		 * Then, try to page stuff out..
 		 */
-		while (swap_out(priority, gfp_mask)) {
+		while (swap_out(priority, gfp_mask, 0)) {
 			made_progress = 1;
 			if (!--count)
 				goto done;
@@ -963,7 +982,8 @@
 	 * before we get around to moving them to the other
 	 * list, so this is a relatively cheap operation.
 	 */
-	if (free_shortage())
+	if (free_shortage() || nr_inactive_dirty_pages > nr_free_pages() +
+			nr_inactive_clean_pages())
 		ret += page_launder(gfp_mask, user);
 
 	/*
@@ -1070,9 +1090,12 @@
 		run_task_queue(&tq_disk);
 
 		/* 
-		 * If we've either completely gotten rid of the
-		 * free page shortage or the inactive page shortage
-		 * is getting low, then stop eating CPU time.
+		 * We go to sleep if either the free page shortage
+		 * or the inactive page shortage is gone. We do this
+		 * because:
+		 * 1) we need no more free pages   or
+		 * 2) the inactive pages need to be flushed to disk,
+		 *    it wouldn't help to eat CPU time now ...
 		 *
 		 * We go to sleep for one second, but if it's needed
 		 * we'll be woken up earlier...
--- linux/include/linux/sched.h.orig	Fri Sep 22 02:31:04 2000
+++ linux/include/linux/sched.h	Fri Sep 22 02:31:13 2000
@@ -298,6 +298,7 @@
 	 * that's just fine.)
 	 */
 	struct list_head run_list;
+	unsigned long sleep_time;
 
 	struct task_struct *next_task, *prev_task;
 	struct mm_struct *active_mm;
@@ -818,6 +819,7 @@
 static inline void del_from_runqueue(struct task_struct * p)
 {
 	nr_running--;
+	p->sleep_time = jiffies;
 	list_del(&p->run_list);
 	p->run_list.next = NULL;
 }

  reply	other threads:[~2000-09-22 10:27 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2000-09-21 16:44 [patch *] VM deadlock fix Rik van Riel
2000-09-21 20:28 ` Roger Larsson
2000-09-21 23:31   ` Problem remains - page_launder? (Was: Re: [patch *] VM deadlock fix) Roger Larsson
2000-09-21 22:23 ` [patch *] VM deadlock fix David S. Miller
2000-09-22  0:18   ` Andrea Arcangeli
2000-09-21 23:57     ` David S. Miller
2000-09-22  8:39   ` Rik van Riel
2000-09-22  8:54     ` test9-pre5+t9p2-vmpatch VM deadlock during write-intensive workload Molnar Ingo
2000-09-22  9:00       ` Molnar Ingo
2000-09-22  9:08       ` Rik van Riel
2000-09-22  9:14         ` Molnar Ingo
2000-09-22  9:34           ` Molnar Ingo
2000-09-22 10:27             ` Rik van Riel [this message]
2000-09-22 13:10               ` André Dahlqvist
2000-09-22 14:10                 ` André Dahlqvist
2000-09-22 16:38                   ` test9-pre3+t9p2-vmpatch VM deadlock during socket I/O Yuri Pudgorodsky
2000-09-22 16:20                 ` test9-pre5+t9p2-vmpatch VM deadlock during write-intensive workload Mohammad A. Haque
2000-09-22 17:39       ` Linus Torvalds
2000-09-25 13:47         ` Rik van Riel
2000-09-22 12:16 ` [patch *] VM deadlock fix Martin Diehl

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Pine.LNX.4.21.0009220725590.4442-200000@duckman.distro.conectiva \
    --to=riel@conectiva.com.br \
    --cc=davem@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@debella.ikk.sztaki.hu \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox