From: Mike Galbraith <mikeg@wen-online.de>
To: Rik van Riel <riel@conectiva.com.br>
Cc: "Stephen C. Tweedie" <sct@redhat.com>,
	Ingo Oeser <ingo.oeser@informatik.tu-chemnitz.de>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [RFC][PATCH] Re: Linux 2.4.4-ac10
Date: Thu, 24 May 2001 10:48:48 +0200 (CEST)
Message-ID: <Pine.LNX.4.33.0105241041100.369-100000@mikeg.weiden.de>
In-Reply-To: <Pine.LNX.4.21.0105200546241.5531-100000@imladris.rielhome.conectiva>

On Sun, 20 May 2001, Rik van Riel wrote:

> Remember that inactive_clean pages are always immediately
> reclaimable by __alloc_pages(), if you measured a performance
> difference by freeing pages in a different way I'm pretty sure
> it's a side effect of something else.  What that something
> else is I'm curious to find out, but I'm pretty convinced that
> throwing away data early isn't the way to go.

OK.. let's forget about throughput for a moment and consider
those annoying reports of order-0 allocations failing :)
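
The failure mode, as I read it: a GFP_ATOMIC allocation has no
__GFP_WAIT, so it never gets direct_reclaim and can only take pages
that are actually sitting on the free lists.  Roughly (a minimal
sketch, not the real __alloc_pages() flow):

	/*
	 * Sketch only: an atomic order-0 allocation can't call
	 * reclaim_page(), so rmqueue() is all it has.  If the zone's
	 * memory is parked on the inactive_clean list instead of the
	 * free lists, even this tiny request comes up empty.
	 */
	if (!(gfp_mask & __GFP_WAIT)) {
		page = rmqueue(z, 0);	/* no direct reclaim allowed */
		if (!page)
			return NULL;	/* -> "order-0 allocation failed" */
	}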

What do you think of the patch below (ignore the refill_inactive bit)
wrt allocator reliability under heavy stress?  The thing does
kick in and pump zones back up even if I set the 'blood donor'
threshold to pages_min.
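
In a nutshell, the 'blood donor' part of the direct-reclaim path in
the page_alloc.c hunk does this (a distilled sketch of the patch
below, with the shadowed page variable renamed to donor for clarity):

	/*
	 * If the zone is short on truly free pages but has clean
	 * reclaimable ones, grab one page for this allocation, then
	 * "donate" a batch back to the free lists so atomic
	 * allocators never face a zone holding nothing but
	 * reclaimable pages.
	 */
	if (z->free_pages < z->pages_low && z->inactive_clean_pages) {
		int count = 4 * (1 << page_cluster);

		if (z->inactive_clean_pages > count)
			page = reclaim_page(z);		/* one for us.. */
		if (z->inactive_clean_pages < 2 * count)
			count = z->inactive_clean_pages / 2;
		while (count--) {			/* ..the donation */
			struct page *donor = reclaim_page(z);
			if (!donor)
				break;
			__free_page(donor);
		}
	}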

	-Mike

--- linux-2.4.5-pre3/mm/page_alloc.c.org	Mon May 21 10:35:06 2001
+++ linux-2.4.5-pre3/mm/page_alloc.c	Thu May 24 08:18:36 2001
@@ -224,10 +224,11 @@
 			unsigned long order, int limit, int direct_reclaim)
 {
 	zone_t **zone = zonelist->zones;
+	struct page *page = NULL;

 	for (;;) {
 		zone_t *z = *(zone++);
-		unsigned long water_mark;
+		unsigned long water_mark = 1 << order;

 		if (!z)
 			break;
@@ -249,18 +250,44 @@
 			case PAGES_HIGH:
 				water_mark = z->pages_high;
 		}
+		if (z->free_pages + z->inactive_clean_pages < water_mark)
+			continue;

-		if (z->free_pages + z->inactive_clean_pages > water_mark) {
-			struct page *page = NULL;
-			/* If possible, reclaim a page directly. */
-			if (direct_reclaim && z->free_pages < z->pages_min + 8)
+		if (direct_reclaim) {
+			int count;
+
+			/* If we're in bad shape.. */
+			if (z->free_pages < z->pages_low && z->inactive_clean_pages) {
+				count = 4 * (1 << page_cluster);
+				/* reclaim a page for ourselves if we can afford to.. */
+				if (z->inactive_clean_pages > count)
+					page = reclaim_page(z);
+				if (z->inactive_clean_pages < 2 * count)
+					count = z->inactive_clean_pages / 2;
+			} else count = 0;
+
+			/*
+			 * and make a small donation to the reclaim challenged.
+			 *
+			 * We don't ever want a zone to reach the state where we
+			 * have nothing except reclaimable pages left.. not if
+			 * we can possibly do something to help prevent it.
+			 */
+			while (count--) {
+				struct page *page;
 				page = reclaim_page(z);
-			/* If that fails, fall back to rmqueue. */
-			if (!page)
-				page = rmqueue(z, order);
-			if (page)
-				return page;
+				if (!page)
+					break;
+				__free_page(page);
+			}
 		}
+		if (!page)
+			page = rmqueue(z, order);
+		if (page)
+			return page;
+		if (z->inactive_clean_pages - z->free_pages > z->pages_low
+				&& waitqueue_active(&kreclaimd_wait))
+			wake_up_interruptible(&kreclaimd_wait);
 	}

 	/* Found nothing. */
@@ -314,29 +341,6 @@
 		wakeup_bdflush(0);

 try_again:
-	/*
-	 * First, see if we have any zones with lots of free memory.
-	 *
-	 * We allocate free memory first because it doesn't contain
-	 * any data ... DUH!
-	 */
-	zone = zonelist->zones;
-	for (;;) {
-		zone_t *z = *(zone++);
-		if (!z)
-			break;
-		if (!z->size)
-			BUG();
-
-		if (z->free_pages >= z->pages_low) {
-			page = rmqueue(z, order);
-			if (page)
-				return page;
-		} else if (z->free_pages < z->pages_min &&
-					waitqueue_active(&kreclaimd_wait)) {
-				wake_up_interruptible(&kreclaimd_wait);
-		}
-	}

 	/*
 	 * Try to allocate a page from a zone with a HIGH
--- linux-2.4.5-pre3/mm/vmscan.c.org	Thu May 17 16:44:23 2001
+++ linux-2.4.5-pre3/mm/vmscan.c	Thu May 24 08:05:21 2001
@@ -824,39 +824,17 @@
 #define DEF_PRIORITY (6)
 static int refill_inactive(unsigned int gfp_mask, int user)
 {
-	int count, start_count, maxtry;
-
-	if (user) {
-		count = (1 << page_cluster);
-		maxtry = 6;
-	} else {
-		count = inactive_shortage();
-		maxtry = 1 << DEF_PRIORITY;
-	}
-
-	start_count = count;
-	do {
-		if (current->need_resched) {
-			__set_current_state(TASK_RUNNING);
-			schedule();
-			if (!inactive_shortage())
-				return 1;
-		}
-
-		count -= refill_inactive_scan(DEF_PRIORITY, count);
-		if (count <= 0)
-			goto done;
-
-		/* If refill_inactive_scan failed, try to page stuff out.. */
-		swap_out(DEF_PRIORITY, gfp_mask);
-
-		if (--maxtry <= 0)
-				return 0;
-
-	} while (inactive_shortage());
-
-done:
-	return (count < start_count);
+	int shortage = inactive_shortage();
+	int large = freepages.high/2;
+	int scale;
+
+	scale = shortage/large;
+	scale += free_shortage()/large;
+	if (scale > DEF_PRIORITY-1)
+		scale = DEF_PRIORITY-1;
+	if (refill_inactive_scan(DEF_PRIORITY-scale, shortage) < shortage)
+		return swap_out(DEF_PRIORITY, gfp_mask);
+	return 1;
 }

 static int do_try_to_free_pages(unsigned int gfp_mask, int user)
@@ -976,8 +954,9 @@
 		 * We go to sleep for one second, but if it's needed
 		 * we'll be woken up earlier...
 		 */
-		if (!free_shortage() || !inactive_shortage()) {
-			interruptible_sleep_on_timeout(&kswapd_wait, HZ);
+		if (current->need_resched || !free_shortage() ||
+				!inactive_shortage()) {
+			interruptible_sleep_on_timeout(&kswapd_wait, HZ/10);
 		/*
 		 * If we couldn't free enough memory, we see if it was
 		 * due to the system just not having enough memory.
@@ -1051,10 +1030,13 @@
 			int i;
 			for(i = 0; i < MAX_NR_ZONES; i++) {
 				zone_t *zone = pgdat->node_zones + i;
+				int count;
 				if (!zone->size)
 					continue;

-				while (zone->free_pages < zone->pages_low) {
+				count = zone->pages_low;
+				while (zone->free_pages < zone->inactive_clean_pages &&
+						count--) {
 					struct page * page;
 					page = reclaim_page(zone);
 					if (!page)
@@ -1064,6 +1046,9 @@
 			}
 			pgdat = pgdat->node_next;
 		} while (pgdat);
+#if 1
+		run_task_queue(&tq_disk);
+#endif
 	}
 }

