linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [itcompilesshipitPATCH] -ac22-riel vm improvement?
@ 2000-06-19 23:57 Rik van Riel
  2000-06-20  0:11 ` Juan J. Quintela
  2000-06-20  6:13 ` latancy test of -ac22-riel Roger Larsson
  0 siblings, 2 replies; 6+ messages in thread
From: Rik van Riel @ 2000-06-19 23:57 UTC (permalink / raw)
  To: Juan J. Quintela; +Cc: linux-mm

Hi,

the following patch should implement the following things,
but due to lack of a test machine at home and enormous
peer pressure by the #humboltluitjes folks to send this
out _before_ dinner, I can't tell for sure...

- shrink_mmap() deadlock prevention
- uses bdflush/kflushd to sync the dirty buffers in an
  efficient way (only stalls when we really can't keep up)
- uses the memory_pressure() stuff to make sure we don't do
  too much work
- reintroduces the zone->free_pages > zone->pages_high patch

Since all of this patch does no more than simple code reuse of
other parts of the kernel, it should be good enough to give it
a try and tell me if it works :)

regards,

Rik
--
The Internet is not a network of computers. It is a network
of people. That is its real strength.

Wanna talk about the kernel?  irc.openprojects.net / #kernelnewbies
http://www.conectiva.com/		http://www.surriel.com/



--- linux-2.4.0-t1-ac22-riel/mm/filemap.c.orig	Mon Jun 19 18:27:05 2000
+++ linux-2.4.0-t1-ac22-riel/mm/filemap.c	Mon Jun 19 20:49:03 2000
@@ -301,16 +301,19 @@
  */
 int shrink_mmap(int priority, int gfp_mask)
 {
-	int ret = 0, count, nr_dirty;
+	int ret = 0, count, maxscan, nr_dirty, loop = 0;
 	struct list_head * page_lru;
 	struct page * page = NULL;
 	
+shrink_again:
 	count = nr_lru_pages / (priority + 1);
-	nr_dirty = priority;
+	maxscan = nr_lru_pages;
+	nr_dirty = 0;
 
 	/* we need pagemap_lru_lock for list_del() ... subtle code below */
 	spin_lock(&pagemap_lru_lock);
-	while (count > 0 && (page_lru = lru_cache.prev) != &lru_cache) {
+	while ((count > 0) && (maxscan-- > 0) && memory_pressure() &&
+			(page_lru = lru_cache.prev) != &lru_cache) {
 		page = list_entry(page_lru, struct page, lru);
 		list_del(page_lru);
 
@@ -351,9 +354,12 @@
 		 * of zone - it's old.
 		 */
 		if (page->buffers) {
-			int wait = ((gfp_mask & __GFP_IO) && (nr_dirty-- < 0));
+			int wait = ((gfp_mask & __GFP_IO) ? 0 : -1);
+			nr_dirty++;
 			if (!try_to_free_buffers(page, wait))
 				goto unlock_continue;
+			/* We freed the buffers so it wasn't dirty */
+			nr_dirty--;
 			/* page was locked, inode can't go away under us */
 			if (!page->mapping) {
 				atomic_dec(&buffermem_pages);
@@ -361,6 +367,15 @@
 			}
 		}
 
+		/*
+		 * Are there more than enough free pages in this zone?
+		 * Don't drop the page since it contains useful data.
+		 */
+		if (page->zone->free_pages > page->zone->pages_high) {
+			count++;
+			goto unlock_continue;
+		}
+
 		/* Take the pagecache_lock spinlock held to avoid
 		   other tasks to notice the page while we are looking at its
 		   page count. If it's a pagecache-page we'll free it
@@ -387,6 +402,7 @@
 			}
 			/* PageDeferswap -> we swap out the page now. */
 			if (gfp_mask & __GFP_IO) {
+				nr_dirty++;
 				spin_unlock(&pagecache_lock);
 				/* Do NOT unlock the page ... brw_page does. */
 				ClearPageDirty(page);
@@ -433,6 +449,17 @@
 
 out:
 	spin_unlock(&pagemap_lru_lock);
+
+	/* We scheduled pages for IO? Wake up kflushd. */
+	if (nr_dirty) {
+		if (!loop && !ret && (gfp_mask & __GFP_IO)) {
+			loop = 1;
+			wakeup_bdflush(1);
+			goto shrink_again;
+		} else {
+			wakeup_bdflush(0);
+		}
+	}
 
 	return ret;
 }
--- linux-2.4.0-t1-ac22-riel/mm/vmscan.c.orig	Mon Jun 19 18:27:05 2000
+++ linux-2.4.0-t1-ac22-riel/mm/vmscan.c	Mon Jun 19 19:02:28 2000
@@ -186,9 +186,7 @@
 	flush_tlb_page(vma, address);
 	vmlist_access_unlock(vma->vm_mm);
 
-	/* OK, do a physical asynchronous write to swap.  */
-	// rw_swap_page(WRITE, page, 0);
-	/* Let shrink_mmap handle this swapout. */
+	/* Mark the page for swapout. Shrink_mmap does the hard work. */
 	SetPageDirty(page);
 	UnlockPage(page);
 
@@ -427,6 +425,32 @@
 	return __ret;
 }
 
+/**
+ * memory_pressure - check if the system is under memory pressure
+ *
+ * Scans all zones of all nodes on pgdat_list. Returns 1 if a non-empty zone
+ * with zone_wake_kswapd set has free_pages below pages_low, 0 otherwise.
+ */
+int memory_pressure(void)
+{
+	pg_data_t *pgdat = pgdat_list;
+
+	do {
+		int i;
+		for(i = 0; i < MAX_NR_ZONES; i++) {
+			zone_t *zone = pgdat->node_zones + i;
+			if (!zone->size || !zone->zone_wake_kswapd)
+				continue;
+			if (zone->free_pages < zone->pages_low)
+				return 1;
+		}
+		pgdat = pgdat->node_next;
+	} while (pgdat);
+
+	/* Every node was scanned and no eligible zone was below pages_low. */
+	return 0;
+}
+
 /*
  * We need to make the locks finer granularity, but right
  * now we need this so that we can do page allocations
@@ -458,6 +482,8 @@
 				goto done;
 		}
 
+		if (!memory_pressure())
+			return 1;
 
 		/* Try to get rid of some shared memory pages.. */
 		if (gfp_mask & __GFP_IO) {
@@ -512,7 +538,7 @@
 		} else {
 			priority--;
 		}
-	} while (priority >= 0);
+	} while (priority >= 0 && memory_pressure());
 
 	/* Always end on a shrink_mmap.. */
 	while (shrink_mmap(0, gfp_mask)) {
@@ -521,6 +547,9 @@
 			goto done;
 	}
 
+	if (!memory_pressure())
+		ret = 1;
+
 done:
 	return ret;
 }
@@ -563,30 +592,22 @@
 	 */
 	tsk->flags |= PF_MEMALLOC;
 
+	/*
+	 * Kswapd needs to run for the entire lifetime of the system...
+	 */
 	for (;;) {
-		pg_data_t *pgdat;
-		int something_to_do = 0;
-
-		pgdat = pgdat_list;
-		do {
-			int i;
-			for(i = 0; i < MAX_NR_ZONES; i++) {
-				zone_t *zone = pgdat->node_zones+ i;
-				if (tsk->need_resched)
-					schedule();
-				if (!zone->size || !zone->zone_wake_kswapd)
-					continue;
-				if (zone->free_pages < zone->pages_low)
-					something_to_do = 1;
-				do_try_to_free_pages(GFP_KSWAPD);
-			}
-			pgdat = pgdat->node_next;
-		} while (pgdat);
-
-		if (!something_to_do) {
+		if (memory_pressure()) {
+			/* If there is memory pressure, try to free pages. */
+			do_try_to_free_pages(GFP_KSWAPD);
+		} else {
+			/* Else, we sleep and wait for somebody to wake us. */
 			tsk->state = TASK_INTERRUPTIBLE;
 			interruptible_sleep_on(&kswapd_wait);
 		}
+
+		/* Yield if something more important needs to run. */
+		if (tsk->need_resched)
+			schedule();
 	}
 }
 
--- linux-2.4.0-t1-ac22-riel/include/linux/swap.h.orig	Mon Jun 19 19:03:56 2000
+++ linux-2.4.0-t1-ac22-riel/include/linux/swap.h	Mon Jun 19 19:08:00 2000
@@ -87,6 +87,7 @@
 
 /* linux/mm/vmscan.c */
 extern int try_to_free_pages(unsigned int gfp_mask);
+extern int memory_pressure(void);
 
 /* linux/mm/page_io.c */
 extern void rw_swap_page(int, struct page *, int);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [itcompilesshipitPATCH] -ac22-riel vm improvement?
  2000-06-19 23:57 [itcompilesshipitPATCH] -ac22-riel vm improvement? Rik van Riel
@ 2000-06-20  0:11 ` Juan J. Quintela
  2000-06-20  6:13 ` latancy test of -ac22-riel Roger Larsson
  1 sibling, 0 replies; 6+ messages in thread
From: Juan J. Quintela @ 2000-06-20  0:11 UTC (permalink / raw)
  To: Rik van Riel; +Cc: linux-mm

>>>>> "rik" == Rik van Riel <riel@conectiva.com.br> writes:

Hi

rik> the following patch should implement the following things,
rik> but due to lack of a test machine at home and enormous
rik> peer pressure by the #humboltluitjes folks to send this
rik> out _before_ dinner, I can't tell for sure...

rik> - shrink_mmap() deadlock prevention
rik> - uses bdflush/kflushd to sync the dirty buffers in an
rik>   efficient way (only stalls when we really can't keep up)
rik> - uses the memory_pressure() stuff to make sure we don't do
rik>   too much work
rik> - reintroduces the zone->free_pages > zone->pages_high patch

rik> Since all of this patch does no more than simple code reuse of
rik> other parts of the kernel, it should be good enough to give it
rik> a try and tell me if it works :)

Rik forgot the patch to try_to_free_buffers that accepts the -1
argument; here it is.


Later, Juan.

> Hi quintela,

> this patch adds an option to try_to_free_buffers where file IO is
> skipped altogether...

> cheers,


--- fs/buffer.c.orig	Thu Jun  8 12:57:35 2000
+++ fs/buffer.c	Thu Jun  8 12:59:44 2000
@@ -2234,6 +2234,11 @@
  * NOTE: There are quite a number of ways that threads of control can
  *       obtain a reference to a buffer head within a page.  So we must
  *	 lock out all of these paths to cleanly toss the page.
+ *
+ * Different values for wait:
+ * -1:  don't do IO to free the buffers associated with page
+ *  0:  start asynchronous IO to free the buffers
+ *  1:  wait until the buffers have been freed
  */
 int try_to_free_buffers(struct page * page, int wait)
 {
@@ -2286,7 +2291,7 @@
 	spin_unlock(&free_list[index].lock);
 	write_unlock(&hash_table_lock);
 	spin_unlock(&lru_list_lock);	
-	if (sync_page_buffers(bh, wait))
+	if (wait >= 0 && sync_page_buffers(bh, wait))
 		goto again;
 	return 0;
 }




-- 
In theory, practice and theory are the same, but in practice they 
are different -- Larry McVoy
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/

^ permalink raw reply	[flat|nested] 6+ messages in thread

* latancy test of -ac22-riel
  2000-06-19 23:57 [itcompilesshipitPATCH] -ac22-riel vm improvement? Rik van Riel
  2000-06-20  0:11 ` Juan J. Quintela
@ 2000-06-20  6:13 ` Roger Larsson
       [not found]   ` <m2u2eoxwzx.fsf@boreas.southchinaseas>
  1 sibling, 1 reply; 6+ messages in thread
From: Roger Larsson @ 2000-06-20  6:13 UTC (permalink / raw)
  To: Rik van Riel; +Cc: linux-mm

Hi all,

Things are looking better and better :-)
Running with SCHED_FIFO now gives most interrupt to process
latencies below 3 ms !!!
(streaming 1.5 times RAM; read, write, copy tested)

But there are some, nowadays very few, spikes that hurt...
Worst is above 100 ms

But this kernel does not have the loop limits in shrink_mmap

/RogerL

PS
  The test programs used are at:
  http://www.gardena.net/benno/linux/audio 
DS

--
Home page:
  http://www.norran.net/nra02596/
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] Re: latancy test of -ac22-riel
       [not found]     ` <394FB013.3B21EA28@norran.net>
@ 2000-06-22 19:11       ` John Fremlin
  2000-06-22 19:47         ` Rik van Riel
  0 siblings, 1 reply; 6+ messages in thread
From: John Fremlin @ 2000-06-22 19:11 UTC (permalink / raw)
  To: Roger Larsson; +Cc: linux-mm

Roger Larsson <roger.larsson@norran.net> writes:

[...]

> I retried running with normal prio - then I get stalls
> of > 350ms...

I think some stalls are most probably due to try_to_free_pages below

page_alloc.c::__alloc_pages
	/*
	 * Uhhuh. All the zones have been critical, which means that
	 * we'd better do some synchronous swap-out. kswapd has not
	 * been able to cope..
	 */
	if (!(current->flags & PF_MEMALLOC)) {
		if (!try_to_free_pages(gfp_mask)) {
			if (!(gfp_mask & __GFP_HIGH))
				goto fail;
		}
		goto fail;
	}

That is, it happens in times of high memory stress and when I comment
it out the pauses go away but I'm not sure that this is a good
long-term solution ;-) though IMHO the behaviour without it (VM
killing process) is better than the behaviour with it (paging until
power is cycled).

[...]

-- 

	http://altern.org/vii
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Re: latancy test of -ac22-riel
  2000-06-22 19:11       ` [PATCH] " John Fremlin
@ 2000-06-22 19:47         ` Rik van Riel
  2000-06-23  1:09           ` John Fremlin
  0 siblings, 1 reply; 6+ messages in thread
From: Rik van Riel @ 2000-06-22 19:47 UTC (permalink / raw)
  To: John Fremlin; +Cc: Roger Larsson, linux-mm

On 22 Jun 2000, John Fremlin wrote:
> Roger Larsson <roger.larsson@norran.net> writes:
> 
> [...]
> 
> > I retried running with normal prio - then I get stalls
> > of > 350ms...
> 
> I think some stalls are most probably due to try_to_free_pages below
> 
> page_alloc.c::__alloc_pages
> 	/*
> 	 * Uhhuh. All the zones have been critical, which means that
> 	 * we'd better do some synchronous swap-out. kswapd has not
> 	 * been able to cope..
> 	 */
> 	if (!(current->flags & PF_MEMALLOC)) {
> 		if (!try_to_free_pages(gfp_mask)) {
> 			if (!(gfp_mask & __GFP_HIGH))
> 				goto fail;
> 		}
> 		goto fail;
> 	}
> 
> That is, it happens in times of high memory stress and when I
> comment it out the pauses go away but I'm not sure that this is
> a good long-term solution ;-) though IMHO the behaviour without
> it (VM killing process) is better than the behaviour with it
> (paging until power is cycled).

You're confusing things here.

If kswapd was too slow in freeing up memory, but there is
still more memory available, then we should NOT kill a
process but just stall the process until more memory is
available.

OTOH, when we are truly Out Of Memory, then (and only then)
should we kill a process.

Killing a process before we are out of memory is just not
acceptable and should never be done.

regards,

Rik
--
The Internet is not a network of computers. It is a network
of people. That is its real strength.

Wanna talk about the kernel?  irc.openprojects.net / #kernelnewbies
http://www.conectiva.com/		http://www.surriel.com/

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Re: latancy test of -ac22-riel
  2000-06-22 19:47         ` Rik van Riel
@ 2000-06-23  1:09           ` John Fremlin
  0 siblings, 0 replies; 6+ messages in thread
From: John Fremlin @ 2000-06-23  1:09 UTC (permalink / raw)
  To: linux-mm

Rik van Riel <riel@conectiva.com.br> writes:

> You're confusing things here.

It wouldn't come as a great shock to me :-)

But OTOH the patch does stop the annoying stalls so it must be doing
something right.

> If kswapd was too slow in freeing up memory, but there is
> still more memory available, then we should NOT kill a
> process but just stall the process until more memory is
> available.

Yes. What I was trying to get across was that we shouldn't waste a
timeslice trying to find pages to evict which are going to be read
back in next process switch (because most pages are impossible to swap
out).

[...]

Your solution (which is what they do in FreeBSD?) would be ideal, but
it wasn't in my kernel source (test1-ac22-riel).

[...]

-- 

	http://altern.org/vii
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2000-06-23  1:09 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2000-06-19 23:57 [itcompilesshipitPATCH] -ac22-riel vm improvement? Rik van Riel
2000-06-20  0:11 ` Juan J. Quintela
2000-06-20  6:13 ` latancy test of -ac22-riel Roger Larsson
     [not found]   ` <m2u2eoxwzx.fsf@boreas.southchinaseas>
     [not found]     ` <394FB013.3B21EA28@norran.net>
2000-06-22 19:11       ` [PATCH] " John Fremlin
2000-06-22 19:47         ` Rik van Riel
2000-06-23  1:09           ` John Fremlin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox