* [itcompilesshipitPATCH] -ac22-riel vm improvement?
@ 2000-06-19 23:57 Rik van Riel
2000-06-20 0:11 ` Juan J. Quintela
2000-06-20 6:13 ` latancy test of -ac22-riel Roger Larsson
0 siblings, 2 replies; 6+ messages in thread
From: Rik van Riel @ 2000-06-19 23:57 UTC (permalink / raw)
To: Juan J. Quintela; +Cc: linux-mm
Hi,
the following patch should implement the following things,
but due to lack of a test machine at home and enormous
peer pressure by the #humboltluitjes folks to send this
out _before_ dinner, I can't tell for sure...
- shrink_mmap() deadlock prevention
- uses bdflush/kflushd to sync the dirty buffers in an
efficient way (only stalls when we really can't keep up)
- uses the memory_pressure() stuff to make sure we don't do
too much work
- reintroduces the zone->free_pages > zone->pages_high patch
Since all of this patch does no more than simple code reuse of
other parts of the kernel, it should be good enough to give it
a try and tell me if it works :)
regards,
Rik
--
The Internet is not a network of computers. It is a network
of people. That is its real strength.
Wanna talk about the kernel? irc.openprojects.net / #kernelnewbies
http://www.conectiva.com/ http://www.surriel.com/
--- linux-2.4.0-t1-ac22-riel/mm/filemap.c.orig Mon Jun 19 18:27:05 2000
+++ linux-2.4.0-t1-ac22-riel/mm/filemap.c Mon Jun 19 20:49:03 2000
@@ -301,16 +301,19 @@
*/
int shrink_mmap(int priority, int gfp_mask)
{
- int ret = 0, count, nr_dirty;
+ int ret = 0, count, maxscan, nr_dirty, loop = 0;
struct list_head * page_lru;
struct page * page = NULL;
+shrink_again:
count = nr_lru_pages / (priority + 1);
- nr_dirty = priority;
+ maxscan = nr_lru_pages;
+ nr_dirty = 0;
/* we need pagemap_lru_lock for list_del() ... subtle code below */
spin_lock(&pagemap_lru_lock);
- while (count > 0 && (page_lru = lru_cache.prev) != &lru_cache) {
+ while ((count > 0) && (maxscan-- > 0) && memory_pressure() &&
+ (page_lru = lru_cache.prev) != &lru_cache) {
page = list_entry(page_lru, struct page, lru);
list_del(page_lru);
@@ -351,9 +354,12 @@
* of zone - it's old.
*/
if (page->buffers) {
- int wait = ((gfp_mask & __GFP_IO) && (nr_dirty-- < 0));
+ int wait = ((gfp_mask & __GFP_IO) ? 0 : -1);
+ nr_dirty++;
if (!try_to_free_buffers(page, wait))
goto unlock_continue;
+ /* We freed the buffers so it wasn't dirty */
+ nr_dirty--;
/* page was locked, inode can't go away under us */
if (!page->mapping) {
atomic_dec(&buffermem_pages);
@@ -361,6 +367,15 @@
}
}
+ /*
+ * Are there more than enough free pages in this zone?
+ * Don't drop the page since it contains useful data.
+ */
+ if (page->zone->free_pages > page->zone->pages_high) {
+ count++;
+ goto unlock_continue;
+ }
+
/* Take the pagecache_lock spinlock held to avoid
other tasks to notice the page while we are looking at its
page count. If it's a pagecache-page we'll free it
@@ -387,6 +402,7 @@
}
/* PageDeferswap -> we swap out the page now. */
if (gfp_mask & __GFP_IO) {
+ nr_dirty++;
spin_unlock(&pagecache_lock);
/* Do NOT unlock the page ... brw_page does. */
ClearPageDirty(page);
@@ -433,6 +449,17 @@
out:
spin_unlock(&pagemap_lru_lock);
+
+ /* We scheduled pages for IO? Wake up kflushd. */
+ if (nr_dirty) {
+ if (!loop && !ret && (gfp_mask & __GFP_IO)) {
+ loop = 1;
+ wakeup_bdflush(1);
+ goto shrink_again;
+ } else {
+ wakeup_bdflush(0);
+ }
+ }
return ret;
}
--- linux-2.4.0-t1-ac22-riel/mm/vmscan.c.orig Mon Jun 19 18:27:05 2000
+++ linux-2.4.0-t1-ac22-riel/mm/vmscan.c Mon Jun 19 19:02:28 2000
@@ -186,9 +186,7 @@
flush_tlb_page(vma, address);
vmlist_access_unlock(vma->vm_mm);
- /* OK, do a physical asynchronous write to swap. */
- // rw_swap_page(WRITE, page, 0);
- /* Let shrink_mmap handle this swapout. */
+ /* Mark the page for swapout. Shrink_mmap does the hard work. */
SetPageDirty(page);
UnlockPage(page);
@@ -427,6 +425,32 @@
return __ret;
}
+/**
+ * memory_pressure - check if the system is under memory pressure
+ *
+ * Returns 1 if the system is low on memory in at least one zone,
+ * 0 otherwise
+ */
+int memory_pressure(void)
+{
+ pg_data_t *pgdat = pgdat_list;
+
+ do {
+ int i;
+ for(i = 0; i < MAX_NR_ZONES; i++) {
+ zone_t *zone = pgdat->node_zones + i;
+ if (!zone->size || !zone->zone_wake_kswapd)
+ continue;
+ if (zone->free_pages < zone->pages_low)
+ return 1;
+ }
+ pgdat = pgdat->node_next;
+ } while (pgdat);
+
+ /* Found no zone with memory pressure? */
+ return 0;
+}
+
/*
* We need to make the locks finer granularity, but right
* now we need this so that we can do page allocations
@@ -458,6 +482,8 @@
goto done;
}
+ if (!memory_pressure())
+ return 1;
/* Try to get rid of some shared memory pages.. */
if (gfp_mask & __GFP_IO) {
@@ -512,7 +538,7 @@
} else {
priority--;
}
- } while (priority >= 0);
+ } while (priority >= 0 && memory_pressure());
/* Always end on a shrink_mmap.. */
while (shrink_mmap(0, gfp_mask)) {
@@ -521,6 +547,9 @@
goto done;
}
+ if (!memory_pressure())
+ ret = 1;
+
done:
return ret;
}
@@ -563,30 +592,22 @@
*/
tsk->flags |= PF_MEMALLOC;
+ /*
+ * Kswapd needs to run for the entire lifetime of the system...
+ */
for (;;) {
- pg_data_t *pgdat;
- int something_to_do = 0;
-
- pgdat = pgdat_list;
- do {
- int i;
- for(i = 0; i < MAX_NR_ZONES; i++) {
- zone_t *zone = pgdat->node_zones+ i;
- if (tsk->need_resched)
- schedule();
- if (!zone->size || !zone->zone_wake_kswapd)
- continue;
- if (zone->free_pages < zone->pages_low)
- something_to_do = 1;
- do_try_to_free_pages(GFP_KSWAPD);
- }
- pgdat = pgdat->node_next;
- } while (pgdat);
-
- if (!something_to_do) {
+ if (memory_pressure()) {
+ /* If there is memory pressure, try to free pages. */
+ do_try_to_free_pages(GFP_KSWAPD);
+ } else {
+ /* Else, we sleep and wait for somebody to wake us. */
tsk->state = TASK_INTERRUPTIBLE;
interruptible_sleep_on(&kswapd_wait);
}
+
+ /* Yield if something more important needs to run. */
+ if (tsk->need_resched)
+ schedule();
}
}
--- linux-2.4.0-t1-ac22-riel/include/linux/swap.h.orig Mon Jun 19 19:03:56 2000
+++ linux-2.4.0-t1-ac22-riel/include/linux/swap.h Mon Jun 19 19:08:00 2000
@@ -87,6 +87,7 @@
/* linux/mm/vmscan.c */
extern int try_to_free_pages(unsigned int gfp_mask);
+extern int memory_pressure(void);
/* linux/mm/page_io.c */
extern void rw_swap_page(int, struct page *, int);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [itcompilesshipitPATCH] -ac22-riel vm improvement?
2000-06-19 23:57 [itcompilesshipitPATCH] -ac22-riel vm improvement? Rik van Riel
@ 2000-06-20 0:11 ` Juan J. Quintela
2000-06-20 6:13 ` latancy test of -ac22-riel Roger Larsson
1 sibling, 0 replies; 6+ messages in thread
From: Juan J. Quintela @ 2000-06-20 0:11 UTC (permalink / raw)
To: Rik van Riel; +Cc: linux-mm
>>>>> "rik" == Rik van Riel <riel@conectiva.com.br> writes:
Hi
rik> the following patch should implement the following things,
rik> but due to lack of a test machine at home and enormous
rik> peer pressure by the #humboltluitjes folks to send this
rik> out _before_ dinner, I can't tell for sure...
rik> - shrink_mmap() deadlock prevention
rik> - uses bdflush/kflushd to sync the dirty buffers in an
rik> efficient way (only stalls when we really can't keep up)
rik> - uses the memory_pressure() stuff to make sure we don't do
rik> too much work
rik> - reintroduces the zone->free_pages > zone->pages_high patch
rik> Since all of this patch does no more than simple code reuse of
rik> other parts of the kernel, it should be good enough to give it
rik> a try and tell me if it works :)
Rik forgot the patch to try_to_free_buffers that accepts the -1
argument; here it is.
Later, Juan.
> Hi quintela,
> this patch adds an option to try_to_free_buffers where file IO is
> skipped altogether...
> cheers,
--- fs/buffer.c.orig Thu Jun 8 12:57:35 2000
+++ fs/buffer.c Thu Jun 8 12:59:44 2000
@@ -2234,6 +2234,11 @@
* NOTE: There are quite a number of ways that threads of control can
* obtain a reference to a buffer head within a page. So we must
* lock out all of these paths to cleanly toss the page.
+ *
+ * Different values for wait:
+ * -1: don't do IO to free the buffers associated with page
+ * 0: start asynchronous IO to free the buffers
+ * 1: wait until the buffers have been freed
*/
int try_to_free_buffers(struct page * page, int wait)
{
@@ -2286,7 +2291,7 @@
spin_unlock(&free_list[index].lock);
write_unlock(&hash_table_lock);
spin_unlock(&lru_list_lock);
- if (sync_page_buffers(bh, wait))
+ if (wait >= 0 && sync_page_buffers(bh, wait))
goto again;
return 0;
}
--
In theory, practice and theory are the same, but in practice they
are different -- Larry McVoy
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 6+ messages in thread
* latancy test of -ac22-riel
2000-06-19 23:57 [itcompilesshipitPATCH] -ac22-riel vm improvement? Rik van Riel
2000-06-20 0:11 ` Juan J. Quintela
@ 2000-06-20 6:13 ` Roger Larsson
[not found] ` <m2u2eoxwzx.fsf@boreas.southchinaseas>
1 sibling, 1 reply; 6+ messages in thread
From: Roger Larsson @ 2000-06-20 6:13 UTC (permalink / raw)
To: Rik van Riel; +Cc: linux-mm
Hi all,
Things are looking better and better :-)
Running with SCHED_FIFO now gives most interrupt-to-process
latencies below 3 ms!!!
(streaming 1.5 times RAM; read, write, copy tested)
But there are some, nowadays very few, spikes that hurt...
Worst is above 100 ms
But this kernel does not have the loop limits in shrink_mmap
/RogerL
PS
The test programs used are available at:
http://www.gardena.net/benno/linux/audio
DS
--
Home page:
http://www.norran.net/nra02596/
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH] Re: latancy test of -ac22-riel
[not found] ` <394FB013.3B21EA28@norran.net>
@ 2000-06-22 19:11 ` John Fremlin
2000-06-22 19:47 ` Rik van Riel
0 siblings, 1 reply; 6+ messages in thread
From: John Fremlin @ 2000-06-22 19:11 UTC (permalink / raw)
To: Roger Larsson; +Cc: linux-mm
Roger Larsson <roger.larsson@norran.net> writes:
[...]
> I retried running with normal prio - then I get stalls
> of > 350ms...
I think some stalls are most probably due to try_to_free_pages below
page_alloc.c::__alloc_pages
/*
* Uhhuh. All the zones have been critical, which means that
* we'd better do some synchronous swap-out. kswapd has not
* been able to cope..
*/
if (!(current->flags & PF_MEMALLOC)) {
if (!try_to_free_pages(gfp_mask)) {
if (!(gfp_mask & __GFP_HIGH))
goto fail;
}
goto fail;
}
That is, it happens in times of high memory stress and when I comment
it out the pauses go away but I'm not sure that this is a good
long-term solution ;-) though IMHO the behaviour without it (VM
killing process) is better than the behaviour with it (paging until
power is cycled).
[...]
--
http://altern.org/vii
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Re: latancy test of -ac22-riel
2000-06-22 19:11 ` [PATCH] " John Fremlin
@ 2000-06-22 19:47 ` Rik van Riel
2000-06-23 1:09 ` John Fremlin
0 siblings, 1 reply; 6+ messages in thread
From: Rik van Riel @ 2000-06-22 19:47 UTC (permalink / raw)
To: John Fremlin; +Cc: Roger Larsson, linux-mm
On 22 Jun 2000, John Fremlin wrote:
> Roger Larsson <roger.larsson@norran.net> writes:
>
> [...]
>
> > I retried running with normal prio - then I get stalls
> > of > 350ms...
>
> I think some stalls are most probably due to try_to_free_pages below
>
> page_alloc.c::__alloc_pages
> /*
> * Uhhuh. All the zones have been critical, which means that
> * we'd better do some synchronous swap-out. kswapd has not
> * been able to cope..
> */
> if (!(current->flags & PF_MEMALLOC)) {
> if (!try_to_free_pages(gfp_mask)) {
> if (!(gfp_mask & __GFP_HIGH))
> goto fail;
> }
> goto fail;
> }
>
> That is, it happens in times of high memory stress and when I
> comment it out the pauses go away but I'm not sure that this is
> a good long-term solution ;-) though IMHO the behaviour without
> it (VM killing process) is better than the behaviour with it
> (paging until power is cycled).
You're confusing things here.
If kswapd was too slow in freeing up memory, but there is
still more memory available, then we should NOT kill a
process but just stall the process until more memory is
available.
OTOH, when we are truly Out Of Memory, then (and only then)
should we kill a process.
Killing a process before we are out of memory is just not
acceptable and should never be done.
regards,
Rik
--
The Internet is not a network of computers. It is a network
of people. That is its real strength.
Wanna talk about the kernel? irc.openprojects.net / #kernelnewbies
http://www.conectiva.com/ http://www.surriel.com/
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Re: latancy test of -ac22-riel
2000-06-22 19:47 ` Rik van Riel
@ 2000-06-23 1:09 ` John Fremlin
0 siblings, 0 replies; 6+ messages in thread
From: John Fremlin @ 2000-06-23 1:09 UTC (permalink / raw)
To: linux-mm
Rik van Riel <riel@conectiva.com.br> writes:
> You're confusing things here.
It wouldn't come as a great shock to me :-)
But OTOH the patch does stop the annoying stalls so it must be doing
something right.
> If kswapd was too slow in freeing up memory, but there is
> still more memory available, then we should NOT kill a
> process but just stall the process until more memory is
> available.
Yes. What I was trying to get across was that we shouldn't waste a
timeslice trying to find pages to evict which are going to be read
back in on the next process switch (because most pages are impossible
to swap out).
[...]
Your solution (which is what they do in FreeBSD?) would be ideal, but
it wasn't in my kernel source (test1-ac22-riel).
[...]
--
http://altern.org/vii
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2000-06-23 1:09 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2000-06-19 23:57 [itcompilesshipitPATCH] -ac22-riel vm improvement? Rik van Riel
2000-06-20 0:11 ` Juan J. Quintela
2000-06-20 6:13 ` latancy test of -ac22-riel Roger Larsson
[not found] ` <m2u2eoxwzx.fsf@boreas.southchinaseas>
[not found] ` <394FB013.3B21EA28@norran.net>
2000-06-22 19:11 ` [PATCH] " John Fremlin
2000-06-22 19:47 ` Rik van Riel
2000-06-23 1:09 ` John Fremlin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox