* PATCH: Improvements in shrink_mmap and kswapd
@ 2000-06-17 22:45 Juan J. Quintela
2000-06-17 23:12 ` Rik van Riel
` (4 more replies)
0 siblings, 5 replies; 8+ messages in thread
From: Juan J. Quintela @ 2000-06-17 22:45 UTC (permalink / raw)
To: Alan Cox, lkml, linux-mm, linux-fsdevel
Hi
this patch makes kswapd use fewer resources. It should solve
the "kswapd eats xx% of my CPU" problems. It appears that it improves
IO a bit here. Could people having problems with IO tell me whether this
patch improves things; I am interested in knowing that it never makes
things worse. This patch is stable here. I am finishing the
deferred writing of mmapped pages from files patch, which should solve
several other problems.
Reports of success/failure are welcome. Comments are also welcome.
Later, Juan.
This patch implements:
- never loops infinitely in shrink_mmap (it walks at most once per
page)
- it changes the nr_dirty logic to max_launder_page logic. We start
writing async a maximum of max_launder_page (100), and after that
point we never start more writes for that run of shrink_mmap. If we
start max_launder_page writes, we wait at the end of the function if
possible (i.e __gfp_mask let do that).
- It checks that there is some zone with need of pages before continue
with the loop. If there is no pages, stop walking the LRU.
- I have taken the patch from Roger Larsson for the memory pressure and
have partially re-implemented/extended it.
- kswapd rewritten in a similar way to Roger Larsson's.
- added the function memory_pressure that returns 1 if there is
memory_pressure and 0 if there is no pressure.
- I have got Manfred patch to use test_and_test_and_clear_bit
optimization in ClearPageReferenced.
- Added ClearPageDirty(page) to __remove_inode_pages to solve the
ramfs problems.
- Added __lru_cache_del and __lru_cache_add and use them in
shrink_mmap.
- Cleans up several pieces of cruft in shrink_mmap.
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/include/asm-i386/bitops.h working/include/asm-i386/bitops.h
--- base/include/asm-i386/bitops.h Sat Jun 17 23:37:03 2000
+++ working/include/asm-i386/bitops.h Sat Jun 17 23:52:49 2000
@@ -29,6 +29,7 @@
extern void change_bit(int nr, volatile void * addr);
extern int test_and_set_bit(int nr, volatile void * addr);
extern int test_and_clear_bit(int nr, volatile void * addr);
+extern int test_and_test_and_clear_bit(int nr, volatile void * addr);
extern int test_and_change_bit(int nr, volatile void * addr);
extern int __constant_test_bit(int nr, const volatile void * addr);
extern int __test_bit(int nr, volatile void * addr);
@@ -87,6 +88,13 @@
:"=r" (oldbit),"=m" (ADDR)
:"Ir" (nr));
return oldbit;
+}
+
+extern __inline__ int test_and_test_and_clear_bit(int nr, volatile void *addr)
+{
+ if(!(((unsigned long)addr) & (1<<nr)))
+ return 0;
+ return test_and_clear_bit(nr,addr);
}
extern __inline__ int test_and_change_bit(int nr, volatile void * addr)
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/include/linux/mm.h working/include/linux/mm.h
--- base/include/linux/mm.h Sat Jun 17 23:37:03 2000
+++ working/include/linux/mm.h Sun Jun 18 00:23:05 2000
@@ -203,7 +203,7 @@
#define PageReferenced(page) test_bit(PG_referenced, &(page)->flags)
#define SetPageReferenced(page) set_bit(PG_referenced, &(page)->flags)
#define ClearPageReferenced(page) clear_bit(PG_referenced, &(page)->flags)
-#define PageTestandClearReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags)
+#define PageTestandClearReferenced(page) test_and_test_and_clear_bit(PG_referenced, &(page)->flags)
#define PageDecrAfter(page) test_bit(PG_decr_after, &(page)->flags)
#define SetPageDecrAfter(page) set_bit(PG_decr_after, &(page)->flags)
#define PageTestandClearDecrAfter(page) test_and_clear_bit(PG_decr_after, &(page)->flags)
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/include/linux/swap.h working/include/linux/swap.h
--- base/include/linux/swap.h Sat Jun 17 23:37:16 2000
+++ working/include/linux/swap.h Sat Jun 17 23:52:49 2000
@@ -87,6 +87,7 @@
/* linux/mm/vmscan.c */
extern int try_to_free_pages(unsigned int gfp_mask);
+extern int memory_pressure(void);
/* linux/mm/page_io.c */
extern void rw_swap_page(int, struct page *, int);
@@ -173,11 +174,17 @@
/*
* Helper macros for lru_pages handling.
*/
-#define lru_cache_add(page) \
+
+#define __lru_cache_add(page) \
do { \
- spin_lock(&pagemap_lru_lock); \
list_add(&(page)->lru, &lru_cache); \
nr_lru_pages++; \
+} while (0)
+
+#define lru_cache_add(page) \
+do { \
+ spin_lock(&pagemap_lru_lock); \
+ __lru_cache_add(page); \
page->age = PG_AGE_START; \
ClearPageReferenced(page); \
SetPageActive(page); \
@@ -187,7 +194,6 @@
#define __lru_cache_del(page) \
do { \
list_del(&(page)->lru); \
- ClearPageActive(page); \
nr_lru_pages--; \
} while (0)
@@ -196,6 +202,7 @@
if (!PageLocked(page)) \
BUG(); \
spin_lock(&pagemap_lru_lock); \
+ ClearPageActive(page); \
__lru_cache_del(page); \
spin_unlock(&pagemap_lru_lock); \
} while (0)
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/mm/filemap.c working/mm/filemap.c
--- base/mm/filemap.c Sat Jun 17 23:25:43 2000
+++ working/mm/filemap.c Sun Jun 18 00:36:19 2000
@@ -65,8 +65,8 @@
(*p)->pprev_hash = &page->next_hash;
*p = page;
page->pprev_hash = p;
- if (page->buffers)
- PAGE_BUG(page);
+// if (page->buffers)
+// PAGE_BUG(page);
}
static inline void remove_page_from_hash_queue(struct page * page)
@@ -102,6 +102,7 @@
if (page->buffers)
BUG();
+ ClearPageDirty(page);
remove_page_from_inode_queue(page);
remove_page_from_hash_queue(page);
page->mapping = NULL;
@@ -294,36 +295,55 @@
spin_unlock(&pagecache_lock);
}
-/*
- * nr_dirty represents the number of dirty pages that we will write async
- * before doing sync writes. We can only do sync writes if we can
- * wait for IO (__GFP_IO set).
+/**
+ * shrink_mmap - Tries to free memory
+ * @priority: how hard we will try to free pages (0 hardest)
+ * @gfp_mask: Restrictions to free pages
+ *
+ * This function walks the lru list searching for free pages. It
+ * returns 1 to indicate success and 0 in the opposite case. It gets a
+ * lock in the pagemap_lru_lock and the pagecache_lock.
*/
+/* nr_to_examinate counts the number of pages that we will read as
+ * maximum as each call. This means that we don't loop.
+ */
+/* nr_writes counts the number of writes that we have started to the
+ * moment. We limitate the number of writes in each round to
+ * max_page_launder. ToDo: Make that variable tunable through sysctl.
+ */
+const int max_page_launder = 100;
+
int shrink_mmap(int priority, int gfp_mask)
{
- int ret = 0, count, nr_dirty;
struct list_head * page_lru;
struct page * page = NULL;
-
- count = nr_lru_pages / (priority + 1);
- nr_dirty = priority;
+ int ret;
+ int nr_to_examinate = nr_lru_pages;
+ int nr_writes = 0;
+ int count = nr_lru_pages / (priority + 1);
/* we need pagemap_lru_lock for list_del() ... subtle code below */
spin_lock(&pagemap_lru_lock);
while (count > 0 && (page_lru = lru_cache.prev) != &lru_cache) {
+ /* We exit if we have examinated all the LRU pages */
+ if(!nr_to_examinate--)
+ break;
+
+ /* if there is no zone low on memory we return */
+ if(!memory_pressure())
+ break;
+
page = list_entry(page_lru, struct page, lru);
- list_del(page_lru);
+ __lru_cache_del(page);
if (PageTestandClearReferenced(page)) {
- page->age += PG_AGE_ADV;
- if (page->age > PG_AGE_MAX)
- page->age = PG_AGE_MAX;
- goto dispose_continue;
+ page->age = min(PG_AGE_MAX, page->age + PG_AGE_ADV);
+ goto reinsert_page_continue;
}
page->age -= min(PG_AGE_DECL, page->age);
if (page->age)
- goto dispose_continue;
+ goto reinsert_page_continue;
count--;
/*
@@ -331,16 +351,18 @@
* immediate tell are untouchable..
*/
if (!page->buffers && page_count(page) > 1)
- goto dispose_continue;
+ goto reinsert_page_continue;
if (TryLockPage(page))
- goto dispose_continue;
+ goto reinsert_page_continue;
- /* Release the pagemap_lru lock even if the page is not yet
- queued in any lru queue since we have just locked down
- the page so nobody else may SMP race with us running
- a lru_cache_del() (lru_cache_del() always run with the
- page locked down ;). */
+ /*
+ * Release the pagemap_lru lock even if the page is
+ * not yet queued in any lru queue since we have just
+ * locked down the page so nobody else may SMP race
+ * with us running a lru_cache_del() (lru_cache_del()
+ * always run with the page locked down ;).
+ */
spin_unlock(&pagemap_lru_lock);
/* avoid freeing the page while it's locked */
@@ -351,14 +373,17 @@
* of zone - it's old.
*/
if (page->buffers) {
- int wait = ((gfp_mask & __GFP_IO) && (nr_dirty-- < 0));
- if (!try_to_free_buffers(page, wait))
+ if (nr_writes < max_page_launder) {
+ nr_writes++;
+ if (!try_to_free_buffers(page, 0))
+ goto unlock_continue;
+ /* page was locked, inode can't go away under us */
+ if (!page->mapping) {
+ atomic_dec(&buffermem_pages);
+ goto made_buffer_progress;
+ }
+ } else
goto unlock_continue;
- /* page was locked, inode can't go away under us */
- if (!page->mapping) {
- atomic_dec(&buffermem_pages);
- goto made_buffer_progress;
- }
}
/*
@@ -371,10 +396,13 @@
goto unlock_continue;
}
- /* Take the pagecache_lock spinlock held to avoid
- other tasks to notice the page while we are looking at its
- page count. If it's a pagecache-page we'll free it
- in one atomic transaction after checking its page count. */
+ /*
+ * Take the pagecache_lock spinlock held to avoid
+ * other tasks to notice the page while we are
+ * looking at its page count. If it's a
+ * pagecache-page we'll free it in one atomic
+ * transaction after checking its page count.
+ */
spin_lock(&pagecache_lock);
/*
@@ -396,14 +424,15 @@
goto made_inode_progress;
}
/* PageDeferswap -> we swap out the page now. */
- if (gfp_mask & __GFP_IO) {
+ if ((gfp_mask & __GFP_IO) && (nr_writes < max_page_launder)) {
spin_unlock(&pagecache_lock);
+ nr_writes++;
/* Do NOT unlock the page ... brw_page does. */
ClearPageDirty(page);
rw_swap_page(WRITE, page, 0);
spin_lock(&pagemap_lru_lock);
page_cache_release(page);
- goto dispose_continue;
+ goto reinsert_page_continue;
}
goto cache_unlock_continue;
}
@@ -426,23 +455,23 @@
spin_lock(&pagemap_lru_lock);
UnlockPage(page);
page_cache_release(page);
-dispose_continue:
- list_add(page_lru, &lru_cache);
+reinsert_page_continue:
+ __lru_cache_add(page);
}
+ spin_unlock(&pagemap_lru_lock);
+ ret = 0;
goto out;
made_inode_progress:
page_cache_release(page);
made_buffer_progress:
+ ClearPageActive(page);
UnlockPage(page);
page_cache_release(page);
ret = 1;
- spin_lock(&pagemap_lru_lock);
- /* nr_lru_pages needs the spinlock */
- nr_lru_pages--;
-
out:
- spin_unlock(&pagemap_lru_lock);
+ if ((gfp_mask & __GFP_IO) && (nr_writes >= max_page_launder))
+ block_sync_page(page);
return ret;
}
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/mm/swap_state.c working/mm/swap_state.c
--- base/mm/swap_state.c Sat Jun 17 23:25:43 2000
+++ working/mm/swap_state.c Sat Jun 17 23:52:49 2000
@@ -73,7 +73,6 @@
PAGE_BUG(page);
PageClearSwapCache(page);
- ClearPageDirty(page);
remove_inode_page(page);
}
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/mm/vmscan.c working/mm/vmscan.c
--- base/mm/vmscan.c Sat Jun 17 23:51:24 2000
+++ working/mm/vmscan.c Sun Jun 18 00:28:12 2000
@@ -179,16 +179,14 @@
/* Add it to the swap cache */
add_to_swap_cache(page, entry);
+ set_pte(page_table, swp_entry_to_pte(entry));
/* Put the swap entry into the pte after the page is in swapcache */
vma->vm_mm->rss--;
- set_pte(page_table, swp_entry_to_pte(entry));
flush_tlb_page(vma, address);
vmlist_access_unlock(vma->vm_mm);
- /* OK, do a physical asynchronous write to swap. */
- // rw_swap_page(WRITE, page, 0);
- /* Let shrink_mmap handle this swapout. */
+ /* Set page for deferred swap */
SetPageDirty(page);
UnlockPage(page);
@@ -427,6 +425,32 @@
return __ret;
}
+/**
+ * memory_pressure - Is the system under memory pressure
+ *
+ * Returns 1 if the system is low on memory in any of its zones,
+ * otherwise returns 0.
+ */
+int memory_pressure(void)
+{
+ pg_data_t *pgdat = pgdat_list;
+
+ do {
+ int i;
+ for(i = 0; i < MAX_NR_ZONES; i++) {
+ zone_t *zone = pgdat->node_zones + i;
+ if (!zone->size || !zone->zone_wake_kswapd)
+ continue;
+ if (zone->free_pages < zone->pages_low)
+ return 1;
+ }
+ pgdat = pgdat->node_next;
+
+ } while (pgdat);
+
+ return 0;
+}
+
/*
* We need to make the locks finer granularity, but right
* now we need this so that we can do page allocations
@@ -444,7 +468,6 @@
int priority;
int count = FREE_COUNT;
int swap_count = 0;
- int ret = 0;
/* Always trim SLAB caches when memory gets low. */
kmem_cache_reap(gfp_mask);
@@ -452,11 +475,12 @@
priority = 64;
do {
while (shrink_mmap(priority, gfp_mask)) {
- ret = 1;
if (!--count)
goto done;
}
+ if(!memory_pressure())
+ return 1;
/* Try to get rid of some shared memory pages.. */
if (gfp_mask & __GFP_IO) {
@@ -468,11 +492,9 @@
count -= shrink_dcache_memory(priority, gfp_mask);
count -= shrink_icache_memory(priority, gfp_mask);
if (count <= 0) {
- ret = 1;
goto done;
}
while (shm_swap(priority, gfp_mask)) {
- ret = 1;
if (!--count)
goto done;
}
@@ -496,18 +518,19 @@
if (--swap_count < 0)
break;
}
+ if(!memory_pressure())
+ return 1;
} while (--priority >= 0);
/* Always end on a shrink_mmap.. */
while (shrink_mmap(0, gfp_mask)) {
- ret = 1;
if (!--count)
goto done;
}
done:
- return ret;
+ return (count < FREE_COUNT);
}
DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
@@ -549,26 +572,14 @@
tsk->flags |= PF_MEMALLOC;
for (;;) {
- pg_data_t *pgdat;
- int something_to_do = 0;
+ int pressure = memory_pressure();
- pgdat = pgdat_list;
- do {
- int i;
- for(i = 0; i < MAX_NR_ZONES; i++) {
- zone_t *zone = pgdat->node_zones+ i;
- if (tsk->need_resched)
- schedule();
- if (!zone->size || !zone->zone_wake_kswapd)
- continue;
- if (zone->free_pages < zone->pages_low)
- something_to_do = 1;
- do_try_to_free_pages(GFP_KSWAPD);
- }
- pgdat = pgdat->node_next;
- } while (pgdat);
+ if (tsk->need_resched)
+ schedule();
- if (!something_to_do) {
+ if(pressure)
+ do_try_to_free_pages(GFP_KSWAPD);
+ else {
tsk->state = TASK_INTERRUPTIBLE;
interruptible_sleep_on(&kswapd_wait);
}
--
In theory, practice and theory are the same, but in practice they
are different -- Larry McVoy
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: PATCH: Improvements in shrink_mmap and kswapd
2000-06-17 22:45 PATCH: Improvements in shrink_mmap and kswapd Juan J. Quintela
@ 2000-06-17 23:12 ` Rik van Riel
2000-06-17 23:30 ` Roger Larsson
` (3 subsequent siblings)
4 siblings, 0 replies; 8+ messages in thread
From: Rik van Riel @ 2000-06-17 23:12 UTC (permalink / raw)
To: Juan J. Quintela; +Cc: Alan Cox, lkml, linux-mm, linux-fsdevel
On 18 Jun 2000, Juan J. Quintela wrote:
> Reports of success/failure are welcome. Comments are also welcome.
I have a few comments on the patch. They have mostly to do
with the maxlaunder logic.
A few days ago I sent you the buffer.c patch where
try_to_free_buffers was modified so that it would never try
to do IO on pages if the 'wait' argument has a value of -1.
This can be combined with maxlaunder in a nice way. Firstly
we need to wakeup_bdflush() if we queued some buffers or swap
pages for IO, that way bdflush will flush dirty and IO queued
pages to disk.
Secondly we need to try try_to_free_buffers(page, -1) first,
currently you count freeing buffers without doing IO as an
IO operation (and also, you're starting IO operations when
__GFP_IO isn't set). If that fails and maxlaunder isn't reached
yet, we can try to start asynchronous IO on the page.
When we reach the end of shrink_mmap, we can do something like
this:
wait = 0;
if (nr_writes && (gfp_mask & __GFP_IO))
wait = 1;
wake_up_bdflush(wait);
if (wait && !ret) {
goto again; /* bdflush just made pages available, roll again */
}
This will give us something like write throttling where apps
will be waiting for bdflush to have done IO on pages so we'll
have freeable pages around. If __GFP_IO isn't set we'll still
fail, of course, but this will at least keep applications from
failing needlessly.
regards,
Rik
--
The Internet is not a network of computers. It is a network
of people. That is its real strength.
Wanna talk about the kernel? irc.openprojects.net / #kernelnewbies
http://www.conectiva.com/ http://www.surriel.com/
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: PATCH: Improvements in shrink_mmap and kswapd
2000-06-17 22:45 PATCH: Improvements in shrink_mmap and kswapd Juan J. Quintela
2000-06-17 23:12 ` Rik van Riel
@ 2000-06-17 23:30 ` Roger Larsson
2000-06-17 23:42 ` Philipp Rumpf
2000-06-18 0:51 ` Roger Larsson
` (2 subsequent siblings)
4 siblings, 1 reply; 8+ messages in thread
From: Roger Larsson @ 2000-06-17 23:30 UTC (permalink / raw)
To: Juan J. Quintela; +Cc: Alan Cox, lkml, linux-mm, linux-fsdevel
> diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/include/asm-i386/bitops.h working/include/asm-i386/bitops.h
> --- base/include/asm-i386/bitops.h Sat Jun 17 23:37:03 2000
> +++ working/include/asm-i386/bitops.h Sat Jun 17 23:52:49 2000
> @@ -29,6 +29,7 @@
> extern void change_bit(int nr, volatile void * addr);
> extern int test_and_set_bit(int nr, volatile void * addr);
> extern int test_and_clear_bit(int nr, volatile void * addr);
> +extern int test_and_test_and_clear_bit(int nr, volatile void * addr);
> extern int test_and_change_bit(int nr, volatile void * addr);
> extern int __constant_test_bit(int nr, const volatile void * addr);
> extern int __test_bit(int nr, volatile void * addr);
> @@ -87,6 +88,13 @@
> :"=r" (oldbit),"=m" (ADDR)
> :"Ir" (nr));
> return oldbit;
> +}
> +
> +extern __inline__ int test_and_test_and_clear_bit(int nr, volatile void *addr)
> +{
> + if(!(((unsigned long)addr) & (1<<nr)))
> + return 0;
> + return test_and_clear_bit(nr,addr);
> }
This does not look correct. It basically tests if the ADDRESS has bit
#nr set...
Shouldn't it be
+ if(!(((unsigned long)*addr) & (1<<nr)))
/RogerL
--
Home page:
http://www.norran.net/nra02596/
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: PATCH: Improvements in shrink_mmap and kswapd
2000-06-17 23:30 ` Roger Larsson
@ 2000-06-17 23:42 ` Philipp Rumpf
2000-06-18 0:00 ` Roger Larsson
0 siblings, 1 reply; 8+ messages in thread
From: Philipp Rumpf @ 2000-06-17 23:42 UTC (permalink / raw)
To: Roger Larsson, Juan J. Quintela; +Cc: Alan Cox, lkml, linux-mm, linux-fsdevel
On Sun, Jun 18, 2000 at 01:30:17AM +0200, Roger Larsson wrote:
> > diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/include/asm-i386/bitops.h working/include/asm-i386/bitops.h
> > --- base/include/asm-i386/bitops.h Sat Jun 17 23:37:03 2000
> > +++ working/include/asm-i386/bitops.h Sat Jun 17 23:52:49 2000
> > @@ -29,6 +29,7 @@
> > extern void change_bit(int nr, volatile void * addr);
> > extern int test_and_set_bit(int nr, volatile void * addr);
> > extern int test_and_clear_bit(int nr, volatile void * addr);
> > +extern int test_and_test_and_clear_bit(int nr, volatile void * addr);
> > extern int test_and_change_bit(int nr, volatile void * addr);
> > extern int __constant_test_bit(int nr, const volatile void * addr);
> > extern int __test_bit(int nr, volatile void * addr);
> > @@ -87,6 +88,13 @@
> > :"=r" (oldbit),"=m" (ADDR)
> > :"Ir" (nr));
> > return oldbit;
> > +}
> > +
> > +extern __inline__ int test_and_test_and_clear_bit(int nr, volatile void *addr)
> > +{
> > + if(!(((unsigned long)addr) & (1<<nr)))
> > + return 0;
> > + return test_and_clear_bit(nr,addr);
> > }
>
>
> This does not look correct. It basically tests if the ADDRESS has bit
> #nr set...
>
> Shouldn't it be
> + if(!(((unsigned long)*addr) & (1<<nr)))
if(!((*(unsigned long *)addr) & (1<<nr))
is closer to what you want. it still breaks for nr > BITS_PER_LONG.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: PATCH: Improvements in shrink_mmap and kswapd
2000-06-17 23:42 ` Philipp Rumpf
@ 2000-06-18 0:00 ` Roger Larsson
0 siblings, 0 replies; 8+ messages in thread
From: Roger Larsson @ 2000-06-18 0:00 UTC (permalink / raw)
To: Philipp Rumpf; +Cc: Juan J. Quintela, Alan Cox, lkml, linux-mm, linux-fsdevel
Philipp Rumpf wrote:
>
> On Sun, Jun 18, 2000 at 01:30:17AM +0200, Roger Larsson wrote:
> > > diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/include/asm-i386/bitops.h working/include/asm-i386/bitops.h
> > > --- base/include/asm-i386/bitops.h Sat Jun 17 23:37:03 2000
> > > +++ working/include/asm-i386/bitops.h Sat Jun 17 23:52:49 2000
> > > @@ -29,6 +29,7 @@
> > > extern void change_bit(int nr, volatile void * addr);
> > > extern int test_and_set_bit(int nr, volatile void * addr);
> > > extern int test_and_clear_bit(int nr, volatile void * addr);
> > > +extern int test_and_test_and_clear_bit(int nr, volatile void * addr);
> > > extern int test_and_change_bit(int nr, volatile void * addr);
> > > extern int __constant_test_bit(int nr, const volatile void * addr);
> > > extern int __test_bit(int nr, volatile void * addr);
> > > @@ -87,6 +88,13 @@
> > > :"=r" (oldbit),"=m" (ADDR)
> > > :"Ir" (nr));
> > > return oldbit;
> > > +}
> > > +
> > > +extern __inline__ int test_and_test_and_clear_bit(int nr, volatile void *addr)
> > > +{
> > > + if(!(((unsigned long)addr) & (1<<nr)))
> > > + return 0;
> > > + return test_and_clear_bit(nr,addr);
> > > }
> >
> >
> > This does not look correct. It basically tests if the ADDRESS has bit
> > #nr set...
> >
> > Shouldn't it be
> > + if(!(((unsigned long)*addr) & (1<<nr)))
>
> if(!((*(unsigned long *)addr) & (1<<nr))
>
> is closer to what you want. it still breaks for nr > BITS_PER_LONG.
Final attempt:
return test_bit(nr,addr) && test_and_clear_bit(nr,addr);
It even matches the name :-)
But it have to be moved some lines down.
(Exercise for the reader)
/RogerL
--
Home page:
http://www.norran.net/nra02596/
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: PATCH: Improvements in shrink_mmap and kswapd
2000-06-17 22:45 PATCH: Improvements in shrink_mmap and kswapd Juan J. Quintela
2000-06-17 23:12 ` Rik van Riel
2000-06-17 23:30 ` Roger Larsson
@ 2000-06-18 0:51 ` Roger Larsson
2000-06-18 7:57 ` Mike Galbraith
2000-06-19 23:15 ` PATCH: Improvements in shrink_mmap and kswapd (take 2) Juan J. Quintela
4 siblings, 0 replies; 8+ messages in thread
From: Roger Larsson @ 2000-06-18 0:51 UTC (permalink / raw)
To: Juan J. Quintela; +Cc: linux-mm
Hi,
Comments below,
"Juan J. Quintela" wrote:
>
> Hi
> this patch makes kswapd use less resources. It should solve
> the kswapd eats xx% of my CPU problems. It appears that it improves
> IO a bit here. Could people having problems with IO told me if this
> patch improves things, I am interested in knowing that it don't makes
> things worst never. This patch is stable here. I am finishing the
> deferred mmaped pages form file writing patch, that should solve
> several other problems.
>
> Reports of success/failure are welcome. Comments are also welcome.
>
> Later, Juan.
>
> +/**
> + * shrink_mmap - Tries to free memory
> + * @priority: how hard we will try to free pages (0 hardest)
> + * @gfp_mask: Restrictions to free pages
> + *
> + * This function walks the lru list searching for free pages. It
> + * returns 1 to indicate success and 0 in the opposite case. It gets a
> + * lock in the pagemap_lru_lock and the pagecache_lock.
> */
> +/* nr_to_examinate counts the number of pages that we will read as
> + * maximum as each call. This means that we don't loop.
> + */
> +/* nr_writes counts the number of writes that we have started to the
> + * moment. We limitate the number of writes in each round to
> + * max_page_launder. ToDo: Make that variable tunable through sysctl.
> + */
> +const int max_page_launder = 100;
> +
> int shrink_mmap(int priority, int gfp_mask)
> {
> - int ret = 0, count, nr_dirty;
> struct list_head * page_lru;
> struct page * page = NULL;
> -
> - count = nr_lru_pages / (priority + 1);
> - nr_dirty = priority;
> + int ret;
> + int nr_to_examinate = nr_lru_pages;
Is this really enough?
PG_AGE_MAX * nr_lru_pages / (priority + 1)
is required to ensure that all pages have been scanned at an age of 0.
But that is probably an overkill... there is a sum involved here...
PG_AGE_START * ...
Could be nice to get rid of streaming pages before other attempts are
done.
> diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/mm/vmscan.c working/mm/vmscan.c
> --- base/mm/vmscan.c Sat Jun 17 23:51:24 2000
> +++ working/mm/vmscan.c Sun Jun 18 00:28:12 2000
>
> [removed stuff]
>
> @@ -427,6 +425,32 @@
> return __ret;
> }
>
> +/**
> + * memory_pressure - Is the system under memory pressure
> + *
> + * Returns 1 if the system is low on memory in any of its zones,
> + * otherwise returns 0.
> + */
> +int memory_pressure(void)
> +{
> + pg_data_t *pgdat = pgdat_list;
> +
> + do {
> + int i;
> + for(i = 0; i < MAX_NR_ZONES; i++) {
> + zone_t *zone = pgdat->node_zones + i;
> + if (!zone->size || !zone->zone_wake_kswapd)
> + continue;
> + if (zone->free_pages < zone->pages_low)
> + return 1;
> + }
> + pgdat = pgdat->node_next;
> +
> + } while (pgdat);
> +
> + return 0;
> +}
> +
This function effectively ignore 'zone_wake_kswapd' since it should
always be set when free < low - if correct behaviour remove the test.
> priority = 64;
> do {
> while (shrink_mmap(priority, gfp_mask)) {
> - ret = 1;
> if (!--count)
> goto done;
> }
>
> + if(!memory_pressure())
> + return 1;
>
Needs lower than pages_low after shrink_mmap to pass this test and
enter swapping... might be correct behaviour!
/RogerL
--
Home page:
http://www.norran.net/nra02596/
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: PATCH: Improvements in shrink_mmap and kswapd
2000-06-17 22:45 PATCH: Improvements in shrink_mmap and kswapd Juan J. Quintela
` (2 preceding siblings ...)
2000-06-18 0:51 ` Roger Larsson
@ 2000-06-18 7:57 ` Mike Galbraith
2000-06-19 23:15 ` PATCH: Improvements in shrink_mmap and kswapd (take 2) Juan J. Quintela
4 siblings, 0 replies; 8+ messages in thread
From: Mike Galbraith @ 2000-06-18 7:57 UTC (permalink / raw)
To: Juan J. Quintela; +Cc: Alan Cox, lkml, linux-mm, linux-fsdevel
On 18 Jun 2000, Juan J. Quintela wrote:
> Hi
> this patch makes kswapd use less resources. It should solve
> the kswapd eats xx% of my CPU problems. It appears that it improves
> IO a bit here. Could people having problems with IO told me if this
> patch improves things, I am interested in knowing that it don't makes
> things worst never. This patch is stable here. I am finishing the
> deferred mmaped pages form file writing patch, that should solve
> several other problems.
>
> Reports of success/failure are welcome. Comments are also welcome.
Hi Juan,
I added this patch to ac20 + Roger Larsonn fix and gave it a quick burn.
I saw a slight performance drop in both make -j30 build times and streaming
IO (iozone). I didn't do sustained pounding though (consistency), so...
-Mike
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 8+ messages in thread
* PATCH: Improvements in shrink_mmap and kswapd (take 2)
2000-06-17 22:45 PATCH: Improvements in shrink_mmap and kswapd Juan J. Quintela
` (3 preceding siblings ...)
2000-06-18 7:57 ` Mike Galbraith
@ 2000-06-19 23:15 ` Juan J. Quintela
4 siblings, 0 replies; 8+ messages in thread
From: Juan J. Quintela @ 2000-06-19 23:15 UTC (permalink / raw)
To: Alan Cox; +Cc: lkml, linux-mm, linux-fsdevel
Hi
here appear to give similar results as ac22-riel, but
shouldn't degenerate in bad behaviour as fast as ac22-riel in
tests like Zlatko.
Reports of success/failure are welcome. Comments are also welcome.
Later, Juan.
take 1 comments:
> this patch makes kswapd use less resources. It should solve
> the kswapd eats xx% of my CPU problems. It appears that it improves
> IO a bit here. Could people having problems with IO told me if this
> patch improves things, I am interested in knowing that it don't makes
> things worst never. This patch is stable here. I am finishing the
> deferred mmaped pages form file writing patch, that should solve
> several other problems.
This patch implements:
take 2:
------
- against ac22-riel
- fixes the problems with test_and_test_and_clear_bit (thanks Roger
Larson and Philipp Rumpf)
- Reintroduces the page->zone test dropped in ac21
- call to wakeup_bdflush() at the end of shrink_mmap (thanks Rik van Riel)
- the rest of Riel's suggestions haven't been introduced; they need
infrastructure changes in buffer.c that I am studying.
take 1:
------
- never loops infinitely in shrink_mmap (it walks at most once per
page)
- it changes the nr_dirty logic to max_launder_page logic. We start
writing async a maximum of max_launder_page (100), and after that
point we never start more writes for that run of shrink_mmap. If we
start max_launder_page writes, we wait at the end of the function if
possible (i.e __gfp_mask let do that).
- It checks that there is some zone with need of pages before continue
with the loop. If there is no pages, stop walking the LRU.
- I have taken the patch from Roger Larsson for the memory pressure and
have partially re-implemented/extended it.
- kswapd rewritten in a similar way to Roger Larsson's.
- added the function memory_pressure that returns 1 if there is
memory_pressure and 0 if there is no pressure.
- I have got Manfred patch to use test_and_test_and_clear_bit
optimization in ClearPageReferenced.
- Added ClearPageDirty(page) to __remove_inode_pages to solve the
ramfs problems.
- Added __lru_cache_del and __lru_cache_add and use them in
shrink_mmap.
- Cleans up several pieces of cruft in shrink_mmap.
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/include/asm-i386/bitops.h working/include/asm-i386/bitops.h
--- base/include/asm-i386/bitops.h Mon Jun 19 23:46:11 2000
+++ working/include/asm-i386/bitops.h Tue Jun 20 00:12:49 2000
@@ -29,6 +29,7 @@
extern void change_bit(int nr, volatile void * addr);
extern int test_and_set_bit(int nr, volatile void * addr);
extern int test_and_clear_bit(int nr, volatile void * addr);
+extern int test_and_test_and_clear_bit(int nr, volatile void * addr);
extern int test_and_change_bit(int nr, volatile void * addr);
extern int __constant_test_bit(int nr, const volatile void * addr);
extern int __test_bit(int nr, volatile void * addr);
@@ -123,6 +124,15 @@
(__builtin_constant_p(nr) ? \
__constant_test_bit((nr),(addr)) : \
__test_bit((nr),(addr)))
+
+extern __inline__ int test_and_test_and_clear_bit(int nr, volatile void *addr)
+{
+ if(!test_bit(nr,addr))
+ return 0;
+ return test_and_clear_bit(nr,addr);
+}
+
+
/*
* Find-bit routines..
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/include/linux/mm.h working/include/linux/mm.h
--- base/include/linux/mm.h Mon Jun 19 23:46:11 2000
+++ working/include/linux/mm.h Tue Jun 20 00:13:00 2000
@@ -203,7 +203,7 @@
#define PageReferenced(page) test_bit(PG_referenced, &(page)->flags)
#define SetPageReferenced(page) set_bit(PG_referenced, &(page)->flags)
#define ClearPageReferenced(page) clear_bit(PG_referenced, &(page)->flags)
-#define PageTestandClearReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags)
+#define PageTestandClearReferenced(page) test_and_test_and_clear_bit(PG_referenced, &(page)->flags)
#define PageDecrAfter(page) test_bit(PG_decr_after, &(page)->flags)
#define SetPageDecrAfter(page) set_bit(PG_decr_after, &(page)->flags)
#define PageTestandClearDecrAfter(page) test_and_clear_bit(PG_decr_after, &(page)->flags)
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/include/linux/swap.h working/include/linux/swap.h
--- base/include/linux/swap.h Mon Jun 19 23:46:26 2000
+++ working/include/linux/swap.h Tue Jun 20 00:04:12 2000
@@ -87,6 +87,7 @@
/* linux/mm/vmscan.c */
extern int try_to_free_pages(unsigned int gfp_mask);
+extern int memory_pressure(void);
/* linux/mm/page_io.c */
extern void rw_swap_page(int, struct page *, int);
@@ -173,11 +174,17 @@
/*
* Helper macros for lru_pages handling.
*/
-#define lru_cache_add(page) \
+
+#define __lru_cache_add(page) \
do { \
- spin_lock(&pagemap_lru_lock); \
list_add(&(page)->lru, &lru_cache); \
nr_lru_pages++; \
+} while (0)
+
+#define lru_cache_add(page) \
+do { \
+ spin_lock(&pagemap_lru_lock); \
+ __lru_cache_add(page); \
page->age = PG_AGE_START; \
ClearPageReferenced(page); \
SetPageActive(page); \
@@ -187,7 +194,6 @@
#define __lru_cache_del(page) \
do { \
list_del(&(page)->lru); \
- ClearPageActive(page); \
nr_lru_pages--; \
} while (0)
@@ -196,6 +202,7 @@
if (!PageLocked(page)) \
BUG(); \
spin_lock(&pagemap_lru_lock); \
+ ClearPageActive(page); \
__lru_cache_del(page); \
spin_unlock(&pagemap_lru_lock); \
} while (0)
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/mm/filemap.c working/mm/filemap.c
--- base/mm/filemap.c Mon Jun 19 23:35:41 2000
+++ working/mm/filemap.c Tue Jun 20 00:31:46 2000
@@ -65,8 +65,8 @@
(*p)->pprev_hash = &page->next_hash;
*p = page;
page->pprev_hash = p;
- if (page->buffers)
- PAGE_BUG(page);
+// if (page->buffers)
+// PAGE_BUG(page);
}
static inline void remove_page_from_hash_queue(struct page * page)
@@ -102,6 +102,7 @@
if (page->buffers)
BUG();
+ ClearPageDirty(page);
remove_page_from_inode_queue(page);
remove_page_from_hash_queue(page);
page->mapping = NULL;
@@ -294,36 +295,55 @@
spin_unlock(&pagecache_lock);
}
-/*
- * nr_dirty represents the number of dirty pages that we will write async
- * before doing sync writes. We can only do sync writes if we can
- * wait for IO (__GFP_IO set).
+/**
+ * shrink_mmap - Tries to free memory
+ * @priority: how hard we will try to free pages (0 hardest)
+ * @gfp_mask: Restrictions to free pages
+ *
+ * This function walks the lru list searching for free pages. It
+ * returns 1 to indicate success and 0 in the opposite case. It gets a
+ * lock in the pagemap_lru_lock and the pagecache_lock.
+ */
+/* nr_to_examinate counts the number of pages that we will read as
+ * maximum as each call. This means that we don't loop.
*/
+/* nr_writes counts the number of writes that we have started to the
+ * moment. We limitate the number of writes in each round to
+ * max_page_launder. ToDo: Make that variable tunable through sysctl.
+ */
+const int max_page_launder = 100;
+
int shrink_mmap(int priority, int gfp_mask)
{
- int ret = 0, count, nr_dirty;
struct list_head * page_lru;
struct page * page = NULL;
-
- count = nr_lru_pages / (priority + 1);
- nr_dirty = priority;
+ int ret;
+ int nr_to_examinate = nr_lru_pages;
+ int nr_writes = 0;
+ int count = nr_lru_pages / (priority + 1);
/* we need pagemap_lru_lock for list_del() ... subtle code below */
spin_lock(&pagemap_lru_lock);
while (count > 0 && (page_lru = lru_cache.prev) != &lru_cache) {
+ /* We exit if we have examinated all the LRU pages */
+ if(!nr_to_examinate--)
+ break;
+
+ /* if there is no zone low on memory we return */
+ if(!memory_pressure())
+ break;
+
page = list_entry(page_lru, struct page, lru);
- list_del(page_lru);
+ __lru_cache_del(page);
if (PageTestandClearReferenced(page)) {
- page->age += PG_AGE_ADV;
- if (page->age > PG_AGE_MAX)
- page->age = PG_AGE_MAX;
- goto dispose_continue;
+ page->age = min(PG_AGE_MAX, page->age + PG_AGE_ADV);
+ goto reinsert_page_continue;
}
page->age -= min(PG_AGE_DECL, page->age);
if (page->age)
- goto dispose_continue;
+ goto reinsert_page_continue;
count--;
/*
@@ -331,16 +351,18 @@
* immediate tell are untouchable..
*/
if (!page->buffers && page_count(page) > 1)
- goto dispose_continue;
+ goto reinsert_page_continue;
if (TryLockPage(page))
- goto dispose_continue;
+ goto reinsert_page_continue;
- /* Release the pagemap_lru lock even if the page is not yet
- queued in any lru queue since we have just locked down
- the page so nobody else may SMP race with us running
- a lru_cache_del() (lru_cache_del() always run with the
- page locked down ;). */
+ /*
+ * Release the pagemap_lru lock even if the page is
+ * not yet queued in any lru queue since we have just
+ * locked down the page so nobody else may SMP race
+ * with us running a lru_cache_del() (lru_cache_del()
+ * always run with the page locked down ;).
+ */
spin_unlock(&pagemap_lru_lock);
/* avoid freeing the page while it's locked */
@@ -351,20 +373,34 @@
* of zone - it's old.
*/
if (page->buffers) {
- int wait = ((gfp_mask & __GFP_IO) && (nr_dirty-- < 0));
- if (!try_to_free_buffers(page, wait))
+ if (nr_writes < max_page_launder) {
+ nr_writes++;
+ if (!try_to_free_buffers(page, 0))
+ goto unlock_continue;
+ /* page was locked, inode can't go away under us */
+ if (!page->mapping) {
+ atomic_dec(&buffermem_pages);
+ goto made_buffer_progress;
+ }
+ } else
goto unlock_continue;
- /* page was locked, inode can't go away under us */
- if (!page->mapping) {
- atomic_dec(&buffermem_pages);
- goto made_buffer_progress;
- }
}
-
- /* Take the pagecache_lock spinlock held to avoid
- other tasks to notice the page while we are looking at its
- page count. If it's a pagecache-page we'll free it
- in one atomic transaction after checking its page count. */
+ /*
+ * Page is from a zone we don't care about.
+ * Don't drop page cache entries in vain.
+ */
+ if (page->zone->free_pages > page->zone->pages_high) {
+ /* the page from the wrong zone doesn't count */
+ count++;
+ goto unlock_continue;
+ }
+ /*
+ * Take the pagecache_lock spinlock held to avoid
+ * other tasks to notice the page while we are
+ * looking at its page count. If it's a
+ * pagecache-page we'll free it in one atomic
+ * transaction after checking its page count.
+ */
spin_lock(&pagecache_lock);
/*
@@ -386,14 +422,15 @@
goto made_inode_progress;
}
/* PageDeferswap -> we swap out the page now. */
- if (gfp_mask & __GFP_IO) {
+ if ((gfp_mask & __GFP_IO) && (nr_writes < max_page_launder)) {
spin_unlock(&pagecache_lock);
+ nr_writes++;
/* Do NOT unlock the page ... brw_page does. */
ClearPageDirty(page);
rw_swap_page(WRITE, page, 0);
spin_lock(&pagemap_lru_lock);
page_cache_release(page);
- goto dispose_continue;
+ goto reinsert_page_continue;
}
goto cache_unlock_continue;
}
@@ -416,23 +453,23 @@
spin_lock(&pagemap_lru_lock);
UnlockPage(page);
page_cache_release(page);
-dispose_continue:
- list_add(page_lru, &lru_cache);
+reinsert_page_continue:
+ __lru_cache_add(page);
}
+ spin_unlock(&pagemap_lru_lock);
+ ret = 0;
goto out;
made_inode_progress:
page_cache_release(page);
made_buffer_progress:
+ ClearPageActive(page);
UnlockPage(page);
page_cache_release(page);
ret = 1;
- spin_lock(&pagemap_lru_lock);
- /* nr_lru_pages needs the spinlock */
- nr_lru_pages--;
-
out:
- spin_unlock(&pagemap_lru_lock);
+ if (nr_writes >= (max_page_launder/2))
+ wakeup_bdflush(gfp_mask & __GFP_IO);
return ret;
}
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/mm/swap_state.c working/mm/swap_state.c
--- base/mm/swap_state.c Mon Jun 19 23:35:41 2000
+++ working/mm/swap_state.c Tue Jun 20 00:04:12 2000
@@ -73,7 +73,6 @@
PAGE_BUG(page);
PageClearSwapCache(page);
- ClearPageDirty(page);
remove_inode_page(page);
}
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/mm/vmscan.c working/mm/vmscan.c
--- base/mm/vmscan.c Mon Jun 19 23:35:41 2000
+++ working/mm/vmscan.c Tue Jun 20 00:08:27 2000
@@ -179,16 +179,14 @@
/* Add it to the swap cache */
add_to_swap_cache(page, entry);
+ set_pte(page_table, swp_entry_to_pte(entry));
/* Put the swap entry into the pte after the page is in swapcache */
vma->vm_mm->rss--;
- set_pte(page_table, swp_entry_to_pte(entry));
flush_tlb_page(vma, address);
vmlist_access_unlock(vma->vm_mm);
- /* OK, do a physical asynchronous write to swap. */
- // rw_swap_page(WRITE, page, 0);
- /* Let shrink_mmap handle this swapout. */
+ /* Set page for deferred swap */
SetPageDirty(page);
UnlockPage(page);
@@ -427,6 +425,32 @@
return __ret;
}
+/**
+ * memory_pressure - Is the system under memory pressure
+ *
+ * Returns 1 if the system is low on memory in any of its zones,
+ * otherwise returns 0.
+ */
+int memory_pressure(void)
+{
+ pg_data_t *pgdat = pgdat_list;
+
+ do {
+ int i;
+ for(i = 0; i < MAX_NR_ZONES; i++) {
+ zone_t *zone = pgdat->node_zones + i;
+ if (!zone->size || !zone->zone_wake_kswapd)
+ continue;
+ if (zone->free_pages < zone->pages_low)
+ return 1;
+ }
+ pgdat = pgdat->node_next;
+
+ } while (pgdat);
+
+ return 0;
+}
+
/*
* We need to make the locks finer granularity, but right
* now we need this so that we can do page allocations
@@ -445,7 +469,6 @@
int count = FREE_COUNT;
int swap_count = 0;
int made_progress = 0;
- int ret = 0;
/* Always trim SLAB caches when memory gets low. */
kmem_cache_reap(gfp_mask);
@@ -458,6 +481,8 @@
goto done;
}
+ if (!memory_pressure())
+ return 1;
/* Try to get rid of some shared memory pages.. */
if (gfp_mask & __GFP_IO) {
@@ -506,9 +531,11 @@
* potentially upset the balance between shrink_mmap and
* swap_out.
*/
+
+ if (!memory_pressure())
+ return 1;
if (made_progress) {
made_progress = 0;
- ret = 1;
} else {
priority--;
}
@@ -516,13 +543,12 @@
/* Always end on a shrink_mmap.. */
while (shrink_mmap(0, gfp_mask)) {
- ret = 1;
if (!--count)
goto done;
}
done:
- return ret;
+ return (count < FREE_COUNT);
}
DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
@@ -564,26 +590,14 @@
tsk->flags |= PF_MEMALLOC;
for (;;) {
- pg_data_t *pgdat;
- int something_to_do = 0;
+ int pressure = memory_pressure();
- pgdat = pgdat_list;
- do {
- int i;
- for(i = 0; i < MAX_NR_ZONES; i++) {
- zone_t *zone = pgdat->node_zones+ i;
- if (tsk->need_resched)
- schedule();
- if (!zone->size || !zone->zone_wake_kswapd)
- continue;
- if (zone->free_pages < zone->pages_low)
- something_to_do = 1;
- do_try_to_free_pages(GFP_KSWAPD);
- }
- pgdat = pgdat->node_next;
- } while (pgdat);
+ if (tsk->need_resched)
+ schedule();
- if (!something_to_do) {
+ if(pressure)
+ do_try_to_free_pages(GFP_KSWAPD);
+ else {
tsk->state = TASK_INTERRUPTIBLE;
interruptible_sleep_on(&kswapd_wait);
}
--
In theory, practice and theory are the same, but in practice they
are different -- Larry McVoy
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2000-06-19 23:15 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2000-06-17 22:45 PATCH: Improvements in shrink_mmap and kswapd Juan J. Quintela
2000-06-17 23:12 ` Rik van Riel
2000-06-17 23:30 ` Roger Larsson
2000-06-17 23:42 ` Philipp Rumpf
2000-06-18 0:00 ` Roger Larsson
2000-06-18 0:51 ` Roger Larsson
2000-06-18 7:57 ` Mike Galbraith
2000-06-19 23:15 ` PATCH: Improvements in shrink_mmap and kswapd (take 2) Juan J. Quintela
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox