From: Peter Zijlstra
Date: Wed, 12 Jul 2006 16:39:18 +0200
Message-Id: <20060712143918.16998.4797.sendpatchset@lappy>
In-Reply-To: <20060712143659.16998.6444.sendpatchset@lappy>
References: <20060712143659.16998.6444.sendpatchset@lappy>
Subject: [PATCH 12/39] mm: pgrep: move the shrink logic
To: linux-mm@kvack.org
Cc: Peter Zijlstra

From: Peter Zijlstra

Move the whole per-zone shrinker to the policy files. Share the
shrink_page_list() logic across policies, since it no longer knows
about policy internals and deals exclusively with pageout.

API: shrink the specified zone.

        unsigned long pgrep_shrink_zone(int priority, struct zone *zone,
                                        struct scan_control *sc);

Signed-off-by: Peter Zijlstra
Signed-off-by: Marcelo Tosatti

 include/linux/mm_page_replace.h |    2 
 include/linux/swap.h            |    9 +
 mm/useonce.c                    |  242 ++++++++++++++++++++++++++++++++++++++
 mm/vmscan.c                     |  251 ----------------------------------------
 4 files changed, 257 insertions(+), 247 deletions(-)

Index: linux-2.6/include/linux/swap.h
===================================================================
--- linux-2.6.orig/include/linux/swap.h	2006-07-12 16:08:18.000000000 +0200
+++ linux-2.6/include/linux/swap.h	2006-07-12 16:11:45.000000000 +0200
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -170,10 +171,16 @@ extern void swap_setup(void);
 extern void release_pages(struct page **, int, int);
 
 /* linux/mm/vmscan.c */
+extern int remove_mapping(struct address_space *mapping, struct page *page);
+extern unsigned long shrink_page_list(struct list_head *page_list,
+                                        struct scan_control *sc);
+extern unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+                struct list_head *src, struct list_head *dst,
+                unsigned long *scanned);
+extern int should_reclaim_mapped(struct zone *zone);
 extern unsigned long try_to_free_pages(struct zone **, gfp_t);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
-extern int remove_mapping(struct address_space *mapping, struct page *page);
 
 /* possible outcome of pageout() */
 typedef enum {
Index: linux-2.6/mm/useonce.c
===================================================================
--- linux-2.6.orig/mm/useonce.c	2006-07-12 16:08:18.000000000 +0200
+++ linux-2.6/mm/useonce.c	2006-07-12 16:11:45.000000000 +0200
@@ -1,8 +1,10 @@
 #include
-#include
 #include
 #include
 #include
+#include
+#include <linux/buffer_head.h>	/* for try_to_release_page(),
+                                   buffer_heads_over_limit */
 
 /**
  * lru_cache_add: add a page to the page lists
@@ -49,3 +51,241 @@ void __pgrep_add_drain(unsigned int cpu)
 	if (pagevec_count(pvec))
 		__pagevec_pgrep_add(pvec);
 }
+
+/*
+ * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
+ * of reclaimed pages
+ */
+static unsigned long shrink_inactive_list(unsigned long max_scan,
+                                struct zone *zone, struct scan_control *sc)
+{
+        LIST_HEAD(page_list);
+        struct pagevec pvec;
+        unsigned long nr_scanned = 0;
+        unsigned long nr_reclaimed = 0;
+        pagevec_init(&pvec, 1);
+
+        pgrep_add_drain();
+        spin_lock_irq(&zone->lru_lock);
+        do {
+                struct page *page;
+                unsigned long nr_taken;
+                unsigned long nr_scan;
+                unsigned long nr_freed;
+
+                nr_taken = isolate_lru_pages(sc->swap_cluster_max,
+                                             &zone->inactive_list,
+                                             &page_list, &nr_scan);
+                zone->nr_inactive -= nr_taken;
+                zone->pages_scanned += nr_scan;
+                spin_unlock_irq(&zone->lru_lock);
+
+                nr_scanned += nr_scan;
+                nr_freed = shrink_page_list(&page_list, sc);
+                nr_reclaimed += nr_freed;
+                local_irq_disable();
+                if (current_is_kswapd()) {
+                        __mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
+                        __mod_page_state(kswapd_steal, nr_freed);
+                } else
+                        __mod_page_state_zone(zone, pgscan_direct, nr_scan);
+                __mod_page_state_zone(zone, pgsteal, nr_freed);
+
+                if (nr_taken == 0)
+                        goto done;
+
+                spin_lock(&zone->lru_lock);
+                /*
+                 * Put back any unfreeable pages.
+                 */
+                while (!list_empty(&page_list)) {
+                        page = lru_to_page(&page_list);
+                        BUG_ON(PageLRU(page));
+                        SetPageLRU(page);
+                        list_del(&page->lru);
+                        if (PageActive(page))
+                                add_page_to_active_list(zone, page);
+                        else
+                                add_page_to_inactive_list(zone, page);
+                        if (!pagevec_add(&pvec, page)) {
+                                spin_unlock_irq(&zone->lru_lock);
+                                __pagevec_release(&pvec);
+                                spin_lock_irq(&zone->lru_lock);
+                        }
+                }
+        } while (nr_scanned < max_scan);
+        spin_unlock(&zone->lru_lock);
+done:
+        local_irq_enable();
+        pagevec_release(&pvec);
+        return nr_reclaimed;
+}
+
+/*
+ * This moves pages from the active list to the inactive list.
+ *
+ * We move them the other way if the page is referenced by one or more
+ * processes, from rmap.
+ *
+ * If the pages are mostly unmapped, the processing is fast and it is
+ * appropriate to hold zone->lru_lock across the whole operation. But if
+ * the pages are mapped, the processing is slow (page_referenced()) so we
+ * should drop zone->lru_lock around each page. It's impossible to balance
+ * this, so instead we remove the pages from the LRU while processing them.
+ * It is safe to rely on PG_active against the non-LRU pages in here because
+ * nobody will play with that bit on a non-LRU page.
+ *
+ * The downside is that we have to touch page->_count against each page.
+ * But we had to alter page->flags anyway.
+ */
+static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
+                                struct scan_control *sc, int reclaim_mapped)
+{
+        unsigned long pgmoved;
+        int pgdeactivate = 0;
+        unsigned long pgscanned;
+        LIST_HEAD(l_hold);      /* The pages which were snipped off */
+        LIST_HEAD(l_inactive);  /* Pages to go onto the inactive_list */
+        LIST_HEAD(l_active);    /* Pages to go onto the active_list */
+        struct page *page;
+        struct pagevec pvec;
+
+        if (!sc->may_swap)
+                reclaim_mapped = 0;
+
+        pgrep_add_drain();
+        spin_lock_irq(&zone->lru_lock);
+        pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
+                                    &l_hold, &pgscanned);
+        zone->pages_scanned += pgscanned;
+        zone->nr_active -= pgmoved;
+        spin_unlock_irq(&zone->lru_lock);
+
+        while (!list_empty(&l_hold)) {
+                cond_resched();
+                page = lru_to_page(&l_hold);
+                list_del(&page->lru);
+                if (page_mapped(page)) {
+                        if (!reclaim_mapped ||
+                            (total_swap_pages == 0 && PageAnon(page)) ||
+                            page_referenced(page, 0)) {
+                                list_add(&page->lru, &l_active);
+                                continue;
+                        }
+                }
+                list_add(&page->lru, &l_inactive);
+        }
+
+        pagevec_init(&pvec, 1);
+        pgmoved = 0;
+        spin_lock_irq(&zone->lru_lock);
+        while (!list_empty(&l_inactive)) {
+                page = lru_to_page(&l_inactive);
+                prefetchw_prev_lru_page(page, &l_inactive, flags);
+                BUG_ON(PageLRU(page));
+                SetPageLRU(page);
+                BUG_ON(!PageActive(page));
+                ClearPageActive(page);
+
+                list_move(&page->lru, &zone->inactive_list);
+                pgmoved++;
+                if (!pagevec_add(&pvec, page)) {
+                        zone->nr_inactive += pgmoved;
+                        spin_unlock_irq(&zone->lru_lock);
+                        pgdeactivate += pgmoved;
+                        pgmoved = 0;
+                        if (buffer_heads_over_limit)
+                                pagevec_strip(&pvec);
+                        __pagevec_release(&pvec);
+                        spin_lock_irq(&zone->lru_lock);
+                }
+        }
+        zone->nr_inactive += pgmoved;
+        pgdeactivate += pgmoved;
+        if (buffer_heads_over_limit) {
+                spin_unlock_irq(&zone->lru_lock);
+                pagevec_strip(&pvec);
+                spin_lock_irq(&zone->lru_lock);
+        }
+
+        pgmoved = 0;
+        while (!list_empty(&l_active)) {
+                page = lru_to_page(&l_active);
+                prefetchw_prev_lru_page(page, &l_active, flags);
+                BUG_ON(PageLRU(page));
+                SetPageLRU(page);
+                BUG_ON(!PageActive(page));
+                list_move(&page->lru, &zone->active_list);
+                pgmoved++;
+                if (!pagevec_add(&pvec, page)) {
+                        zone->nr_active += pgmoved;
+                        pgmoved = 0;
+                        spin_unlock_irq(&zone->lru_lock);
+                        __pagevec_release(&pvec);
+                        spin_lock_irq(&zone->lru_lock);
+                }
+        }
+        zone->nr_active += pgmoved;
+        spin_unlock(&zone->lru_lock);
+
+        __mod_page_state_zone(zone, pgrefill, pgscanned);
+        __mod_page_state(pgdeactivate, pgdeactivate);
+        local_irq_enable();
+
+        pagevec_release(&pvec);
+}
+
+/*
+ * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
+ */
+unsigned long pgrep_shrink_zone(int priority, struct zone *zone,
+                                struct scan_control *sc)
+{
+        unsigned long nr_active;
+        unsigned long nr_inactive;
+        unsigned long nr_to_scan;
+        unsigned long nr_reclaimed = 0;
+        int reclaim_mapped = should_reclaim_mapped(zone);
+
+        atomic_inc(&zone->reclaim_in_progress);
+
+        /*
+         * Add one to `nr_to_scan' just to make sure that the kernel will
+         * slowly sift through the active list.
+         */
+        zone->nr_scan_active += (zone->nr_active >> priority) + 1;
+        nr_active = zone->nr_scan_active;
+        if (nr_active >= sc->swap_cluster_max)
+                zone->nr_scan_active = 0;
+        else
+                nr_active = 0;
+
+        zone->nr_scan_inactive += (zone->nr_inactive >> priority) + 1;
+        nr_inactive = zone->nr_scan_inactive;
+        if (nr_inactive >= sc->swap_cluster_max)
+                zone->nr_scan_inactive = 0;
+        else
+                nr_inactive = 0;
+
+        while (nr_active || nr_inactive) {
+                if (nr_active) {
+                        nr_to_scan = min(nr_active,
+                                        (unsigned long)sc->swap_cluster_max);
+                        nr_active -= nr_to_scan;
+                        shrink_active_list(nr_to_scan, zone, sc, reclaim_mapped);
+                }
+
+                if (nr_inactive) {
+                        nr_to_scan = min(nr_inactive,
+                                        (unsigned long)sc->swap_cluster_max);
+                        nr_inactive -= nr_to_scan;
+                        nr_reclaimed += shrink_inactive_list(nr_to_scan, zone,
+                                                                sc);
+                }
+        }
+
+        throttle_vm_writeout();
+
+        atomic_dec(&zone->reclaim_in_progress);
+        return nr_reclaimed;
+}
Index: linux-2.6/mm/vmscan.c
===================================================================
--- linux-2.6.orig/mm/vmscan.c	2006-07-12 16:09:07.000000000 +0200
+++ linux-2.6/mm/vmscan.c	2006-07-12 16:11:46.000000000 +0200
@@ -329,8 +329,8 @@ cannot_free:
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
-static unsigned long shrink_page_list(struct list_head *page_list,
-                                        struct scan_control *sc)
+unsigned long shrink_page_list(struct list_head *page_list,
+                                struct scan_control *sc)
 {
        LIST_HEAD(ret_pages);
        struct pagevec freed_pvec;
@@ -513,7 +513,7 @@
  *
  * returns how many pages were moved onto *@dst.
  */
-static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                struct list_head *src, struct list_head *dst,
                unsigned long *scanned)
 {
@@ -548,76 +548,6 @@ static unsigned long isolate_lru_pages(u
        return nr_taken;
 }
 
-/*
- * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
- * of reclaimed pages
- */
-static unsigned long shrink_inactive_list(unsigned long max_scan,
-                                struct zone *zone, struct scan_control *sc)
-{
-        LIST_HEAD(page_list);
-        struct pagevec pvec;
-        unsigned long nr_scanned = 0;
-        unsigned long nr_reclaimed = 0;
-
-        pagevec_init(&pvec, 1);
-
-        pgrep_add_drain();
-        spin_lock_irq(&zone->lru_lock);
-        do {
-                struct page *page;
-                unsigned long nr_taken;
-                unsigned long nr_scan;
-                unsigned long nr_freed;
-
-                nr_taken = isolate_lru_pages(sc->swap_cluster_max,
-                                             &zone->inactive_list,
-                                             &page_list, &nr_scan);
-                zone->nr_inactive -= nr_taken;
-                zone->pages_scanned += nr_scan;
-                spin_unlock_irq(&zone->lru_lock);
-
-                nr_scanned += nr_scan;
-                nr_freed = shrink_page_list(&page_list, sc);
-                nr_reclaimed += nr_freed;
-                local_irq_disable();
-                if (current_is_kswapd()) {
-                        __mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
-                        __mod_page_state(kswapd_steal, nr_freed);
-                } else
-                        __mod_page_state_zone(zone, pgscan_direct, nr_scan);
-                __mod_page_state_zone(zone, pgsteal, nr_freed);
-
-                if (nr_taken == 0)
-                        goto done;
-
-                spin_lock(&zone->lru_lock);
-                /*
-                 * Put back any unfreeable pages.
-                 */
-                while (!list_empty(&page_list)) {
-                        page = lru_to_page(&page_list);
-                        BUG_ON(PageLRU(page));
-                        SetPageLRU(page);
-                        list_del(&page->lru);
-                        if (PageActive(page))
-                                add_page_to_active_list(zone, page);
-                        else
-                                add_page_to_inactive_list(zone, page);
-                        if (!pagevec_add(&pvec, page)) {
-                                spin_unlock_irq(&zone->lru_lock);
-                                __pagevec_release(&pvec);
-                                spin_lock_irq(&zone->lru_lock);
-                        }
-                }
-        } while (nr_scanned < max_scan);
-        spin_unlock(&zone->lru_lock);
-done:
-        local_irq_enable();
-        pagevec_release(&pvec);
-        return nr_reclaimed;
-}
-
 int should_reclaim_mapped(struct zone *zone)
 {
        long mapped_ratio;
@@ -663,175 +593,6 @@ int should_reclaim_mapped(struct zone *z
 }
 
 /*
- * This moves pages from the active list to the inactive list.
- *
- * We move them the other way if the page is referenced by one or more
- * processes, from rmap.
- *
- * If the pages are mostly unmapped, the processing is fast and it is
- * appropriate to hold zone->lru_lock across the whole operation. But if
- * the pages are mapped, the processing is slow (page_referenced()) so we
- * should drop zone->lru_lock around each page. It's impossible to balance
- * this, so instead we remove the pages from the LRU while processing them.
- * It is safe to rely on PG_active against the non-LRU pages in here because
- * nobody will play with that bit on a non-LRU page.
- *
- * The downside is that we have to touch page->_count against each page.
- * But we had to alter page->flags anyway.
- */
-static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
-                                struct scan_control *sc, int reclaim_mapped)
-{
-        unsigned long pgmoved;
-        int pgdeactivate = 0;
-        unsigned long pgscanned;
-        LIST_HEAD(l_hold);      /* The pages which were snipped off */
-        LIST_HEAD(l_inactive);  /* Pages to go onto the inactive_list */
-        LIST_HEAD(l_active);    /* Pages to go onto the active_list */
-        struct page *page;
-        struct pagevec pvec;
-
-        if (!sc->may_swap)
-                reclaim_mapped = 0;
-
-        pgrep_add_drain();
-        spin_lock_irq(&zone->lru_lock);
-        pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
-                                    &l_hold, &pgscanned);
-        zone->pages_scanned += pgscanned;
-        zone->nr_active -= pgmoved;
-        spin_unlock_irq(&zone->lru_lock);
-
-        while (!list_empty(&l_hold)) {
-                cond_resched();
-                page = lru_to_page(&l_hold);
-                list_del(&page->lru);
-                if (page_mapped(page)) {
-                        if (!reclaim_mapped ||
-                            (total_swap_pages == 0 && PageAnon(page)) ||
-                            page_referenced(page, 0)) {
-                                list_add(&page->lru, &l_active);
-                                continue;
-                        }
-                }
-                list_add(&page->lru, &l_inactive);
-        }
-
-        pagevec_init(&pvec, 1);
-        pgmoved = 0;
-        spin_lock_irq(&zone->lru_lock);
-        while (!list_empty(&l_inactive)) {
-                page = lru_to_page(&l_inactive);
-                prefetchw_prev_lru_page(page, &l_inactive, flags);
-                BUG_ON(PageLRU(page));
-                SetPageLRU(page);
-                BUG_ON(!PageActive(page));
-                ClearPageActive(page);
-
-                list_move(&page->lru, &zone->inactive_list);
-                pgmoved++;
-                if (!pagevec_add(&pvec, page)) {
-                        zone->nr_inactive += pgmoved;
-                        spin_unlock_irq(&zone->lru_lock);
-                        pgdeactivate += pgmoved;
-                        pgmoved = 0;
-                        if (buffer_heads_over_limit)
-                                pagevec_strip(&pvec);
-                        __pagevec_release(&pvec);
-                        spin_lock_irq(&zone->lru_lock);
-                }
-        }
-        zone->nr_inactive += pgmoved;
-        pgdeactivate += pgmoved;
-        if (buffer_heads_over_limit) {
-                spin_unlock_irq(&zone->lru_lock);
-                pagevec_strip(&pvec);
-                spin_lock_irq(&zone->lru_lock);
-        }
-
-        pgmoved = 0;
-        while (!list_empty(&l_active)) {
-                page = lru_to_page(&l_active);
-                prefetchw_prev_lru_page(page, &l_active, flags);
-                BUG_ON(PageLRU(page));
-                SetPageLRU(page);
-                BUG_ON(!PageActive(page));
-                list_move(&page->lru, &zone->active_list);
-                pgmoved++;
-                if (!pagevec_add(&pvec, page)) {
-                        zone->nr_active += pgmoved;
-                        pgmoved = 0;
-                        spin_unlock_irq(&zone->lru_lock);
-                        __pagevec_release(&pvec);
-                        spin_lock_irq(&zone->lru_lock);
-                }
-        }
-        zone->nr_active += pgmoved;
-        spin_unlock(&zone->lru_lock);
-
-        __mod_page_state_zone(zone, pgrefill, pgscanned);
-        __mod_page_state(pgdeactivate, pgdeactivate);
-        local_irq_enable();
-
-        pagevec_release(&pvec);
-}
-
-/*
- * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
- */
-static unsigned long shrink_zone(int priority, struct zone *zone,
-                                struct scan_control *sc)
-{
-        unsigned long nr_active;
-        unsigned long nr_inactive;
-        unsigned long nr_to_scan;
-        unsigned long nr_reclaimed = 0;
-        int reclaim_mapped = should_reclaim_mapped(zone);
-
-        atomic_inc(&zone->reclaim_in_progress);
-
-        /*
-         * Add one to `nr_to_scan' just to make sure that the kernel will
-         * slowly sift through the active list.
-         */
-        zone->nr_scan_active += (zone->nr_active >> priority) + 1;
-        nr_active = zone->nr_scan_active;
-        if (nr_active >= sc->swap_cluster_max)
-                zone->nr_scan_active = 0;
-        else
-                nr_active = 0;
-
-        zone->nr_scan_inactive += (zone->nr_inactive >> priority) + 1;
-        nr_inactive = zone->nr_scan_inactive;
-        if (nr_inactive >= sc->swap_cluster_max)
-                zone->nr_scan_inactive = 0;
-        else
-                nr_inactive = 0;
-
-        while (nr_active || nr_inactive) {
-                if (nr_active) {
-                        nr_to_scan = min(nr_active,
-                                        (unsigned long)sc->swap_cluster_max);
-                        nr_active -= nr_to_scan;
-                        shrink_active_list(nr_to_scan, zone, sc, reclaim_mapped);
-                }
-
-                if (nr_inactive) {
-                        nr_to_scan = min(nr_inactive,
-                                        (unsigned long)sc->swap_cluster_max);
-                        nr_inactive -= nr_to_scan;
-                        nr_reclaimed += shrink_inactive_list(nr_to_scan, zone,
-                                                                sc);
-                }
-        }
-
-        throttle_vm_writeout();
-
-        atomic_dec(&zone->reclaim_in_progress);
-        return nr_reclaimed;
-}
-
-/*
  * This is the direct reclaim path, for page-allocating processes. We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
  * request.
@@ -869,7 +630,7 @@ static unsigned long shrink_zones(int pr
                if (zone->all_unreclaimable && priority != DEF_PRIORITY)
                        continue;       /* Let kswapd poll it */
 
-                nr_reclaimed += shrink_zone(priority, zone, sc);
+                nr_reclaimed += pgrep_shrink_zone(priority, zone, sc);
        }
        return nr_reclaimed;
 }
@@ -1085,7 +846,7 @@ scan:
                        if (zone->prev_priority > priority)
                                zone->prev_priority = priority;
                        sc.nr_scanned = 0;
-                        nr_reclaimed += shrink_zone(priority, zone, &sc);
+                        nr_reclaimed += pgrep_shrink_zone(priority, zone, &sc);
                        reclaim_state->reclaimed_slab = 0;
                        nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
                                                lru_pages);
@@ -1377,7 +1138,7 @@ static int __zone_reclaim(struct zone *z
         */
        priority = ZONE_RECLAIM_PRIORITY;
        do {
-                nr_reclaimed += shrink_zone(priority, zone, &sc);
+                nr_reclaimed += pgrep_shrink_zone(priority, zone, &sc);
                priority--;
        } while (priority >= 0 && nr_reclaimed < nr_pages);
 
Index: linux-2.6/include/linux/mm_page_replace.h
===================================================================
--- linux-2.6.orig/include/linux/mm_page_replace.h	2006-07-12 16:09:18.000000000 +0200
+++ linux-2.6/include/linux/mm_page_replace.h	2006-07-12 16:11:47.000000000 +0200
@@ -78,6 +78,8 @@
 /* reclaim_t pgrep_reclaimable(struct page *); */
 /* int pgrep_activate(struct page *page); */
 /* void pgrep_mark_accessed(struct page *); */
+extern unsigned long pgrep_shrink_zone(int priority, struct zone *zone,
+                                        struct scan_control *sc);
 
 #ifdef CONFIG_MM_POLICY_USEONCE
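
For readers following the series, here is a stand-alone user-space sketch of
the scan batching that pgrep_shrink_zone() carries over from shrink_zone():
scan credit accrues at (nr_pages >> priority) + 1 per pass and is only spent
once a full swap_cluster_max batch has built up, so light reclaim pressure
still sifts the lists slowly. This is illustrative only and not part of the
patch; toy_zone, scan_batch() and the hard-coded constants are made-up
stand-ins for the kernel structures.

        #include <stdio.h>

        #define SWAP_CLUSTER_MAX 32UL

        struct toy_zone {
                unsigned long nr_active;        /* pages on the active list */
                unsigned long nr_scan_active;   /* accrued, unspent scan credit */
        };

        /*
         * Accrue scan credit for one reclaim pass and return how many pages
         * to scan; returns 0 while the credit is below a full batch.
         */
        static unsigned long scan_batch(struct toy_zone *zone, int priority)
        {
                unsigned long nr;

                zone->nr_scan_active += (zone->nr_active >> priority) + 1;
                nr = zone->nr_scan_active;
                if (nr >= SWAP_CLUSTER_MAX)
                        zone->nr_scan_active = 0;       /* spend the credit */
                else
                        nr = 0;                         /* keep saving */
                return nr;
        }

        int main(void)
        {
                struct toy_zone zone = { 1000, 0 };
                int priority;

                /* DEF_PRIORITY is 12 here; pressure doubles as it drops. */
                for (priority = 12; priority >= 0; priority--)
                        printf("priority %2d -> scan %lu pages\n",
                               priority, scan_batch(&zone, priority));
                return 0;
        }

Running it shows that nothing is scanned for the first few passes: with 1000
active pages, 1000 >> 12 is 0, so each pass adds only the "+ 1" until enough
credit accumulates or the priority value drops. That is exactly the slow
sifting of the active list that the comment in pgrep_shrink_zone() describes.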