From: a.p.zijlstra@chello.nl
To: linux-mm@kvack.org
Subject: [RFC][patch 6/6] CART Implementation ver 2
Date: Mon, 29 Aug 2005 06:31:39 +0200 [thread overview]
Message-ID: <20050829044034.875236000@twins> (raw)
In-Reply-To: <20050829043132.908007000@twins>
[-- Attachment #1: cart-use-cart.patch --]
[-- Type: text/plain, Size: 15863 bytes --]
Index: linux-2.6-git/fs/exec.c
===================================================================
--- linux-2.6-git.orig/fs/exec.c
+++ linux-2.6-git/fs/exec.c
@@ -331,7 +331,7 @@ void install_arg_page(struct vm_area_str
goto out;
}
inc_mm_counter(mm, rss);
- lru_cache_add_active(page);
+ lru_cache_add(page);
set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
page, vma->vm_page_prot))));
page_add_anon_rmap(page, vma, address);
Index: linux-2.6-git/include/linux/swap.h
===================================================================
--- linux-2.6-git.orig/include/linux/swap.h
+++ linux-2.6-git/include/linux/swap.h
@@ -191,8 +191,6 @@ extern unsigned int nr_free_pagecache_pa
/* linux/mm/swap.c */
extern void FASTCALL(lru_cache_add(struct page *));
-extern void FASTCALL(lru_cache_add_active(struct page *));
-extern void FASTCALL(activate_page(struct page *));
extern void FASTCALL(mark_page_accessed(struct page *));
extern void lru_add_drain(void);
extern int rotate_reclaimable_page(struct page *page);
Index: linux-2.6-git/mm/memory.c
===================================================================
--- linux-2.6-git.orig/mm/memory.c
+++ linux-2.6-git/mm/memory.c
@@ -1304,7 +1304,7 @@ static int do_wp_page(struct mm_struct *
page_remove_rmap(old_page);
flush_cache_page(vma, address, pfn);
break_cow(vma, new_page, address, page_table);
- lru_cache_add_active(new_page);
+ lru_cache_add(new_page);
page_add_anon_rmap(new_page, vma, address);
/* Free the old page.. */
@@ -1782,7 +1782,7 @@ do_anonymous_page(struct mm_struct *mm,
entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
vma->vm_page_prot)),
vma);
- lru_cache_add_active(page);
+ lru_cache_add(page);
SetPageReferenced(page);
page_add_anon_rmap(page, vma, addr);
}
@@ -1903,7 +1903,8 @@ retry:
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
set_pte_at(mm, address, page_table, entry);
if (anon) {
- lru_cache_add_active(new_page);
+ lru_cache_add(new_page);
+ SetPageReferenced(new_page);
page_add_anon_rmap(new_page, vma, address);
} else
page_add_file_rmap(new_page);
Index: linux-2.6-git/mm/swap.c
===================================================================
--- linux-2.6-git.orig/mm/swap.c
+++ linux-2.6-git/mm/swap.c
@@ -78,16 +78,17 @@ int rotate_reclaimable_page(struct page
return 1;
if (PageDirty(page))
return 1;
- if (PageActive(page))
- return 1;
if (!PageLRU(page))
return 1;
zone = page_zone(page);
spin_lock_irqsave(&zone->lru_lock, flags);
- if (PageLRU(page) && !PageActive(page)) {
+ if (PageLRU(page)) {
list_del(&page->lru);
- list_add_tail(&page->lru, &zone->inactive_list);
+ if (PageActive(page))
+ list_add(&page->lru, &zone->active_list);
+ else
+ list_add(&page->lru, &zone->inactive_list);
inc_page_state(pgrotated);
}
if (!test_clear_page_writeback(page))
@@ -112,7 +113,6 @@ EXPORT_SYMBOL(mark_page_accessed);
* @page: the page to add
*/
static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
-static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
void fastcall lru_cache_add(struct page *page)
{
@@ -124,25 +124,12 @@ void fastcall lru_cache_add(struct page
put_cpu_var(lru_add_pvecs);
}
-void fastcall lru_cache_add_active(struct page *page)
-{
- struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs);
-
- page_cache_get(page);
- if (!pagevec_add(pvec, page))
- __pagevec_lru_add_active(pvec);
- put_cpu_var(lru_add_active_pvecs);
-}
-
void lru_add_drain(void)
{
struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
if (pagevec_count(pvec))
__pagevec_lru_add(pvec);
- pvec = &__get_cpu_var(lru_add_active_pvecs);
- if (pagevec_count(pvec))
- __pagevec_lru_add_active(pvec);
put_cpu_var(lru_add_pvecs);
}
@@ -278,7 +265,9 @@ void __pagevec_lru_add(struct pagevec *p
}
if (TestSetPageLRU(page))
BUG();
- add_page_to_inactive_list(zone, page);
+ if (TestClearPageActive(page))
+ BUG();
+ __cart_insert(zone, page);
}
if (zone)
spin_unlock_irq(&zone->lru_lock);
@@ -288,33 +277,6 @@ void __pagevec_lru_add(struct pagevec *p
EXPORT_SYMBOL(__pagevec_lru_add);
-void __pagevec_lru_add_active(struct pagevec *pvec)
-{
- int i;
- struct zone *zone = NULL;
-
- for (i = 0; i < pagevec_count(pvec); i++) {
- struct page *page = pvec->pages[i];
- struct zone *pagezone = page_zone(page);
-
- if (pagezone != zone) {
- if (zone)
- spin_unlock_irq(&zone->lru_lock);
- zone = pagezone;
- spin_lock_irq(&zone->lru_lock);
- }
- if (TestSetPageLRU(page))
- BUG();
- if (TestSetPageActive(page))
- BUG();
- add_page_to_active_list(zone, page);
- }
- if (zone)
- spin_unlock_irq(&zone->lru_lock);
- release_pages(pvec->pages, pvec->nr, pvec->cold);
- pagevec_reinit(pvec);
-}
-
/*
* Try to drop buffers from the pages in a pagevec
*/
@@ -396,9 +358,6 @@ static void lru_drain_cache(unsigned int
/* CPU is dead, so no locking needed. */
if (pagevec_count(pvec))
__pagevec_lru_add(pvec);
- pvec = &per_cpu(lru_add_active_pvecs, cpu);
- if (pagevec_count(pvec))
- __pagevec_lru_add_active(pvec);
}
/* Drop the CPU's cached committed space back into the central pool. */
Index: linux-2.6-git/mm/swap_state.c
===================================================================
--- linux-2.6-git.orig/mm/swap_state.c
+++ linux-2.6-git/mm/swap_state.c
@@ -359,7 +359,7 @@ struct page *read_swap_cache_async(swp_e
/*
* Initiate read into locked page and return.
*/
- lru_cache_add_active(new_page);
+ lru_cache_add(new_page);
swap_readpage(NULL, new_page);
return new_page;
}
Index: linux-2.6-git/mm/vmscan.c
===================================================================
--- linux-2.6-git.orig/mm/vmscan.c
+++ linux-2.6-git/mm/vmscan.c
@@ -376,8 +376,6 @@ static int shrink_list(struct list_head
if (TestSetPageLocked(page))
goto keep;
- BUG_ON(PageActive(page));
-
sc->nr_scanned++;
/* Double the slab pressure for mapped and swapcache pages */
if (page_mapped(page) || PageSwapCache(page))
@@ -498,6 +496,7 @@ static int shrink_list(struct list_head
#ifdef CONFIG_SWAP
if (PageSwapCache(page)) {
swp_entry_t swap = { .val = page->private };
+ cart_remember(page);
__delete_from_swap_cache(page);
write_unlock_irq(&mapping->tree_lock);
swap_free(swap);
@@ -506,11 +505,13 @@ static int shrink_list(struct list_head
}
#endif /* CONFIG_SWAP */
+ cart_remember(page);
__remove_from_page_cache(page);
write_unlock_irq(&mapping->tree_lock);
__put_page(page);
free_it:
+ ClearPageActive(page);
unlock_page(page);
reclaimed++;
if (!pagevec_add(&freed_pvec, page))
@@ -535,55 +536,6 @@ keep:
}
/*
- * zone->lru_lock is heavily contended. Some of the functions that
- * shrink the lists perform better by taking out a batch of pages
- * and working on them outside the LRU lock.
- *
- * For pagecache intensive workloads, this function is the hottest
- * spot in the kernel (apart from copy_*_user functions).
- *
- * Appropriate locks must be held before calling this function.
- *
- * @nr_to_scan: The number of pages to look through on the list.
- * @src: The LRU list to pull pages off.
- * @dst: The temp list to put pages on to.
- * @scanned: The number of pages that were scanned.
- *
- * returns how many pages were moved onto *@dst.
- */
-static int isolate_lru_pages(int nr_to_scan, struct list_head *src,
- struct list_head *dst, int *scanned)
-{
- int nr_taken = 0;
- struct page *page;
- int scan = 0;
-
- while (scan++ < nr_to_scan && !list_empty(src)) {
- page = lru_to_page(src);
- prefetchw_prev_lru_page(page, src, flags);
-
- if (!TestClearPageLRU(page))
- BUG();
- list_del(&page->lru);
- if (get_page_testone(page)) {
- /*
- * It is being freed elsewhere
- */
- __put_page(page);
- SetPageLRU(page);
- list_add(&page->lru, src);
- continue;
- } else {
- list_add(&page->lru, dst);
- nr_taken++;
- }
- }
-
- *scanned = scan;
- return nr_taken;
-}
-
-/*
* shrink_cache() adds the number of pages reclaimed to sc->nr_reclaimed
*/
static void shrink_cache(struct zone *zone, struct scan_control *sc)
@@ -595,19 +547,15 @@ static void shrink_cache(struct zone *zo
pagevec_init(&pvec, 1);
lru_add_drain();
- spin_lock_irq(&zone->lru_lock);
while (max_scan > 0) {
- struct page *page;
int nr_taken;
- int nr_scan;
+ int nr_scan = 0;
int nr_freed;
- nr_taken = isolate_lru_pages(sc->swap_cluster_max,
- &zone->inactive_list,
- &page_list, &nr_scan);
- zone->nr_inactive -= nr_taken;
+ nr_taken = cart_replace(zone, &page_list, sc->swap_cluster_max,
+ &nr_scan);
+ /* we race a bit, do we care? */
zone->pages_scanned += nr_scan;
- spin_unlock_irq(&zone->lru_lock);
if (nr_taken == 0)
goto done;
@@ -623,220 +571,44 @@ static void shrink_cache(struct zone *zo
mod_page_state_zone(zone, pgsteal, nr_freed);
sc->nr_to_reclaim -= nr_freed;
- spin_lock_irq(&zone->lru_lock);
/*
* Put back any unfreeable pages.
*/
- while (!list_empty(&page_list)) {
- page = lru_to_page(&page_list);
- if (TestSetPageLRU(page))
- BUG();
- list_del(&page->lru);
- if (PageActive(page))
- add_page_to_active_list(zone, page);
- else
- add_page_to_inactive_list(zone, page);
- if (!pagevec_add(&pvec, page)) {
- spin_unlock_irq(&zone->lru_lock);
- __pagevec_release(&pvec);
- spin_lock_irq(&zone->lru_lock);
- }
- }
+ cart_reinsert(zone, &page_list);
}
- spin_unlock_irq(&zone->lru_lock);
done:
pagevec_release(&pvec);
}
/*
- * This moves pages from the active list to the inactive list.
- *
- * We move them the other way if the page is referenced by one or more
- * processes, from rmap.
- *
- * If the pages are mostly unmapped, the processing is fast and it is
- * appropriate to hold zone->lru_lock across the whole operation. But if
- * the pages are mapped, the processing is slow (page_referenced()) so we
- * should drop zone->lru_lock around each page. It's impossible to balance
- * this, so instead we remove the pages from the LRU while processing them.
- * It is safe to rely on PG_active against the non-LRU pages in here because
- * nobody will play with that bit on a non-LRU page.
- *
- * The downside is that we have to touch page->_count against each page.
- * But we had to alter page->flags anyway.
- */
-static void
-refill_inactive_zone(struct zone *zone, struct scan_control *sc)
-{
- int pgmoved;
- int pgdeactivate = 0;
- int pgscanned;
- int nr_pages = sc->nr_to_scan;
- LIST_HEAD(l_hold); /* The pages which were snipped off */
- LIST_HEAD(l_inactive); /* Pages to go onto the inactive_list */
- LIST_HEAD(l_active); /* Pages to go onto the active_list */
- struct page *page;
- struct pagevec pvec;
- int reclaim_mapped = 0;
- long mapped_ratio;
- long distress;
- long swap_tendency;
-
- lru_add_drain();
- spin_lock_irq(&zone->lru_lock);
- pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
- &l_hold, &pgscanned);
- zone->pages_scanned += pgscanned;
- zone->nr_active -= pgmoved;
- spin_unlock_irq(&zone->lru_lock);
-
- /*
- * `distress' is a measure of how much trouble we're having reclaiming
- * pages. 0 -> no problems. 100 -> great trouble.
- */
- distress = 100 >> zone->prev_priority;
-
- /*
- * The point of this algorithm is to decide when to start reclaiming
- * mapped memory instead of just pagecache. Work out how much memory
- * is mapped.
- */
- mapped_ratio = (sc->nr_mapped * 100) / total_memory;
-
- /*
- * Now decide how much we really want to unmap some pages. The mapped
- * ratio is downgraded - just because there's a lot of mapped memory
- * doesn't necessarily mean that page reclaim isn't succeeding.
- *
- * The distress ratio is important - we don't want to start going oom.
- *
- * A 100% value of vm_swappiness overrides this algorithm altogether.
- */
- swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
-
- /*
- * Now use this metric to decide whether to start moving mapped memory
- * onto the inactive list.
- */
- if (swap_tendency >= 100)
- reclaim_mapped = 1;
-
- while (!list_empty(&l_hold)) {
- cond_resched();
- page = lru_to_page(&l_hold);
- list_del(&page->lru);
- if (page_mapped(page)) {
- if (!reclaim_mapped ||
- (total_swap_pages == 0 && PageAnon(page)) ||
- page_referenced(page, 0, sc->priority <= 0)) {
- list_add(&page->lru, &l_active);
- continue;
- }
- }
- list_add(&page->lru, &l_inactive);
- }
-
- pagevec_init(&pvec, 1);
- pgmoved = 0;
- spin_lock_irq(&zone->lru_lock);
- while (!list_empty(&l_inactive)) {
- page = lru_to_page(&l_inactive);
- prefetchw_prev_lru_page(page, &l_inactive, flags);
- if (TestSetPageLRU(page))
- BUG();
- if (!TestClearPageActive(page))
- BUG();
- list_move(&page->lru, &zone->inactive_list);
- pgmoved++;
- if (!pagevec_add(&pvec, page)) {
- zone->nr_inactive += pgmoved;
- spin_unlock_irq(&zone->lru_lock);
- pgdeactivate += pgmoved;
- pgmoved = 0;
- if (buffer_heads_over_limit)
- pagevec_strip(&pvec);
- __pagevec_release(&pvec);
- spin_lock_irq(&zone->lru_lock);
- }
- }
- zone->nr_inactive += pgmoved;
- pgdeactivate += pgmoved;
- if (buffer_heads_over_limit) {
- spin_unlock_irq(&zone->lru_lock);
- pagevec_strip(&pvec);
- spin_lock_irq(&zone->lru_lock);
- }
-
- pgmoved = 0;
- while (!list_empty(&l_active)) {
- page = lru_to_page(&l_active);
- prefetchw_prev_lru_page(page, &l_active, flags);
- if (TestSetPageLRU(page))
- BUG();
- BUG_ON(!PageActive(page));
- list_move(&page->lru, &zone->active_list);
- pgmoved++;
- if (!pagevec_add(&pvec, page)) {
- zone->nr_active += pgmoved;
- pgmoved = 0;
- spin_unlock_irq(&zone->lru_lock);
- __pagevec_release(&pvec);
- spin_lock_irq(&zone->lru_lock);
- }
- }
- zone->nr_active += pgmoved;
- spin_unlock_irq(&zone->lru_lock);
- pagevec_release(&pvec);
-
- mod_page_state_zone(zone, pgrefill, pgscanned);
- mod_page_state(pgdeactivate, pgdeactivate);
-}
-
-/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
*/
static void
shrink_zone(struct zone *zone, struct scan_control *sc)
{
unsigned long nr_active;
- unsigned long nr_inactive;
/*
* Add one to `nr_to_scan' just to make sure that the kernel will
* slowly sift through the active list.
*/
- zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1;
+ zone->nr_scan_active += ((zone->nr_active + zone->nr_inactive) >> sc->priority) + 1;
nr_active = zone->nr_scan_active;
if (nr_active >= sc->swap_cluster_max)
zone->nr_scan_active = 0;
else
nr_active = 0;
- zone->nr_scan_inactive += (zone->nr_inactive >> sc->priority) + 1;
- nr_inactive = zone->nr_scan_inactive;
- if (nr_inactive >= sc->swap_cluster_max)
- zone->nr_scan_inactive = 0;
- else
- nr_inactive = 0;
sc->nr_to_reclaim = sc->swap_cluster_max;
- while (nr_active || nr_inactive) {
- if (nr_active) {
- sc->nr_to_scan = min(nr_active,
- (unsigned long)sc->swap_cluster_max);
- nr_active -= sc->nr_to_scan;
- refill_inactive_zone(zone, sc);
- }
-
- if (nr_inactive) {
- sc->nr_to_scan = min(nr_inactive,
- (unsigned long)sc->swap_cluster_max);
- nr_inactive -= sc->nr_to_scan;
- shrink_cache(zone, sc);
- if (sc->nr_to_reclaim <= 0)
- break;
- }
+ while (nr_active) {
+ sc->nr_to_scan = min(nr_active,
+ (unsigned long)sc->swap_cluster_max);
+ nr_active -= sc->nr_to_scan;
+ shrink_cache(zone, sc);
+ if (sc->nr_to_reclaim <= 0)
+ break;
}
throttle_vm_writeout();
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
prev parent reply other threads:[~2005-08-29 4:40 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-08-29 4:31 [RFC][patch 0/6] " a.p.zijlstra
2005-08-29 4:31 ` [RFC][patch 1/6] " a.p.zijlstra
2005-08-29 4:31 ` [RFC][patch 2/6] " a.p.zijlstra
2005-08-29 4:31 ` [RFC][patch 3/6] " a.p.zijlstra
2005-08-29 4:31 ` [RFC][patch 4/6] " a.p.zijlstra
2005-08-29 4:31 ` [RFC][patch 5/6] " a.p.zijlstra
2005-08-29 4:31 ` a.p.zijlstra [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20050829044034.875236000@twins \
--to=a.p.zijlstra@chello.nl \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox