Implement an approximation to Song Jiang's CLOCK-Pro page replacement
algorithm. The algorithm has been extended to handle multiple memory
zones, which in turn required some changes to the readjustment of the
active page limit.

TODO:
- verify that things work as expected
- figure out where to put new anonymous pages

More information can be found at:
- http://www.cs.wm.edu/hpcs/WWW/HTML/publications/abs05-3.html
- http://linux-mm.org/wiki/ClockProApproximation

Signed-off-by: Rik van Riel
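The heart of this approximation is the refault test: when a page comes
back into memory, the number of evictions since it was last evicted
(its refault distance) gives its inter-reference distance, which is
compared against that of the oldest page on the active list. The
userspace sketch below models that comparison; the function name,
parameters and numbers are illustrative only, the real test is
page_is_hot() in mm/clockpro.c further down.

#include <stdio.h>

/*
 * Simplified userspace model of the hot/cold test in page_is_hot().
 * All parameters are plain numbers here; in the kernel they come
 * from per-zone and per-cpu counters.
 */
static int model_page_is_hot(unsigned long long refault_distance,
			     unsigned long long pageable_memory,
			     unsigned long long evicted,
			     unsigned long long nr_active,
			     unsigned long long active_scanned)
{
	/* Offset the refault distance by the amount of pageable memory. */
	unsigned long long distance = pageable_memory + refault_distance;

	/*
	 * Cross-multiplied form of:
	 *
	 *    distance      nr_active
	 *   ---------- < ----------------
	 *    evicted      active_scanned
	 *
	 * i.e. the page's inter-reference distance is smaller than
	 * that of the oldest page on the active list.
	 */
	return distance * active_scanned < evicted * nr_active;
}

int main(void)
{
	/* Page refaulted soon after eviction: considered hot. */
	printf("%d\n", model_page_is_hot(50000, 250000, 2000000,
					 100000, 400000));
	/* Page took a long time to refault: considered cold. */
	printf("%d\n", model_page_is_hot(1000000, 250000, 2000000,
					 100000, 400000));
	return 0;
}

Cross-multiplying the two ratios avoids an integer division in the
kernel, and the 64-bit types keep the products from overflowing.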
Index: linux-2.6.12-vm/include/linux/mmzone.h
===================================================================
--- linux-2.6.12-vm.orig/include/linux/mmzone.h
+++ linux-2.6.12-vm/include/linux/mmzone.h
@@ -143,6 +143,8 @@ struct zone {
 	unsigned long		nr_inactive;
 	unsigned long		pages_scanned;	   /* since last reclaim */
 	int			all_unreclaimable; /* All pages pinned */
+	unsigned long		active_limit;
+	unsigned long		active_scanned;
 
 	/*
 	 * prev_priority holds the scanning priority for this zone.  It is
Index: linux-2.6.12-vm/include/linux/swap.h
===================================================================
--- linux-2.6.12-vm.orig/include/linux/swap.h
+++ linux-2.6.12-vm/include/linux/swap.h
@@ -154,10 +154,15 @@ extern void out_of_memory(unsigned int _
 extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
 
 /* linux/mm/nonresident.c */
-extern int remember_page(struct address_space *, unsigned long);
+extern int do_remember_page(struct address_space *, unsigned long);
 extern int recently_evicted(struct address_space *, unsigned long);
 extern void init_nonresident(void);
 
+/* linux/mm/clockpro.c */
+extern void remember_page(struct page *, struct address_space *, unsigned long);
+extern int page_is_hot(struct page *, struct address_space *, unsigned long);
+DECLARE_PER_CPU(unsigned long, evicted_pages);
+
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalhigh_pages;
@@ -298,6 +303,9 @@ static inline swp_entry_t get_swap_page(
-#define remember_page(x,y) 0
+#define remember_page(x,y,z) do { } while (0)
 #define recently_evicted(x,y) 0
 
+/* linux/mm/clockpro.c */
+#define page_is_hot(x,y,z) 0
+
 #endif /* CONFIG_SWAP */
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
Index: linux-2.6.12-vm/mm/Makefile
===================================================================
--- linux-2.6.12-vm.orig/mm/Makefile
+++ linux-2.6.12-vm/mm/Makefile
@@ -13,7 +13,7 @@ obj-y			:= bootmem.o filemap.o mempool.o
 			   prio_tree.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o \
-			   nonresident.o
+			   nonresident.o clockpro.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
 obj-$(CONFIG_NUMA)	+= mempolicy.o
 obj-$(CONFIG_SHMEM)	+= shmem.o
Index: linux-2.6.12-vm/mm/clockpro.c
===================================================================
--- /dev/null
+++ linux-2.6.12-vm/mm/clockpro.c
@@ -0,0 +1,102 @@
+/*
+ * mm/clockpro.c
+ * (C) 2005 Red Hat, Inc
+ * Written by Rik van Riel
+ * Released under the GPL, see the file COPYING for details.
+ *
+ * Helper functions to implement the CLOCK-Pro page replacement policy.
+ *
+ * For details see: http://linux-mm.org/wiki/AdvancedPageReplacement
+ */
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/percpu.h>
+
+DEFINE_PER_CPU(unsigned long, evicted_pages);
+
+/* Sum the per-cpu counts of pages evicted from the page cache. */
+static unsigned long get_evicted(void)
+{
+	unsigned long total = 0;
+	int cpu;
+
+	for (cpu = first_cpu(cpu_online_map); cpu < NR_CPUS; cpu++)
+		total += per_cpu(evicted_pages, cpu);
+
+	return total;
+}
+
+/*
+ * Global zone counts are expensive to collect, so cache the result
+ * and refresh it at most ten times a second.
+ */
+static unsigned long estimate_pageable_memory(void)
+{
+	static unsigned long next_check;
+	static unsigned long total;
+	unsigned long active, inactive, free;
+
+	if (time_after(jiffies, next_check)) {
+		get_zone_counts(&active, &inactive, &free);
+		total = active + inactive + free;
+		next_check = jiffies + HZ/10;
+	}
+
+	return total;
+}
+
+static void decay_clockpro_variables(void)
+{
+	struct zone * zone;
+	int cpu;
+
+	for (cpu = first_cpu(cpu_online_map); cpu < NR_CPUS; cpu++)
+		per_cpu(evicted_pages, cpu) /= 2;
+
+	for_each_zone(zone)
+		zone->active_scanned /= 2;
+}
+
+int page_is_hot(struct page * page, struct address_space * mapping,
+		unsigned long index)
+{
+	unsigned long long distance;
+	unsigned long long evicted;
+	int refault_distance;
+	struct zone *zone;
+
+	/* Was the page recently evicted ? */
+	refault_distance = recently_evicted(mapping, index);
+	if (refault_distance < 0)
+		return 0;
+
+	distance = estimate_pageable_memory() + refault_distance;
+	evicted = get_evicted();
+	zone = page_zone(page);
+
+	/* Only consider recent history for the calculation below. */
+	if (unlikely(evicted > distance))
+		decay_clockpro_variables();
+
+	/*
+	 * Estimate whether the inter-reference distance of the tested
+	 * page is smaller than the inter-reference distance of the
+	 * oldest page on the active list.
+	 *
+	 *    distance       zone->nr_active
+	 *   ---------- < ----------------------
+	 *    evicted      zone->active_scanned
+	 */
+	if (distance * zone->active_scanned < evicted * zone->nr_active) {
+		if (zone->active_limit > zone->present_pages / 8)
+			zone->active_limit--;
+		return 1;
+	}
+
+	/* Increase the active limit more slowly. */
+	if ((evicted & 1) && zone->active_limit < zone->present_pages * 7 / 8)
+		zone->active_limit++;
+	return 0;
+}
+
+void remember_page(struct page * page, struct address_space * mapping,
+		unsigned long index)
+{
+	struct zone * zone = page_zone(page);
+
+	if (do_remember_page(mapping, index) && (index & 1) &&
+			zone->active_limit < zone->present_pages * 7 / 8)
+		zone->active_limit++;
+}
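The active_limit updates above form a simple feedback loop: a hot
refault means pages are falling off the active list too soon, so the
target shrinks; remembering an evicted page occasionally grows it back.
Below is a minimal userspace model of just that clamping behaviour,
with hypothetical struct and function names; the bounds (1/8 and 7/8 of
the zone) and the 2/3 starting point mirror the kernel hunks in this
patch (initialisation is in the page_alloc.c hunk further down).

#include <stdio.h>

/* Hypothetical model of a zone's active list target. */
struct zone_model {
	unsigned long present_pages;
	unsigned long active_limit;
};

static void zone_model_init(struct zone_model *z, unsigned long pages)
{
	z->present_pages = pages;
	z->active_limit = pages * 2 / 3;	/* free_area_init_core() */
}

/* Hot refault: the active list is too big, shrink the target. */
static void on_hot_refault(struct zone_model *z)
{
	if (z->active_limit > z->present_pages / 8)
		z->active_limit--;
}

/*
 * Remembered eviction: grow the target again. The kernel only does
 * this on every other event, which this model leaves out.
 */
static void on_remembered_eviction(struct zone_model *z)
{
	if (z->active_limit < z->present_pages * 7 / 8)
		z->active_limit++;
}

int main(void)
{
	struct zone_model z;
	int i;

	zone_model_init(&z, 4096);		/* starts at 2730 */
	for (i = 0; i < 10000; i++)
		on_hot_refault(&z);
	printf("after shrink pressure: %lu\n", z.active_limit);  /* 512 */
	for (i = 0; i < 10000; i++)
		on_remembered_eviction(&z);
	printf("after grow pressure:   %lu\n", z.active_limit);  /* 3584 */
	return 0;
}

Note that the kernel grows the limit only on every other event (the
(evicted & 1) and (index & 1) tests), so the limit grows more slowly
than it shrinks.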
Index: linux-2.6.12-vm/mm/filemap.c
===================================================================
--- linux-2.6.12-vm.orig/mm/filemap.c
+++ linux-2.6.12-vm/mm/filemap.c
@@ -401,9 +401,12 @@ int add_to_page_cache_lru(struct page *p
 		pgoff_t offset, int gfp_mask)
 {
 	int ret = add_to_page_cache(page, mapping, offset, gfp_mask);
-	recently_evicted(mapping, offset);
-	if (ret == 0)
-		lru_cache_add(page);
+	if (ret == 0) {
+		if (page_is_hot(page, mapping, offset))
+			lru_cache_add_active(page);
+		else
+			lru_cache_add(page);
+	}
 	return ret;
 }
Index: linux-2.6.12-vm/mm/nonresident.c
===================================================================
--- linux-2.6.12-vm.orig/mm/nonresident.c
+++ linux-2.6.12-vm/mm/nonresident.c
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include <linux/swap.h>
 
 /* Number of non-resident pages per hash bucket. Never smaller than 15.
  */
 #if (L1_CACHE_BYTES < 64)
@@ -101,7 +102,7 @@ int recently_evicted(struct address_spac
 	return -1;
 }
 
-int remember_page(struct address_space * mapping, unsigned long index)
+int do_remember_page(struct address_space * mapping, unsigned long index)
 {
 	struct nr_bucket * nr_bucket;
 	u32 nrpage;
@@ -125,6 +126,7 @@ int remember_page(struct address_space *
 	preempt_enable();
 
 	/* Statistics may want to know whether the entry was in use. */
+	__get_cpu_var(evicted_pages)++;
 	return xchg(&nr_bucket->page[i], nrpage);
 }
Index: linux-2.6.12-vm/mm/page_alloc.c
===================================================================
--- linux-2.6.12-vm.orig/mm/page_alloc.c
+++ linux-2.6.12-vm/mm/page_alloc.c
@@ -1715,6 +1715,7 @@ static void __init free_area_init_core(s
 		zone->nr_scan_inactive = 0;
 		zone->nr_active = 0;
 		zone->nr_inactive = 0;
+		zone->active_limit = zone->present_pages * 2 / 3;
 
 		if (!size)
 			continue;
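Tying the pieces together: shrink_list() (see the vmscan.c changes
below) calls remember_page() as a page leaves the page cache,
do_remember_page() stores a cookie for it and bumps the per-cpu
evicted_pages counter, and a later refault asks recently_evicted()
for the distance. The toy model below shows that bookkeeping with a
flat array and made-up names; the kernel instead hashes
(mapping, index) cookies into nonresident buckets, and the distance
it returns is an approximation rather than an exact count.

#include <stdio.h>

#define SLOTS 1024

/*
 * Toy model: remember at which global eviction count a page was
 * evicted, so a refault can compute the distance as a difference.
 */
static unsigned long evictions;		/* models summed evicted_pages */
static unsigned long evicted_at[SLOTS];
static int remembered[SLOTS];

static void toy_remember_page(unsigned long cookie)
{
	evictions++;			/* __get_cpu_var(evicted_pages)++ */
	evicted_at[cookie % SLOTS] = evictions;
	remembered[cookie % SLOTS] = 1;
}

/*
 * Returns -1 when the page is unknown, otherwise the number of
 * evictions since the page itself was evicted.
 */
static long toy_recently_evicted(unsigned long cookie)
{
	if (!remembered[cookie % SLOTS])
		return -1;
	remembered[cookie % SLOTS] = 0;
	return (long)(evictions - evicted_at[cookie % SLOTS]);
}

int main(void)
{
	unsigned long i;

	toy_remember_page(42);
	for (i = 0; i < 100; i++)	/* a hundred unrelated evictions */
		toy_remember_page(100 + i);
	printf("refault distance: %ld\n", toy_recently_evicted(42));
	printf("unknown page:     %ld\n", toy_recently_evicted(7));
	return 0;
}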
Index: linux-2.6.12-vm/mm/swap_state.c
===================================================================
--- linux-2.6.12-vm.orig/mm/swap_state.c
+++ linux-2.6.12-vm/mm/swap_state.c
@@ -323,6 +323,7 @@ struct page *read_swap_cache_async(swp_e
 		struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *found_page, *new_page = NULL;
+	int active;
 	int err;
 
 	do {
@@ -344,7 +345,7 @@ struct page *read_swap_cache_async(swp_e
 			break;		/* Out of memory */
 		}
 
-		recently_evicted(&swapper_space, entry.val);
+		active = page_is_hot(new_page, &swapper_space, entry.val);
 
 		/*
 		 * Associate the page with swap entry in the swap cache.
@@ -361,7 +362,10 @@ struct page *read_swap_cache_async(swp_e
 			/*
 			 * Initiate read into locked page and return.
 			 */
-			lru_cache_add_active(new_page);
+			if (active)
+				lru_cache_add_active(new_page);
+			else
+				lru_cache_add(new_page);
 			swap_readpage(NULL, new_page);
 			return new_page;
 		}
Index: linux-2.6.12-vm/mm/vmscan.c
===================================================================
--- linux-2.6.12-vm.orig/mm/vmscan.c
+++ linux-2.6.12-vm/mm/vmscan.c
@@ -355,12 +355,14 @@ static int shrink_list(struct list_head
 	while (!list_empty(page_list)) {
 		struct address_space *mapping;
 		struct page *page;
+		struct zone *zone;
 		int may_enter_fs;
 		int referenced;
 
 		cond_resched();
 
 		page = lru_to_page(page_list);
+		zone = page_zone(page);
 		list_del(&page->lru);
 
 		if (TestSetPageLocked(page))
@@ -492,7 +494,7 @@ static int shrink_list(struct list_head
 #ifdef CONFIG_SWAP
 		if (PageSwapCache(page)) {
 			swp_entry_t swap = { .val = page->private };
-			remember_page(&swapper_space, page->private);
+			remember_page(page, &swapper_space, page->private);
 			__delete_from_swap_cache(page);
 			write_unlock_irq(&mapping->tree_lock);
 			swap_free(swap);
@@ -501,7 +503,7 @@ static int shrink_list(struct list_head
 		}
 #endif /* CONFIG_SWAP */
 
-		remember_page(page->mapping, page->index);
+		remember_page(page, page->mapping, page->index);
 		__remove_from_page_cache(page);
 		write_unlock_irq(&mapping->tree_lock);
 		__put_page(page);
@@ -684,6 +686,7 @@ refill_inactive_zone(struct zone *zone,
 	pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
 				    &l_hold, &pgscanned);
 	zone->pages_scanned += pgscanned;
+	zone->active_scanned += pgscanned;
 	zone->nr_active -= pgmoved;
 	spin_unlock_irq(&zone->lru_lock);
@@ -799,10 +802,15 @@ shrink_zone(struct zone *zone, struct sc
 	unsigned long nr_inactive;
 
 	/*
-	 * Add one to `nr_to_scan' just to make sure that the kernel will
-	 * slowly sift through the active list.
+	 * Scan the active list if we have too many active pages.
+	 * The limit is adjusted automatically through refaults,
+	 * which measure how well the VM did in the past.
 	 */
-	zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1;
+	if (zone->nr_active > zone->active_limit)
+		zone->nr_scan_active += zone->nr_active - zone->active_limit;
+	else if (sc->priority < DEF_PRIORITY - 2)
+		zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1;
+
 	nr_active = zone->nr_scan_active;
 	if (nr_active >= sc->swap_cluster_max)
 		zone->nr_scan_active = 0;

--
-- All Rights Reversed --