linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Rik van Riel <riel@redhat.com>
To: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
Subject: [PATCH/RFT 4/5] CLOCK-Pro page replacement
Date: Wed, 10 Aug 2005 16:02:20 -0400	[thread overview]
Message-ID: <20050810200943.809832000@jumble.boston.redhat.com> (raw)
In-Reply-To: <20050810200216.644997000@jumble.boston.redhat.com>

[-- Attachment #1: clockpro --]
[-- Type: text/plain, Size: 11028 bytes --]

Implement an approximation to Song Jiang's CLOCK-Pro page replacement
algorithm.  The algorithm has been extended to handle multiple memory
zones and, consequently, needed some changes in the active page limit
readjustment.

TODO:
 - verify that things work as expected
 - figure out where to put new anonymous pages

More information can be found at:
 - http://www.cs.wm.edu/hpcs/WWW/HTML/publications/abs05-3.html
 - http://linux-mm.org/wiki/ClockProApproximation

Signed-off-by: Rik van Riel <riel@redhat.com>

Index: linux-2.6.12-vm/include/linux/mmzone.h
===================================================================
--- linux-2.6.12-vm.orig/include/linux/mmzone.h
+++ linux-2.6.12-vm/include/linux/mmzone.h
@@ -143,6 +143,8 @@ struct zone {
 	unsigned long		nr_inactive;
 	unsigned long		pages_scanned;	   /* since last reclaim */
 	int			all_unreclaimable; /* All pages pinned */
+	unsigned long		active_limit;
+	unsigned long		active_scanned;
 
 	/*
 	 * prev_priority holds the scanning priority for this zone.  It is
Index: linux-2.6.12-vm/include/linux/swap.h
===================================================================
--- linux-2.6.12-vm.orig/include/linux/swap.h
+++ linux-2.6.12-vm/include/linux/swap.h
@@ -154,10 +154,15 @@ extern void out_of_memory(unsigned int _
 extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
 
 /* linux/mm/nonresident.c */
-extern int remember_page(struct address_space *, unsigned long);
+extern int do_remember_page(struct address_space *, unsigned long);
 extern int recently_evicted(struct address_space *, unsigned long);
 extern void init_nonresident(void);
 
+/* linux/mm/clockpro.c */
+extern void remember_page(struct page *, struct address_space *, unsigned long);
+extern int page_is_hot(struct page *, struct address_space *, unsigned long);
+DECLARE_PER_CPU(unsigned long, evicted_pages);
+
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalhigh_pages;
@@ -298,6 +303,9 @@ static inline swp_entry_t get_swap_page(
 #define remember_page(x,y)	0
 #define recently_evicted(x,y)	0
 
+/* linux/mm/clockpro.c */
+#define page_is_hot(x,y,z)	0
+
 #endif /* CONFIG_SWAP */
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
Index: linux-2.6.12-vm/mm/Makefile
===================================================================
--- linux-2.6.12-vm.orig/mm/Makefile
+++ linux-2.6.12-vm/mm/Makefile
@@ -13,7 +13,7 @@ obj-y			:= bootmem.o filemap.o mempool.o
 			   prio_tree.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o \
-			   nonresident.o
+			   nonresident.o clockpro.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
 obj-$(CONFIG_NUMA) 	+= mempolicy.o
 obj-$(CONFIG_SHMEM) += shmem.o
Index: linux-2.6.12-vm/mm/clockpro.c
===================================================================
--- /dev/null
+++ linux-2.6.12-vm/mm/clockpro.c
@@ -0,0 +1,102 @@
+/*
+ * mm/clockpro.c
+ * (C) 2005 Red Hat, Inc
+ * Written by Rik van Riel <riel@redhat.com>
+ * Released under the GPL, see the file COPYING for details.
+ *
+ * Helper functions to implement CLOCK-Pro page replacement policy.
+ * For details see: http://linux-mm.org/wiki/AdvancedPageReplacement
+ */
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/swap.h>
+
+DEFINE_PER_CPU(unsigned long, evicted_pages);
+static unsigned long get_evicted(void)
+{
+	unsigned long total = 0;
+	int cpu;
+
+	for (cpu = first_cpu(cpu_online_map); cpu < NR_CPUS; cpu++)
+		total += per_cpu(evicted_pages, cpu);
+
+	return total;
+}
+
+static unsigned long estimate_pageable_memory(void)
+{
+	static unsigned long next_check;
+	static unsigned long total;
+	unsigned long active, inactive, free;
+
+	if (time_after(jiffies, next_check)) {
+		get_zone_counts(&active, &inactive, &free);
+		total = active + inactive + free;
+		next_check = jiffies + HZ/10;
+	}
+
+	return total;
+}
+
+static void decay_clockpro_variables(void)
+{
+	struct zone * zone;
+	int cpu;
+
+	for (cpu = first_cpu(cpu_online_map); cpu < NR_CPUS; cpu++)
+		per_cpu(evicted_pages, cpu) /= 2;
+
+	for_each_zone(zone)
+		zone->active_scanned /= 2;
+}
+
+int page_is_hot(struct page * page, struct address_space * mapping,
+		unsigned long index)
+{
+	unsigned long long distance;
+	unsigned long long evicted;
+	int refault_distance;
+	struct zone *zone;
+
+	/* Was the page recently evicted ? */
+	refault_distance = recently_evicted(mapping, index);
+	if (refault_distance < 0)
+		return 0;
+
+	distance = estimate_pageable_memory() + refault_distance;
+	evicted = get_evicted();
+	zone = page_zone(page);
+
+	/* Only consider recent history for the calculation below. */
+	if (unlikely(evicted > distance))
+		decay_clockpro_variables();
+
+	/*
+	 * Estimate whether the inter-reference distance of the tested
+	 * page is smaller than the inter-reference distance of the
+	 * oldest page on the active list.
+	 *
+	 *  distance        zone->nr_active
+	 * ---------- <  ----------------------
+	 *  evicted       zone->active_scanned
+	 */
+	if (distance * zone->active_scanned < evicted * zone->nr_active) {
+		if (zone->active_limit > zone->present_pages / 8)
+			zone->active_limit--;
+		return 1;
+	}
+
+	/* Increase the active limit more slowly. */
+	if ((evicted & 1) && zone->active_limit < zone->present_pages * 7 / 8)
+		zone->active_limit++;
+	return 0;
+}
+
+void remember_page(struct page * page, struct address_space * mapping,
+		unsigned long index)
+{
+	struct zone * zone = page_zone(page);
+	if (do_remember_page(mapping, index) && (index & 1) &&
+			zone->active_limit < zone->present_pages * 7 / 8)
+		zone->active_limit++;
+}
Index: linux-2.6.12-vm/mm/filemap.c
===================================================================
--- linux-2.6.12-vm.orig/mm/filemap.c
+++ linux-2.6.12-vm/mm/filemap.c
@@ -401,9 +401,12 @@ int add_to_page_cache_lru(struct page *p
 				pgoff_t offset, int gfp_mask)
 {
 	int ret = add_to_page_cache(page, mapping, offset, gfp_mask);
-	recently_evicted(mapping, offset);
-	if (ret == 0)
-		lru_cache_add(page);
+	if (ret == 0) {
+		if (page_is_hot(page, mapping, offset))
+			lru_cache_add_active(page);
+		else
+			lru_cache_add(page);
+	}
 	return ret;
 }
 
Index: linux-2.6.12-vm/mm/nonresident.c
===================================================================
--- linux-2.6.12-vm.orig/mm/nonresident.c
+++ linux-2.6.12-vm/mm/nonresident.c
@@ -25,6 +25,7 @@
 #include <linux/prefetch.h>
 #include <linux/kernel.h>
 #include <linux/percpu.h>
+#include <linux/swap.h>
 
 /* Number of non-resident pages per hash bucket. Never smaller than 15. */
 #if (L1_CACHE_BYTES < 64)
@@ -101,7 +102,7 @@ int recently_evicted(struct address_spac
 	return -1;
 }
 
-int remember_page(struct address_space * mapping, unsigned long index)
+int do_remember_page(struct address_space * mapping, unsigned long index)
 {
 	struct nr_bucket * nr_bucket;
 	u32 nrpage;
@@ -125,6 +126,7 @@ int remember_page(struct address_space *
 	preempt_enable();
 
 	/* Statistics may want to know whether the entry was in use. */
+	__get_cpu_var(evicted_pages)++;
 	return xchg(&nr_bucket->page[i], nrpage);
 }
 
Index: linux-2.6.12-vm/mm/page_alloc.c
===================================================================
--- linux-2.6.12-vm.orig/mm/page_alloc.c
+++ linux-2.6.12-vm/mm/page_alloc.c
@@ -1715,6 +1715,7 @@ static void __init free_area_init_core(s
 		zone->nr_scan_inactive = 0;
 		zone->nr_active = 0;
 		zone->nr_inactive = 0;
+		zone->active_limit = zone->present_pages * 2 / 3;
 		if (!size)
 			continue;
 
Index: linux-2.6.12-vm/mm/swap_state.c
===================================================================
--- linux-2.6.12-vm.orig/mm/swap_state.c
+++ linux-2.6.12-vm/mm/swap_state.c
@@ -323,6 +323,7 @@ struct page *read_swap_cache_async(swp_e
 			struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *found_page, *new_page = NULL;
+	int active;
 	int err;
 
 	do {
@@ -344,7 +345,7 @@ struct page *read_swap_cache_async(swp_e
 				break;		/* Out of memory */
 		}
 
-		recently_evicted(&swapper_space, entry.val);
+		active = page_is_hot(new_page, &swapper_space, entry.val);
 
 		/*
 		 * Associate the page with swap entry in the swap cache.
@@ -361,7 +362,10 @@ struct page *read_swap_cache_async(swp_e
 			/*
 			 * Initiate read into locked page and return.
 			 */
-			lru_cache_add_active(new_page);
+			if (active) {
+				lru_cache_add_active(new_page);
+			} else
+				lru_cache_add(new_page);
 			swap_readpage(NULL, new_page);
 			return new_page;
 		}
Index: linux-2.6.12-vm/mm/vmscan.c
===================================================================
--- linux-2.6.12-vm.orig/mm/vmscan.c
+++ linux-2.6.12-vm/mm/vmscan.c
@@ -355,12 +355,14 @@ static int shrink_list(struct list_head 
 	while (!list_empty(page_list)) {
 		struct address_space *mapping;
 		struct page *page;
+		struct zone *zone;
 		int may_enter_fs;
 		int referenced;
 
 		cond_resched();
 
 		page = lru_to_page(page_list);
+		zone = page_zone(page);
 		list_del(&page->lru);
 
 		if (TestSetPageLocked(page))
@@ -492,7 +494,7 @@ static int shrink_list(struct list_head 
 #ifdef CONFIG_SWAP
 		if (PageSwapCache(page)) {
 			swp_entry_t swap = { .val = page->private };
-			remember_page(&swapper_space, page->private);
+			remember_page(page, &swapper_space, page->private);
 			__delete_from_swap_cache(page);
 			write_unlock_irq(&mapping->tree_lock);
 			swap_free(swap);
@@ -501,7 +503,7 @@ static int shrink_list(struct list_head 
 		}
 #endif /* CONFIG_SWAP */
 
-		remember_page(page->mapping, page->index);
+		remember_page(page, page->mapping, page->index);
 		__remove_from_page_cache(page);
 		write_unlock_irq(&mapping->tree_lock);
 		__put_page(page);
@@ -684,6 +686,7 @@ refill_inactive_zone(struct zone *zone, 
 	pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
 				    &l_hold, &pgscanned);
 	zone->pages_scanned += pgscanned;
+	zone->active_scanned += pgscanned;
 	zone->nr_active -= pgmoved;
 	spin_unlock_irq(&zone->lru_lock);
 
@@ -799,10 +802,15 @@ shrink_zone(struct zone *zone, struct sc
 	unsigned long nr_inactive;
 
 	/*
-	 * Add one to `nr_to_scan' just to make sure that the kernel will
-	 * slowly sift through the active list.
+	 * Scan the active list if we have too many active pages.
+	 * The limit is automatically adjusted through refaults
+	 * measuring how well the VM did in the past.
 	 */
-	zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1;
+	if (zone->nr_active > zone->active_limit)
+		zone->nr_scan_active += zone->nr_active - zone->active_limit;
+	else if (sc->priority < DEF_PRIORITY - 2)
+		zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1;
+
 	nr_active = zone->nr_scan_active;
 	if (nr_active >= sc->swap_cluster_max)
 		zone->nr_scan_active = 0;

--
-- 
All Rights Reversed
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">dont@kvack.org</a>

  parent reply	other threads:[~2005-08-10 20:02 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-08-10 20:02 [PATCH/RFT 0/5] " Rik van Riel
2005-08-10 20:02 ` [PATCH/RFT 1/5] " Rik van Riel
2005-08-10 20:02 ` [PATCH/RFT 2/5] " Rik van Riel
2005-08-10 20:27   ` David S. Miller, Rik van Riel
2005-08-10 20:38     ` Rik van Riel
2005-08-10 20:02 ` [PATCH/RFT 3/5] " Rik van Riel
2005-08-10 20:02 ` Rik van Riel [this message]
2005-08-10 20:31   ` [PATCH/RFT 4/5] " David S. Miller, Rik van Riel
2005-08-18  0:38     ` Andrew Morton
2005-08-18  2:48       ` David S. Miller, Andrew Morton
2005-08-18  4:05         ` Andrew Morton
2005-08-18  4:48           ` David S. Miller, Andrew Morton
2005-08-19  7:03             ` Rusty Russell
2005-08-19  7:10               ` Andrew Morton
2005-08-19  7:27                 ` Rusty Russell
2005-08-19 13:04                   ` Horst von Brand
2005-08-10 23:22   ` Marcelo Tosatti
2005-08-11  0:06     ` Rik van Riel
2005-08-10 20:02 ` [PATCH/RFT 5/5] " Rik van Riel
2005-08-11 22:08   ` Song Jiang
2005-08-12  1:22     ` Rik van Riel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20050810200943.809832000@jumble.boston.redhat.com \
    --to=riel@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox