* [RFC][PATCH 1/6] CART Implementation
From: a.p.zijlstra @ 2005-08-27 21:57 UTC (permalink / raw)
To: linux-mm
[-- Attachment #1: cart-nonresident.patch --]
[-- Type: text/plain, Size: 10989 bytes --]
Index: linux-2.6-cart/include/linux/swap.h
===================================================================
--- linux-2.6-cart.orig/include/linux/swap.h
+++ linux-2.6-cart/include/linux/swap.h
@@ -154,6 +154,15 @@ extern void out_of_memory(unsigned int _
/* linux/mm/memory.c */
extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
+/* linux/mm/nonresident.c */
+#define NR_filter 0x01 /* short/long */
+#define NR_list 0x02 /* b1/b2; correlates to PG_active */
+#define NR_evict 0x80000000
+
+extern unsigned int remember_page(struct address_space *, unsigned long, unsigned int);
+extern unsigned int recently_evicted(struct address_space *, unsigned long);
+extern void init_nonresident(void);
+
/* linux/mm/page_alloc.c */
extern unsigned long totalram_pages;
extern unsigned long totalhigh_pages;
@@ -292,6 +301,11 @@ static inline swp_entry_t get_swap_page(
#define grab_swap_token() do { } while(0)
#define has_swap_token(x) 0
+/* linux/mm/nonresident.c */
+#define init_nonresident() do { } while (0)
+#define remember_page(x,y,z) 0
+#define recently_evicted(x,y) 0
+
#endif /* CONFIG_SWAP */
#endif /* __KERNEL__*/
#endif /* _LINUX_SWAP_H */
Index: linux-2.6-cart/init/main.c
===================================================================
--- linux-2.6-cart.orig/init/main.c
+++ linux-2.6-cart/init/main.c
@@ -47,6 +47,7 @@
#include <linux/rmap.h>
#include <linux/mempolicy.h>
#include <linux/key.h>
+#include <linux/swap.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -494,6 +495,7 @@ asmlinkage void __init start_kernel(void
}
#endif
vfs_caches_init_early();
+ init_nonresident();
mem_init();
kmem_cache_init();
setup_per_cpu_pageset();
Index: linux-2.6-cart/mm/Makefile
===================================================================
--- linux-2.6-cart.orig/mm/Makefile
+++ linux-2.6-cart/mm/Makefile
@@ -12,7 +12,8 @@ obj-y := bootmem.o filemap.o mempool.o
readahead.o slab.o swap.o truncate.o vmscan.o \
prio_tree.o $(mmu-y)
-obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
+obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o \
+ nonresident.o
obj-$(CONFIG_HUGETLBFS) += hugetlb.o
obj-$(CONFIG_NUMA) += mempolicy.o
obj-$(CONFIG_SPARSEMEM) += sparse.o
Index: linux-2.6-cart/mm/nonresident.c
===================================================================
--- /dev/null
+++ linux-2.6-cart/mm/nonresident.c
@@ -0,0 +1,277 @@
+/*
+ * mm/nonresident.c
+ * (C) 2004,2005 Red Hat, Inc
+ * Written by Rik van Riel <riel@redhat.com>
+ * Released under the GPL, see the file COPYING for details.
+ * Adapted by Peter Zijlstra <a.p.zijlstra@chello.nl> for use by ARC-like
+ * algorithms.
+ *
+ * Keeps track of whether a non-resident page was recently evicted
+ * and should be immediately promoted to the active list. This also
+ * helps automatically tune the inactive target.
+ *
+ * The pageout code stores a recently evicted page in this cache
+ * by calling remember_page(mapping/mm, index/vaddr)
+ * and can look it up in the cache by calling recently_evicted()
+ * with the same arguments.
+ *
+ * Note that there is no way to invalidate pages after e.g. truncate
+ * or exit; we let the pages fall out of the non-resident set through
+ * normal replacement.
+ *
+ *
+ * Modified to work with ARC-like algorithms that:
+ * - need to balance two FIFOs; |b1| + |b2| = c,
+ * - keep a flag per non-resident page.
+ *
+ * The bucket contains two singly linked cyclic lists (CLOCKs) and each
+ * clock has a tail hand. By selecting a victim clock upon insertion it
+ * is possible to balance them.
+ *
+ * The slot looks like this:
+ * struct slot_t {
+ * u32 cookie : 24; // LSB
+ * u32 index : 6;
+ * u32 filter : 1;
+ * u32 clock : 1; // MSB
+ * };
+ *
+ * The bucket is guarded by a spinlock.
+ */
+#include <linux/swap.h>
+#include <linux/mm.h>
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/hash.h>
+#include <linux/prefetch.h>
+#include <linux/kernel.h>
+
+#define TARGET_SLOTS 64
+#define NR_CACHELINES (TARGET_SLOTS*sizeof(u32) / L1_CACHE_BYTES)
+#define NR_SLOTS (((NR_CACHELINES * L1_CACHE_BYTES) - sizeof(spinlock_t) - 2*sizeof(u16)) / sizeof(u32))
+#if 0
+#if NR_SLOTS < (TARGET_SLOTS / 2)
+#warning very small slot size
+#if NR_SLOTS <= 0
+#error no room for slots left
+#endif
+#endif
+#endif
+
+#define BUILD_MASK(bits, shift) (((1 << (bits)) - 1) << (shift))
+
+#define FLAGS_BITS 2
+#define FLAGS_SHIFT (sizeof(u32)*8 - FLAGS_BITS)
+#define FLAGS_MASK BUILD_MASK(FLAGS_BITS, FLAGS_SHIFT)
+
+#define SET_FLAGS(x, flg) ((x) = ((x) & ~FLAGS_MASK) | ((flg) << FLAGS_SHIFT))
+#define GET_FLAGS(x) (((x) & FLAGS_MASK) >> FLAGS_SHIFT)
+
+#define INDEX_BITS 6 /* ceil(log2(NR_SLOTS)) */
+#define INDEX_SHIFT (FLAGS_SHIFT - INDEX_BITS)
+#define INDEX_MASK BUILD_MASK(INDEX_BITS, INDEX_SHIFT)
+
+#define SET_INDEX(x, idx) ((x) = ((x) & ~INDEX_MASK) | ((idx) << INDEX_SHIFT))
+#define GET_INDEX(x) (((x) & INDEX_MASK) >> INDEX_SHIFT)
+
+struct nr_bucket
+{
+ spinlock_t lock;
+ u16 hand[2];
+ u32 slot[NR_SLOTS];
+} ____cacheline_aligned;
+
+/* The non-resident page hash table. */
+static struct nr_bucket * nonres_table;
+static unsigned int nonres_shift;
+static unsigned int nonres_mask;
+
+/* hash the address into a bucket */
+static struct nr_bucket * nr_hash(void * mapping, unsigned long index)
+{
+ unsigned long bucket;
+ unsigned long hash;
+
+ hash = hash_ptr(mapping, BITS_PER_LONG);
+ hash = 37 * hash + hash_long(index, BITS_PER_LONG);
+ bucket = hash & nonres_mask;
+
+ return nonres_table + bucket;
+}
+
+/* hash the address and inode into a cookie */
+static u32 nr_cookie(struct address_space * mapping, unsigned long index)
+{
+ unsigned long cookie;
+
+ cookie = hash_ptr(mapping, BITS_PER_LONG);
+ cookie = 37 * cookie + hash_long(index, BITS_PER_LONG);
+
+ if (mapping && mapping->host) {
+ cookie = 37 * cookie + hash_long(mapping->host->i_ino, BITS_PER_LONG);
+ }
+
+ return (u32)(cookie >> (BITS_PER_LONG - 32));
+}
+
+unsigned int recently_evicted(struct address_space * mapping, unsigned long index)
+{
+ struct nr_bucket * nr_bucket;
+ u32 wanted, mask;
+ unsigned int r_flags = 0;
+ int i;
+ unsigned long iflags;
+
+ prefetch(mapping->host);
+ nr_bucket = nr_hash(mapping, index);
+
+ spin_lock_prefetch(nr_bucket); /* prefetch_range(nr_bucket, NR_CACHELINES); */
+ mask = ~(FLAGS_MASK | INDEX_MASK);
+ wanted = nr_cookie(mapping, index) & mask;
+
+ spin_lock_irqsave(&nr_bucket->lock, iflags);
+ for (i = 0; i < NR_SLOTS; ++i) {
+ if ((nr_bucket->slot[i] & mask) == wanted) {
+ r_flags = GET_FLAGS(nr_bucket->slot[i]);
+ r_flags |= NR_evict; /* set the MSB to mark presence */
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&nr_bucket->lock, iflags);
+
+ return r_flags;
+}
+
+/* flags:
+ * bitwise or of the page flags (NR_filter, NR_list) and
+ * an NR_evict target
+ *
+ * remove current (b from 'abc'):
+ *
+ * initial swap(2,3)
+ *
+ * 1: -> [2],a 1: -> [2],a
+ * * 2: -> [3],b 2: -> [1],c
+ * 3: -> [1],c * 3: -> [3],b
+ *
+ * 3 is now free for use.
+ *
+ *
+ * insert before (d before b in 'abc')
+ *
+ * initial set 4 swap(2,4)
+ *
+ * 1: -> [2],a 1: -> [2],a 1: -> [2],a
+ * * 2: -> [3],b 2: -> [3],b 2: -> [4],d
+ * 3: -> [1],c 3: -> [1],c 3: -> [1],c
+ * 4: nil 4: -> [4],d * 4: -> [3],b
+ *
+ * leaving us with 'adbc'.
+ */
+unsigned int remember_page(struct address_space * mapping, unsigned long index, unsigned int flags)
+{
+ struct nr_bucket *nr_bucket;
+ u32 cookie;
+ u32 *slot, *tail;
+ unsigned int slot_pos, tail_pos;
+ unsigned long iflags;
+
+ prefetch(mapping->host);
+ nr_bucket = nr_hash(mapping, index);
+
+ spin_lock_prefetch(nr_bucket); /* prefetchw_range(nr_bucket, NR_CACHELINES); */
+ cookie = nr_cookie(mapping, index);
+ SET_FLAGS(cookie, flags);
+
+ flags &= NR_evict; /* removal chain */
+ spin_lock_irqsave(&nr_bucket->lock, iflags);
+
+ /* free a slot */
+again:
+ tail_pos = nr_bucket->hand[!!flags];
+ BUG_ON(tail_pos >= NR_SLOTS);
+ tail = &nr_bucket->slot[tail_pos];
+ if (unlikely((*tail & NR_evict) != flags)) {
+ flags ^= NR_evict; /* empty chain; take other one */
+ goto again;
+ }
+ BUG_ON((*tail & NR_evict) != flags);
+ /* free slot by swapping tail,tail+1, so that we skip over tail */
+ slot_pos = GET_INDEX(*tail);
+ BUG_ON(slot_pos >= NR_SLOTS);
+ slot = &nr_bucket->slot[slot_pos];
+ BUG_ON((*slot & NR_evict) != flags);
+ if (likely(tail != slot)) *slot = xchg(tail, *slot);
+ /* slot: -> [slot], old cookie */
+ BUG_ON(GET_INDEX(*slot) != slot_pos);
+
+ flags = (cookie & NR_evict); /* insertion chain */
+
+ /* place cookie in empty slot */
+ SET_INDEX(cookie, slot_pos); /* -> [slot], cookie */
+ cookie = xchg(slot, cookie); /* slot: -> [slot], cookie */
+
+ /* insert slot before tail; ie. MRU pos */
+ tail_pos = nr_bucket->hand[!!flags];
+ BUG_ON(tail_pos >= NR_SLOTS);
+ tail = &nr_bucket->slot[tail_pos];
+ if (likely((*tail & NR_evict) == flags && tail != slot))
+ *slot = xchg(tail, *slot); /* swap if not empty and not same */
+ nr_bucket->hand[!!flags] = slot_pos;
+
+ spin_unlock_irqrestore(&nr_bucket->lock, iflags);
+
+ return GET_FLAGS(cookie);
+}
+
+/*
+ * For interactive workloads, we remember about as many non-resident pages
+ * as we have actual memory pages. For server workloads with large inter-
+ * reference distances we could benefit from remembering more.
+ */
+static __initdata unsigned long nonresident_factor = 1;
+void __init init_nonresident(void)
+{
+ int target;
+ int i, j;
+
+ /*
+ * Calculate the non-resident hash bucket target. Use a power of
+ * two for the division because alloc_large_system_hash rounds up.
+ */
+ target = nr_all_pages * nonresident_factor;
+ target /= (sizeof(struct nr_bucket) / sizeof(u32));
+
+ nonres_table = alloc_large_system_hash("Non-resident page tracking",
+ sizeof(struct nr_bucket),
+ target,
+ 0,
+ HASH_EARLY | HASH_HIGHMEM,
+ &nonres_shift,
+ &nonres_mask,
+ 0);
+
+ for (i = 0; i < (1 << nonres_shift); i++) {
+ spin_lock_init(&nonres_table[i].lock);
+ nonres_table[i].hand[0] = nonres_table[i].hand[1] = 0;
+ for (j = 0; j < NR_SLOTS; ++j) {
+ nonres_table[i].slot[j] = 0;
+ SET_FLAGS(nonres_table[i].slot[j], (NR_list | NR_filter));
+ if (j < NR_SLOTS - 1)
+ SET_INDEX(nonres_table[i].slot[j], j+1);
+ else /* j == NR_SLOTS - 1 */
+ SET_INDEX(nonres_table[i].slot[j], 0);
+ }
+ }
+}
+
+static int __init set_nonresident_factor(char * str)
+{
+ if (!str)
+ return 0;
+ nonresident_factor = simple_strtoul(str, &str, 0);
+ return 1;
+}
+
+__setup("nonresident_factor=", set_nonresident_factor);
* [RFC][PATCH 2/6] CART Implementation
From: a.p.zijlstra @ 2005-08-27 21:57 UTC (permalink / raw)
To: linux-mm
[-- Attachment #1: cart-nonresident-stats.patch --]
[-- Type: text/plain, Size: 3367 bytes --]
Index: linux-2.6-cart/fs/proc/proc_misc.c
===================================================================
--- linux-2.6-cart.orig/fs/proc/proc_misc.c
+++ linux-2.6-cart/fs/proc/proc_misc.c
@@ -233,6 +233,20 @@ static struct file_operations proc_zonei
.release = seq_release,
};
+extern struct seq_operations nonresident_op;
+static int nonresident_open(struct inode *inode, struct file *file)
+{
+ (void)inode;
+ return seq_open(file, &nonresident_op);
+}
+
+static struct file_operations nonresident_file_operations = {
+ .open = nonresident_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
static int version_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
@@ -602,6 +616,7 @@ void __init proc_misc_init(void)
create_seq_entry("interrupts", 0, &proc_interrupts_operations);
create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations);
create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
+ create_seq_entry("nonresident",S_IRUGO, &nonresident_file_operations);
create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
create_seq_entry("diskstats", 0, &proc_diskstats_operations);
Index: linux-2.6-cart/mm/nonresident.c
===================================================================
--- linux-2.6-cart.orig/mm/nonresident.c
+++ linux-2.6-cart/mm/nonresident.c
@@ -275,3 +275,74 @@ static int __init set_nonresident_factor
}
__setup("nonresident_factor=", set_nonresident_factor);
+
+#ifdef CONFIG_PROC_FS
+
+#include <linux/seq_file.h>
+
+static void *stats_start(struct seq_file *m, loff_t *pos)
+{
+ if (*pos < 0 || *pos >= (1 << nonres_shift))
+ return NULL;
+
+ m->private = (unsigned long)*pos;
+
+ return pos;
+}
+
+static void *stats_next(struct seq_file *m, void *arg, loff_t *pos)
+{
+ if (*pos < (1 << nonres_shift)-1) {
+ (*pos)++;
+ (unsigned long)m->private++;
+ return pos;
+ }
+ return NULL;
+}
+
+static void stats_stop(struct seq_file *m, void *arg)
+{
+}
+
+static void bucket_stats(struct nr_bucket * nr_bucket, int * b1, int * b2)
+{
+ unsigned int i, b[2] = {0, 0};
+ for (i = 0; i < 2; ++i) {
+ unsigned int j = nr_bucket->hand[i];
+ do
+ {
+ u32 *slot = &nr_bucket->slot[j];
+ if (!!(GET_FLAGS(*slot) & NR_list) != !!i)
+ break;
+
+ j = GET_INDEX(*slot);
+ ++b[i];
+ } while (j != nr_bucket->hand[i]);
+ }
+ *b1=b[0];
+ *b2=b[1];
+}
+
+static int stats_show(struct seq_file *m, void *arg)
+{
+ unsigned int index = (unsigned long)m->private;
+ struct nr_bucket *nr_bucket = &nonres_table[index];
+ unsigned long flags;
+ unsigned int b1, b2;
+
+ spin_lock_irqsave(&nr_bucket->lock, flags);
+ bucket_stats(nr_bucket, &b1, &b2);
+ spin_unlock_irqrestore(&nr_bucket->lock, flags);
+ seq_printf(m, "%d\t%d\t%d\n", b1, b2, b1+b2);
+
+ return 0;
+}
+
+struct seq_operations nonresident_op = {
+ .start = stats_start,
+ .next = stats_next,
+ .stop = stats_stop,
+ .show = stats_show,
+};
+
+#endif /* CONFIG_PROC_FS */
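The resulting /proc/nonresident file emits one line per bucket: the B1
count, the B2 count and their sum, tab separated, as defined by
stats_show() above. A trivial userspace reader (illustrative only)
could aggregate the per-bucket counts like this:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/nonresident", "r");
	unsigned long b1, b2, total, sum1 = 0, sum2 = 0;

	if (!f) { perror("fopen"); return 1; }
	while (fscanf(f, "%lu%lu%lu", &b1, &b2, &total) == 3) {
		sum1 += b1;	/* total |B1| over all buckets */
		sum2 += b2;	/* total |B2| over all buckets */
	}
	fclose(f);
	printf("|B1| = %lu, |B2| = %lu\n", sum1, sum2);
	return 0;
}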
* Re: [RFC][PATCH 2/6] CART Implementation
From: Rik van Riel @ 2005-08-29 3:02 UTC (permalink / raw)
To: a.p.zijlstra; +Cc: linux-mm
On Sat, 27 Aug 2005, a.p.zijlstra@chello.nl wrote:
> +static void bucket_stats(struct nr_bucket * nr_bucket, int * b1, int * b2)
> +{
> + unsigned int i, b[2] = {0, 0};
> + for (i = 0; i < 2; ++i) {
> + unsigned int j = nr_bucket->hand[i];
> + do
> + {
> + u32 *slot = &nr_bucket->slot[j];
> + if (!!(GET_FLAGS(*slot) & NR_list) != !!i)
> + break;
> +
> + j = GET_INDEX(*slot);
> + ++b[i];
> + } while (j != nr_bucket->hand[i]);
Does this properly skip empty slots ?
Remember that a page that got paged in leaves a zeroed
out slot in the bucket...
--
All Rights Reversed
* Re: [RFC][PATCH 2/6] CART Implementation
From: Peter Zijlstra @ 2005-08-29 4:15 UTC (permalink / raw)
To: Rik van Riel; +Cc: linux-mm
On Sun, 2005-08-28 at 23:02 -0400, Rik van Riel wrote:
> On Sat, 27 Aug 2005, a.p.zijlstra@chello.nl wrote:
>
> > +static void bucket_stats(struct nr_bucket * nr_bucket, int * b1, int * b2)
> > +{
> > + unsigned int i, b[2] = {0, 0};
> > + for (i = 0; i < 2; ++i) {
> > + unsigned int j = nr_bucket->hand[i];
> > + do
> > + {
> > + u32 *slot = &nr_bucket->slot[j];
> > + if (!!(GET_FLAGS(*slot) & NR_list) != !!i)
> > + break;
> > +
> > + j = GET_INDEX(*slot);
> > + ++b[i];
> > + } while (j != nr_bucket->hand[i]);
>
> Does this properly skip empty slots ?
There are no empty slots. This thing always has B1_j + B2_j = NR_SLOTS.
I couldn't manage to keep track of two lists and empty slots. It doesn't
really matter though; I just have to start out with |B1| = 0 and |B2| =
c. I fill B2_j with zero cookies, so getting a hit there is very
unlikely; that way they just get overwritten due to old age and all is
well.
>
> Remember that a page that got paged in leaves a zeroed
> out slot in the bucket...
>
Yeah, I was playing aroung with that. I'll change that back because it
does indeed generate a problem elsewhere.
--
Peter Zijlstra <a.p.zijlstra@chello.nl>
* Re: [RFC][PATCH 2/6] CART Implementation
From: Peter Zijlstra @ 2005-08-29 6:20 UTC (permalink / raw)
To: Rik van Riel; +Cc: linux-mm
I'm being dense again. I really should not write these mails at 6am :-{
On Mon, 2005-08-29 at 06:15 +0200, Peter Zijlstra wrote:
> On Sun, 2005-08-28 at 23:02 -0400, Rik van Riel wrote:
> > On Sat, 27 Aug 2005, a.p.zijlstra@chello.nl wrote:
> >
> > > +static void bucket_stats(struct nr_bucket * nr_bucket, int * b1, int * b2)
> > > +{
> > > + unsigned int i, b[2] = {0, 0};
> > > + for (i = 0; i < 2; ++i) {
> > > + unsigned int j = nr_bucket->hand[i];
> > > + do
> > > + {
> > > + u32 *slot = &nr_bucket->slot[j];
> > > + if (!!(GET_FLAGS(*slot) & NR_list) != !!i)
> > > + break;
> > > +
> > > + j = GET_INDEX(*slot);
> > > + ++b[i];
> > > + } while (j != nr_bucket->hand[i]);
> >
> > Does this properly skip empty slots ?
>
I should indeed skip 0 cookie slots for the stats. The hidden assumption
was that the balance would not be disturbed by these null cookies, which
is not obviously true. Thanks for the hint.
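Something like this, probably (untested sketch against the bucket_stats
above; a slot whose cookie bits -- everything below the flag and index
bits -- are zero is one of the initial fillers and should not be
counted):

static void bucket_stats(struct nr_bucket * nr_bucket, int * b1, int * b2)
{
	u32 cookie_mask = ~(FLAGS_MASK | INDEX_MASK);
	unsigned int i, b[2] = {0, 0};
	for (i = 0; i < 2; ++i) {
		unsigned int j = nr_bucket->hand[i];
		do {
			u32 *slot = &nr_bucket->slot[j];
			if (!!(GET_FLAGS(*slot) & NR_list) != !!i)
				break;
			if (*slot & cookie_mask)	/* skip zero cookies */
				++b[i];
			j = GET_INDEX(*slot);
		} while (j != nr_bucket->hand[i]);
	}
	*b1 = b[0];
	*b2 = b[1];
}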
> There are no empty slots. This thing always has B1_j + B2_j = NR_SLOTS.
> I couldn't manage keeping track of two lists and empty slots. It doesn't
> really matter though. I just have to start out with |B1| = 0 and |B2| =
> c. I fill B2_j with zero cookies, so getting a hit there is very
> unlikely, that way they just get overwritten due to old age and all is
> well.
>
I could of course make the head 1 byte and have 4 list heads in there;
that way I even have 1 spare. I'll see what kind of mess that would
give ;-).
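That is, roughly (hypothetical layout; same 4 bytes as the current two
u16 hands, and with INDEX_BITS at 6 the slot indices comfortably fit a
u8 anyway):

struct nr_bucket
{
	spinlock_t lock;
	u8 hand[4];	/* hand[0]: B1 tail, hand[1]: B2 tail,
			 * hand[2]: free/empty list, hand[3]: spare */
	u32 slot[NR_SLOTS];
} ____cacheline_aligned;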
> >
> > Remember that a page that got paged in leaves a zeroed
> > out slot in the bucket...
> >
Yeah, I was playing around with that. I'll change that back because it
> does indeed generate a problem elsewhere.
Should be there again in the second series I sent out earlier.
* [RFC][PATCH 3/6] CART Implementation
From: a.p.zijlstra @ 2005-08-27 21:57 UTC (permalink / raw)
To: linux-mm
[-- Attachment #1: cart-cart.patch --]
[-- Type: text/plain, Size: 11783 bytes --]
Index: linux-2.6-cart/include/linux/mm_inline.h
===================================================================
--- linux-2.6-cart.orig/include/linux/mm_inline.h
+++ linux-2.6-cart/include/linux/mm_inline.h
@@ -31,10 +31,28 @@ static inline void
del_page_from_lru(struct zone *zone, struct page *page)
{
list_del(&page->lru);
- if (PageActive(page)) {
- ClearPageActive(page);
+ if (TestClearPageActive(page)) {
zone->nr_active--;
} else {
zone->nr_inactive--;
}
+ if (TestClearPageLongTerm(page)) {
+ /* zone->nr_longterm--; */
+ } else {
+ zone->nr_shortterm--;
+ }
+}
+
+static inline void
+add_page_to_active_tail(struct zone *zone, struct page *page)
+{
+ list_add_tail(&page->lru, &zone->active_list);
+ zone->nr_active++;
+}
+
+static inline void
+add_page_to_inactive_tail(struct zone *zone, struct page *page)
+{
+ list_add_tail(&page->lru, &zone->inactive_list);
+ zone->nr_inactive++;
}
Index: linux-2.6-cart/include/linux/mmzone.h
===================================================================
--- linux-2.6-cart.orig/include/linux/mmzone.h
+++ linux-2.6-cart/include/linux/mmzone.h
@@ -143,13 +143,17 @@ struct zone {
ZONE_PADDING(_pad1_)
/* Fields commonly accessed by the page reclaim scanner */
- spinlock_t lru_lock;
- struct list_head active_list;
- struct list_head inactive_list;
+ spinlock_t lru_lock;
+ struct list_head active_list; /* The T1 list of CART */
+ struct list_head inactive_list; /* The T2 list of CART */
unsigned long nr_scan_active;
unsigned long nr_scan_inactive;
unsigned long nr_active;
unsigned long nr_inactive;
+ unsigned long nr_evicted_active;
+ unsigned long nr_shortterm; /* number of short term pages */
+ unsigned long nr_p; /* p from the CART paper */
+ unsigned long nr_q; /* q from the CART paper */
unsigned long pages_scanned; /* since last reclaim */
int all_unreclaimable; /* All pages pinned */
Index: linux-2.6-cart/include/linux/page-flags.h
===================================================================
--- linux-2.6-cart.orig/include/linux/page-flags.h
+++ linux-2.6-cart/include/linux/page-flags.h
@@ -76,6 +76,8 @@
#define PG_nosave_free 18 /* Free, should not be written */
#define PG_uncached 19 /* Page has been mapped as uncached */
+#define PG_longterm 20 /* Filter bit for CART see mm/cart.c */
+
/*
* Global page accounting. One instance per CPU. Only unsigned longs are
* allowed.
@@ -305,6 +307,12 @@ extern void __mod_page_state(unsigned lo
#define SetPageUncached(page) set_bit(PG_uncached, &(page)->flags)
#define ClearPageUncached(page) clear_bit(PG_uncached, &(page)->flags)
+#define PageLongTerm(page) test_bit(PG_longterm, &(page)->flags)
+#define SetPageLongTerm(page) set_bit(PG_longterm, &(page)->flags)
+#define TestSetPageLongTerm(page) test_and_set_bit(PG_longterm, &(page)->flags)
+#define ClearPageLongTerm(page) clear_bit(PG_longterm, &(page)->flags)
+#define TestClearPageLongTerm(page) test_and_clear_bit(PG_longterm, &(page)->flags)
+
struct page; /* forward declaration */
int test_clear_page_dirty(struct page *page);
Index: linux-2.6-cart/include/linux/swap.h
===================================================================
--- linux-2.6-cart.orig/include/linux/swap.h
+++ linux-2.6-cart/include/linux/swap.h
@@ -7,6 +7,7 @@
#include <linux/mmzone.h>
#include <linux/list.h>
#include <linux/sched.h>
+#include <linux/mm.h>
#include <asm/atomic.h>
#include <asm/page.h>
@@ -163,6 +164,22 @@ extern unsigned int remember_page(struct
extern unsigned int recently_evicted(struct address_space *, unsigned long);
extern void init_nonresident(void);
+/* linux/mm/cart.c */
+extern void cart_init(void);
+extern void __cart_insert(struct zone *, struct page *);
+extern struct page *__cart_replace(struct zone *);
+extern void __cart_reinsert(struct zone *, struct page*);
+extern void __cart_remember(struct zone *, struct page*);
+
+static inline void cart_remember(struct page *page)
+{
+ unsigned long flags;
+ struct zone *zone = page_zone(page);
+ spin_lock_irqsave(&zone->lru_lock, flags);
+ __cart_remember(zone, page);
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
+}
+
/* linux/mm/page_alloc.c */
extern unsigned long totalram_pages;
extern unsigned long totalhigh_pages;
Index: linux-2.6-cart/init/main.c
===================================================================
--- linux-2.6-cart.orig/init/main.c
+++ linux-2.6-cart/init/main.c
@@ -497,6 +497,7 @@ asmlinkage void __init start_kernel(void
vfs_caches_init_early();
init_nonresident();
mem_init();
+ cart_init();
kmem_cache_init();
setup_per_cpu_pageset();
numa_policy_init();
Index: linux-2.6-cart/mm/Makefile
===================================================================
--- linux-2.6-cart.orig/mm/Makefile
+++ linux-2.6-cart/mm/Makefile
@@ -13,7 +13,7 @@ obj-y := bootmem.o filemap.o mempool.o
prio_tree.o $(mmu-y)
obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o \
- nonresident.o
+ nonresident.o cart.o
obj-$(CONFIG_HUGETLBFS) += hugetlb.o
obj-$(CONFIG_NUMA) += mempolicy.o
obj-$(CONFIG_SPARSEMEM) += sparse.o
Index: linux-2.6-cart/mm/cart.c
===================================================================
--- /dev/null
+++ linux-2.6-cart/mm/cart.c
@@ -0,0 +1,243 @@
+/* For further details, please refer to the CART paper here -
+ * http://www.almaden.ibm.com/cs/people/dmodha/clockfast.pdf
+ *
+ * Modified by Peter Zijlstra to work with the nonresident code I adapted
+ * from Rik van Riel.
+ *
+ * XXX: add page accounting
+ */
+
+#include <linux/swap.h>
+#include <linux/mm.h>
+#include <linux/page-flags.h>
+#include <linux/mm_inline.h>
+#include <linux/rmap.h>
+
+#define cart_cT ((zone)->nr_active + (zone)->nr_inactive)
+#define cart_cB ((zone)->present_pages)
+
+#define size_T1 ((zone)->nr_active)
+#define size_T2 ((zone)->nr_inactive)
+
+#define list_T1 (&(zone)->active_list)
+#define list_T2 (&(zone)->inactive_list)
+
+#define cart_p ((zone)->nr_p)
+#define cart_q ((zone)->nr_q)
+
+#define size_B1 ((zone)->nr_evicted_active)
+#define size_B2 (cart_cB - size_B1)
+
+#define nr_Ns ((zone)->nr_shortterm)
+#define nr_Nl (cart_cT - nr_Ns)
+
+#define T2B(x) (((x) * cart_cB) / (cart_cT + 1))
+#define B2T(x) (((x) * cart_cT) / cart_cB)
+
+/* Called from init/main.c to initialize the cart parameters */
+void __init cart_init(void)
+{
+ struct zone *zone;
+ for_each_zone(zone) {
+ zone->nr_evicted_active = 0;
+ /* zone->nr_evicted_inactive = cart_cB; */
+ zone->nr_shortterm = 0;
+ /* zone->nr_longterm = 0; */
+ zone->nr_p = 0;
+ zone->nr_q = 0;
+ }
+}
+
+static inline void cart_q_inc(struct zone *zone)
+{
+ /* if (|T2| + |B2| + |T1| - ns >= c) q = min(q + 1, 2c - |T1|) */
+ if (size_T2 + B2T(size_B2) + size_T1 - nr_Ns >= cart_cT)
+ cart_q = min(cart_q + 1, 2*cart_cB - T2B(size_T1));
+}
+
+static inline void cart_q_dec(struct zone *zone)
+{
+ /* q = max(q - 1, c - |T1|) */
+ unsigned long target = cart_cB - T2B(size_T1);
+ if (cart_q <= target)
+ cart_q = target;
+ else
+ --cart_q;
+}
+
+/*
+ * zone->lru_lock taken
+ */
+void __cart_insert(struct zone *zone, struct page *page)
+{
+ unsigned int rflags;
+ unsigned int on_B1, on_B2;
+
+ rflags = recently_evicted(page_mapping(page), page_index(page));
+ on_B1 = (rflags && !(rflags & NR_list));
+ on_B2 = (rflags && (rflags & NR_list));
+
+ if (on_B1) {
+ /* p = min(p + max(1, ns/|B1|), c) */
+ unsigned long ratio = nr_Ns / (B2T(size_B1) + 1);
+ cart_p += ratio ?: 1UL;
+ if (unlikely(cart_p > cart_cT))
+ cart_p = cart_cT;
+
+ SetPageLongTerm(page);
+ /* ++nr_Nl; */
+ } else if (on_B2) {
+ /* p = max(p - max(1, nl/|B2|), 0) */
+ unsigned long ratio = nr_Nl / (B2T(size_B2) + 1);
+ cart_p -= ratio ?: 1UL;
+ if (unlikely(cart_p > cart_cT)) /* unsigned; wrap around */
+ cart_p = 0UL;
+
+ SetPageLongTerm(page);
+ /* NOTE: this function is the only one that uses recently_evicted()
+ * and it does not use the NR_filter flag; we could live without it,
+ * for now it serves as a sanity check.
+ */
+ BUG_ON(!(rflags & NR_filter)); /* all pages in B2 are longterm */
+
+ /* ++nr_Nl; */
+ cart_q_inc(zone);
+ } else {
+ ClearPageLongTerm(page);
+ ++nr_Ns;
+ }
+
+ ClearPageReferenced(page);
+ SetPageActive(page);
+ add_page_to_active_list(zone, page);
+ BUG_ON(!PageLRU(page));
+}
+
+/* This function selects the candidate and returns the corresponding
+ * struct page * or returns NULL in case no page can be freed.
+ */
+struct page *__cart_replace(struct zone *zone)
+{
+ struct page *page;
+ int referenced;
+
+ while (!list_empty(list_T2)) {
+ page = list_entry(list_T2->next, struct page, lru);
+
+ if (!page_referenced(page, 0, 0))
+ break;
+
+ del_page_from_inactive_list(zone, page);
+ add_page_to_active_tail(zone, page);
+ SetPageActive(page);
+
+ cart_q_inc(zone);
+ }
+
+ while (!list_empty(list_T1)) {
+ page = list_entry(list_T1->next, struct page, lru);
+ referenced = page_referenced(page, 0, 0);
+
+ if (!PageLongTerm(page) && !referenced)
+ break;
+
+ if (referenced) {
+ del_page_from_active_list(zone, page);
+ add_page_to_active_tail(zone, page);
+
+ /* ( |T1| >= min(p + 1, |B1| ) and ( filter = 'S' ) */
+ if (size_T1 >= min(cart_p + 1, B2T(size_B1)) &&
+ !PageLongTerm(page)) {
+ SetPageLongTerm(page);
+ --nr_Ns;
+ /* ++nr_Nl; */
+ }
+ } else {
+ BUG_ON(!PageLongTerm(page));
+
+ del_page_from_active_list(zone, page);
+ add_page_to_inactive_tail(zone, page);
+ ClearPageActive(page);
+
+ cart_q_dec(zone);
+ }
+ }
+
+ page = NULL;
+ if (size_T1 > max(1UL, cart_p) || list_empty(list_T2)) {
+ if (!list_empty(list_T1)) {
+ page = list_entry(list_T1->next, struct page, lru);
+ del_page_from_active_list(zone, page);
+ BUG_ON(PageLongTerm(page));
+ --nr_Ns;
+ }
+ } else {
+ BUG_ON(list_empty(list_T2));
+ page = list_entry(list_T2->next, struct page, lru);
+ del_page_from_inactive_list(zone, page);
+ /* --nr_Nl; */
+ }
+ return page;
+}
+
+/* Re-insert pages that were elected for replacement but somehow didn't make
+ * it; treat them as referenced to let the reclaim path make progress.
+ */
+void __cart_reinsert(struct zone *zone, struct page *page)
+{
+ if (!PageLongTerm(page)) ++nr_Ns;
+
+ if (!PageActive(page)) { /* T2 */
+ SetPageActive(page);
+ add_page_to_active_tail(zone, page);
+
+ cart_q_inc(zone);
+ } else { /* T1 */
+ add_page_to_active_tail(zone, page);
+
+ /* ( |T1| >= min(p + 1, |B1| ) and ( filter = 'S' ) */
+ if (size_T1 >= min(cart_p + 1, B2T(size_B1)) &&
+ !PageLongTerm(page)) {
+ SetPageLongTerm(page);
+ --nr_Ns;
+ /* ++nr_Nl; */
+ }
+ }
+}
+
+/* puts pages on the non-resident lists on swap-out
+ * XXX: lose the reliance on zone->lru_lock !!!
+ */
+void __cart_remember(struct zone *zone, struct page *page)
+{
+ unsigned int rflags;
+ unsigned int flags = 0;
+
+ if (!PageActive(page)) {
+ flags |= NR_list;
+ /* ++size_B2; */
+ } else
+ ++size_B1;
+
+ if (PageLongTerm(page))
+ flags |= NR_filter;
+
+ /* History replacement; always remember. If the page was already remembered
+ * this will move it to the head. XXX: not so; fix this !!
+ *
+ * Assume |B1| + |B2| == c + 1, since |B1_j| + |B2_j| := c_j.
+ * The list_empty check is done on the Bn_j side.
+ */
+ /* |B1| <= max(0, q) */
+ if (size_B1 <= cart_q) flags |= NR_evict;
+
+ rflags = remember_page(page_mapping(page), page_index(page), flags);
+
+ if (rflags & NR_list) {
+ /* if (likely(size_B2)) --size_B2; */
+ } else {
+ if (likely(size_B1)) --size_B1;
+ }
+}
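To make the adaptation arithmetic in __cart_insert() above concrete,
here is a small illustrative userspace sketch of the p update on B1 and
B2 history hits. Plain unsigned longs stand in for the zone fields, the
function names are made up for the example, and the clamp to 0 is
written directly instead of via the unsigned wrap-around test the patch
uses:

#include <stdio.h>

/* p grows on a B1 hit: p = min(p + max(1, ns/|B1|), c) */
static unsigned long p_after_b1_hit(unsigned long p, unsigned long c,
				    unsigned long ns, unsigned long b1)
{
	unsigned long ratio = ns / (b1 + 1);	/* +1 avoids div by zero */
	p += ratio ? ratio : 1;
	return p > c ? c : p;
}

/* p shrinks on a B2 hit: p = max(p - max(1, nl/|B2|), 0) */
static unsigned long p_after_b2_hit(unsigned long p, unsigned long nl,
				    unsigned long b2)
{
	unsigned long ratio = nl / (b2 + 1);
	ratio = ratio ? ratio : 1;
	return p > ratio ? p - ratio : 0;
}

int main(void)
{
	unsigned long p = 100, c = 1000;

	p = p_after_b1_hit(p, c, 400, 50);	/* 400/51 = 7, p -> 107 */
	printf("after B1 hit: p = %lu\n", p);
	p = p_after_b2_hit(p, 600, 20);		/* 600/21 = 28, p -> 79 */
	printf("after B2 hit: p = %lu\n", p);
	return 0;
}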
* [RFC][PATCH 4/6] CART Implementation
From: a.p.zijlstra @ 2005-08-27 21:58 UTC (permalink / raw)
To: linux-mm
[-- Attachment #1: cart-cart-stats.patch --]
[-- Type: text/plain, Size: 4320 bytes --]
Index: linux-2.6-cart/fs/proc/proc_misc.c
===================================================================
--- linux-2.6-cart.orig/fs/proc/proc_misc.c
+++ linux-2.6-cart/fs/proc/proc_misc.c
@@ -233,6 +233,20 @@ static struct file_operations proc_zonei
.release = seq_release,
};
+extern struct seq_operations cart_op;
+static int cart_open(struct inode *inode, struct file *file)
+{
+ (void)inode;
+ return seq_open(file, &cart_op);
+}
+
+static struct file_operations cart_file_operations = {
+ .open = cart_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
extern struct seq_operations nonresident_op;
static int nonresident_open(struct inode *inode, struct file *file)
{
@@ -616,6 +630,7 @@ void __init proc_misc_init(void)
create_seq_entry("interrupts", 0, &proc_interrupts_operations);
create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations);
create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
+ create_seq_entry("cart",S_IRUGO, &cart_file_operations);
create_seq_entry("nonresident",S_IRUGO, &nonresident_file_operations);
create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
Index: linux-2.6-cart/mm/cart.c
===================================================================
--- linux-2.6-cart.orig/mm/cart.c
+++ linux-2.6-cart/mm/cart.c
@@ -241,3 +241,89 @@ void __cart_remember(struct zone *zone,
if (likely(size_B1)) --size_B1;
}
}
+
+#ifdef CONFIG_PROC_FS
+
+#include <linux/seq_file.h>
+
+static void *stats_start(struct seq_file *m, loff_t *pos)
+{
+ if (*pos != 0)
+ return NULL;
+
+ lru_add_drain();
+
+ return pos;
+}
+
+static void *stats_next(struct seq_file *m, void *arg, loff_t *pos)
+{
+ return NULL;
+}
+
+static void stats_stop(struct seq_file *m, void *arg)
+{
+}
+
+static int stats_show(struct seq_file *m, void *arg)
+{
+ struct zone *zone;
+ for_each_zone(zone) {
+ spin_lock_irq(&zone->lru_lock);
+ seq_printf(m, "\n\n======> zone: %lu <=====\n", (unsigned long)zone);
+ seq_printf(m, "struct zone values:\n");
+ seq_printf(m, " zone->nr_active: %lu\n", zone->nr_active);
+ seq_printf(m, " zone->nr_inactive: %lu\n", zone->nr_inactive);
+ seq_printf(m, " zone->nr_evicted_active: %lu\n", zone->nr_evicted_active);
+ seq_printf(m, " zone->nr_shortterm: %lu\n", zone->nr_shortterm);
+ seq_printf(m, " zone->cart_p: %lu\n", zone->nr_p);
+ seq_printf(m, " zone->cart_q: %lu\n", zone->nr_q);
+ seq_printf(m, " zone->present_pages: %lu\n", zone->present_pages);
+ seq_printf(m, " zone->free_pages: %lu\n", zone->free_pages);
+ seq_printf(m, " zone->pages_min: %lu\n", zone->pages_min);
+ seq_printf(m, " zone->pages_low: %lu\n", zone->pages_low);
+ seq_printf(m, " zone->pages_high: %lu\n", zone->pages_high);
+
+ seq_printf(m, "\n");
+ seq_printf(m, "implicit values:\n");
+ seq_printf(m, " zone->nr_evicted_longterm: %lu\n", size_B2);
+ seq_printf(m, " zone->nr_longterm: %lu\n", nr_Nl);
+ seq_printf(m, " zone->cart_c: %lu\n", cart_cT);
+
+ seq_printf(m, "\n");
+ seq_printf(m, "counted values:\n");
+
+ {
+ struct page *page;
+ unsigned long active = 0, s1 = 0, l1 = 0;
+ unsigned long inactive = 0, s2 = 0, l2 = 0;
+ list_for_each_entry(page, &zone->active_list, lru) {
+ ++active;
+ if (PageLongTerm(page)) ++l1;
+ else ++s1;
+ }
+ list_for_each_entry(page, &zone->inactive_list, lru) {
+ ++inactive;
+ if (PageLongTerm(page)) ++l2;
+ else ++s2;
+ }
+ seq_printf(m, " zone->nr_active: %lu (%lu, %lu)\n", active, s1, l1);
+ seq_printf(m, " zone->nr_inactive: %lu (%lu, %lu)\n", inactive, s2, l2);
+ seq_printf(m, " zone->nr_shortterm: %lu\n", s1+s2);
+ seq_printf(m, " zone->nr_longterm: %lu\n", l1+l2);
+ }
+
+ spin_unlock_irq(&zone->lru_lock);
+ }
+
+ return 0;
+}
+
+struct seq_operations cart_op = {
+ .start = stats_start,
+ .next = stats_next,
+ .stop = stats_stop,
+ .show = stats_show,
+};
+
+#endif /* CONFIG_PROC_FS */
* [RFC][PATCH 5/6] CART Implementation
From: a.p.zijlstra @ 2005-08-27 21:58 UTC (permalink / raw)
To: linux-mm
[-- Attachment #1: cart-use-once.patch --]
[-- Type: text/plain, Size: 4596 bytes --]
Index: linux-2.6-cart/mm/filemap.c
===================================================================
--- linux-2.6-cart.orig/mm/filemap.c
+++ linux-2.6-cart/mm/filemap.c
@@ -723,7 +723,6 @@ void do_generic_mapping_read(struct addr
unsigned long offset;
unsigned long last_index;
unsigned long next_index;
- unsigned long prev_index;
loff_t isize;
struct page *cached_page;
int error;
@@ -732,7 +731,6 @@ void do_generic_mapping_read(struct addr
cached_page = NULL;
index = *ppos >> PAGE_CACHE_SHIFT;
next_index = index;
- prev_index = ra.prev_page;
last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
offset = *ppos & ~PAGE_CACHE_MASK;
@@ -779,13 +777,7 @@ page_ok:
if (mapping_writably_mapped(mapping))
flush_dcache_page(page);
- /*
- * When (part of) the same page is read multiple times
- * in succession, only mark it as accessed the first time.
- */
- if (prev_index != index)
- mark_page_accessed(page);
- prev_index = index;
+ mark_page_accessed(page);
/*
* Ok, we have the page, and it's up-to-date, so
Index: linux-2.6-cart/mm/shmem.c
===================================================================
--- linux-2.6-cart.orig/mm/shmem.c
+++ linux-2.6-cart/mm/shmem.c
@@ -1500,11 +1500,8 @@ static void do_shmem_file_read(struct fi
*/
if (mapping_writably_mapped(mapping))
flush_dcache_page(page);
- /*
- * Mark the page accessed if we read the beginning.
- */
- if (!offset)
- mark_page_accessed(page);
+
+ mark_page_accessed(page);
} else
page = ZERO_PAGE(0);
Index: linux-2.6-cart/mm/swap.c
===================================================================
--- linux-2.6-cart.orig/mm/swap.c
+++ linux-2.6-cart/mm/swap.c
@@ -97,37 +97,12 @@ int rotate_reclaimable_page(struct page
}
/*
- * FIXME: speed this up?
- */
-void fastcall activate_page(struct page *page)
-{
- struct zone *zone = page_zone(page);
-
- spin_lock_irq(&zone->lru_lock);
- if (PageLRU(page) && !PageActive(page)) {
- del_page_from_inactive_list(zone, page);
- SetPageActive(page);
- add_page_to_active_list(zone, page);
- inc_page_state(pgactivate);
- }
- spin_unlock_irq(&zone->lru_lock);
-}
-
-/*
* Mark a page as having seen activity.
- *
- * inactive,unreferenced -> inactive,referenced
- * inactive,referenced -> active,unreferenced
- * active,unreferenced -> active,referenced
*/
void fastcall mark_page_accessed(struct page *page)
{
- if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) {
- activate_page(page);
- ClearPageReferenced(page);
- } else if (!PageReferenced(page)) {
+ if (!PageReferenced(page))
SetPageReferenced(page);
- }
}
EXPORT_SYMBOL(mark_page_accessed);
Index: linux-2.6-cart/mm/swapfile.c
===================================================================
--- linux-2.6-cart.orig/mm/swapfile.c
+++ linux-2.6-cart/mm/swapfile.c
@@ -408,7 +408,7 @@ static void unuse_pte(struct vm_area_str
* Move the page to the active list so it is not
* immediately swapped out again after swapon.
*/
- activate_page(page);
+ SetPageReferenced(page);
}
static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
@@ -508,7 +508,7 @@ static int unuse_mm(struct mm_struct *mm
* Activate page so shrink_cache is unlikely to unmap its
* ptes while lock is dropped, so swapoff can make progress.
*/
- activate_page(page);
+ SetPageReferenced(page);
unlock_page(page);
down_read(&mm->mmap_sem);
lock_page(page);
Index: linux-2.6-cart/mm/vmscan.c
===================================================================
--- linux-2.6-cart.orig/mm/vmscan.c
+++ linux-2.6-cart/mm/vmscan.c
@@ -235,27 +235,6 @@ static int shrink_slab(unsigned long sca
return ret;
}
-/* Called without lock on whether page is mapped, so answer is unstable */
-static inline int page_mapping_inuse(struct page *page)
-{
- struct address_space *mapping;
-
- /* Page is in somebody's page tables. */
- if (page_mapped(page))
- return 1;
-
- /* Be more reluctant to reclaim swapcache than pagecache */
- if (PageSwapCache(page))
- return 1;
-
- mapping = page_mapping(page);
- if (!mapping)
- return 0;
-
- /* File is mmap'd by somebody? */
- return mapping_mapped(mapping);
-}
-
static inline int is_page_cache_freeable(struct page *page)
{
return page_count(page) - !!PagePrivate(page) == 2;
* [RFC][PATCH 6/6] CART Implementation
From: a.p.zijlstra @ 2005-08-27 21:58 UTC (permalink / raw)
To: linux-mm
[-- Attachment #1: cart-use-cart.patch --]
[-- Type: text/plain, Size: 14913 bytes --]
Index: linux-2.6-cart/fs/exec.c
===================================================================
--- linux-2.6-cart.orig/fs/exec.c
+++ linux-2.6-cart/fs/exec.c
@@ -331,7 +331,7 @@ void install_arg_page(struct vm_area_str
goto out;
}
inc_mm_counter(mm, rss);
- lru_cache_add_active(page);
+ lru_cache_add(page);
set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
page, vma->vm_page_prot))));
page_add_anon_rmap(page, vma, address);
Index: linux-2.6-cart/include/linux/swap.h
===================================================================
--- linux-2.6-cart.orig/include/linux/swap.h
+++ linux-2.6-cart/include/linux/swap.h
@@ -191,8 +191,6 @@ extern unsigned int nr_free_pagecache_pa
/* linux/mm/swap.c */
extern void FASTCALL(lru_cache_add(struct page *));
-extern void FASTCALL(lru_cache_add_active(struct page *));
-extern void FASTCALL(activate_page(struct page *));
extern void FASTCALL(mark_page_accessed(struct page *));
extern void lru_add_drain(void);
extern int rotate_reclaimable_page(struct page *page);
Index: linux-2.6-cart/mm/memory.c
===================================================================
--- linux-2.6-cart.orig/mm/memory.c
+++ linux-2.6-cart/mm/memory.c
@@ -1304,7 +1304,7 @@ static int do_wp_page(struct mm_struct *
page_remove_rmap(old_page);
flush_cache_page(vma, address, pfn);
break_cow(vma, new_page, address, page_table);
- lru_cache_add_active(new_page);
+ lru_cache_add(new_page);
page_add_anon_rmap(new_page, vma, address);
/* Free the old page.. */
@@ -1782,7 +1782,7 @@ do_anonymous_page(struct mm_struct *mm,
entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
vma->vm_page_prot)),
vma);
- lru_cache_add_active(page);
+ lru_cache_add(page);
SetPageReferenced(page);
page_add_anon_rmap(page, vma, addr);
}
@@ -1903,7 +1903,8 @@ retry:
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
set_pte_at(mm, address, page_table, entry);
if (anon) {
- lru_cache_add_active(new_page);
+ lru_cache_add(new_page);
+ SetPageReferenced(new_page);
page_add_anon_rmap(new_page, vma, address);
} else
page_add_file_rmap(new_page);
Index: linux-2.6-cart/mm/swap.c
===================================================================
--- linux-2.6-cart.orig/mm/swap.c
+++ linux-2.6-cart/mm/swap.c
@@ -78,16 +78,17 @@ int rotate_reclaimable_page(struct page
return 1;
if (PageDirty(page))
return 1;
- if (PageActive(page))
- return 1;
if (!PageLRU(page))
return 1;
zone = page_zone(page);
spin_lock_irqsave(&zone->lru_lock, flags);
- if (PageLRU(page) && !PageActive(page)) {
+ if (PageLRU(page)) {
list_del(&page->lru);
- list_add_tail(&page->lru, &zone->inactive_list);
+ if (PageActive(page))
+ list_add(&page->lru, &zone->active_list);
+ else
+ list_add(&page->lru, &zone->inactive_list);
inc_page_state(pgrotated);
}
if (!test_clear_page_writeback(page))
@@ -112,7 +113,6 @@ EXPORT_SYMBOL(mark_page_accessed);
* @page: the page to add
*/
static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
-static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
void fastcall lru_cache_add(struct page *page)
{
@@ -124,25 +124,12 @@ void fastcall lru_cache_add(struct page
put_cpu_var(lru_add_pvecs);
}
-void fastcall lru_cache_add_active(struct page *page)
-{
- struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs);
-
- page_cache_get(page);
- if (!pagevec_add(pvec, page))
- __pagevec_lru_add_active(pvec);
- put_cpu_var(lru_add_active_pvecs);
-}
-
void lru_add_drain(void)
{
struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
if (pagevec_count(pvec))
__pagevec_lru_add(pvec);
- pvec = &__get_cpu_var(lru_add_active_pvecs);
- if (pagevec_count(pvec))
- __pagevec_lru_add_active(pvec);
put_cpu_var(lru_add_pvecs);
}
@@ -278,7 +265,9 @@ void __pagevec_lru_add(struct pagevec *p
}
if (TestSetPageLRU(page))
BUG();
- add_page_to_inactive_list(zone, page);
+ if (TestClearPageActive(page))
+ BUG();
+ __cart_insert(zone, page);
}
if (zone)
spin_unlock_irq(&zone->lru_lock);
@@ -288,33 +277,6 @@ void __pagevec_lru_add(struct pagevec *p
EXPORT_SYMBOL(__pagevec_lru_add);
-void __pagevec_lru_add_active(struct pagevec *pvec)
-{
- int i;
- struct zone *zone = NULL;
-
- for (i = 0; i < pagevec_count(pvec); i++) {
- struct page *page = pvec->pages[i];
- struct zone *pagezone = page_zone(page);
-
- if (pagezone != zone) {
- if (zone)
- spin_unlock_irq(&zone->lru_lock);
- zone = pagezone;
- spin_lock_irq(&zone->lru_lock);
- }
- if (TestSetPageLRU(page))
- BUG();
- if (TestSetPageActive(page))
- BUG();
- add_page_to_active_list(zone, page);
- }
- if (zone)
- spin_unlock_irq(&zone->lru_lock);
- release_pages(pvec->pages, pvec->nr, pvec->cold);
- pagevec_reinit(pvec);
-}
-
/*
* Try to drop buffers from the pages in a pagevec
*/
@@ -396,9 +358,6 @@ static void lru_drain_cache(unsigned int
/* CPU is dead, so no locking needed. */
if (pagevec_count(pvec))
__pagevec_lru_add(pvec);
- pvec = &per_cpu(lru_add_active_pvecs, cpu);
- if (pagevec_count(pvec))
- __pagevec_lru_add_active(pvec);
}
/* Drop the CPU's cached committed space back into the central pool. */
Index: linux-2.6-cart/mm/swap_state.c
===================================================================
--- linux-2.6-cart.orig/mm/swap_state.c
+++ linux-2.6-cart/mm/swap_state.c
@@ -359,7 +359,7 @@ struct page *read_swap_cache_async(swp_e
/*
* Initiate read into locked page and return.
*/
- lru_cache_add_active(new_page);
+ lru_cache_add(new_page);
swap_readpage(NULL, new_page);
return new_page;
}
Index: linux-2.6-cart/mm/vmscan.c
===================================================================
--- linux-2.6-cart.orig/mm/vmscan.c
+++ linux-2.6-cart/mm/vmscan.c
@@ -376,8 +376,6 @@ static int shrink_list(struct list_head
if (TestSetPageLocked(page))
goto keep;
- BUG_ON(PageActive(page));
-
sc->nr_scanned++;
/* Double the slab pressure for mapped and swapcache pages */
if (page_mapped(page) || PageSwapCache(page))
@@ -498,6 +496,7 @@ static int shrink_list(struct list_head
#ifdef CONFIG_SWAP
if (PageSwapCache(page)) {
swp_entry_t swap = { .val = page->private };
+ cart_remember(page);
__delete_from_swap_cache(page);
write_unlock_irq(&mapping->tree_lock);
swap_free(swap);
@@ -506,11 +505,13 @@ static int shrink_list(struct list_head
}
#endif /* CONFIG_SWAP */
+ cart_remember(page);
__remove_from_page_cache(page);
write_unlock_irq(&mapping->tree_lock);
__put_page(page);
free_it:
+ ClearPageActive(page);
unlock_page(page);
reclaimed++;
if (!pagevec_add(&freed_pvec, page))
@@ -545,33 +546,32 @@ keep:
* Appropriate locks must be held before calling this function.
*
* @nr_to_scan: The number of pages to look through on the list.
- * @src: The LRU list to pull pages off.
+ * @zone: The zone to get pages from.
* @dst: The temp list to put pages on to.
* @scanned: The number of pages that were scanned.
*
* returns how many pages were moved onto *@dst.
*/
-static int isolate_lru_pages(int nr_to_scan, struct list_head *src,
+static int isolate_lru_pages(int nr_to_scan, struct zone *zone,
struct list_head *dst, int *scanned)
{
int nr_taken = 0;
struct page *page;
int scan = 0;
- while (scan++ < nr_to_scan && !list_empty(src)) {
- page = lru_to_page(src);
- prefetchw_prev_lru_page(page, src, flags);
+ while (scan++ < nr_to_scan) {
+ page = __cart_replace(zone);
+ if (!page) break;
if (!TestClearPageLRU(page))
BUG();
- list_del(&page->lru);
if (get_page_testone(page)) {
/*
* It is being freed elsewhere
*/
__put_page(page);
SetPageLRU(page);
- list_add(&page->lru, src);
+ __cart_reinsert(zone, page);
continue;
} else {
list_add(&page->lru, dst);
@@ -603,9 +603,7 @@ static void shrink_cache(struct zone *zo
int nr_freed;
nr_taken = isolate_lru_pages(sc->swap_cluster_max,
- &zone->inactive_list,
- &page_list, &nr_scan);
- zone->nr_inactive -= nr_taken;
+ zone, &page_list, &nr_scan);
zone->pages_scanned += nr_scan;
spin_unlock_irq(&zone->lru_lock);
@@ -632,10 +630,7 @@ static void shrink_cache(struct zone *zo
if (TestSetPageLRU(page))
BUG();
list_del(&page->lru);
- if (PageActive(page))
- add_page_to_active_list(zone, page);
- else
- add_page_to_inactive_list(zone, page);
+ __cart_reinsert(zone, page);
if (!pagevec_add(&pvec, page)) {
spin_unlock_irq(&zone->lru_lock);
__pagevec_release(&pvec);
@@ -649,194 +644,34 @@ done:
}
/*
- * This moves pages from the active list to the inactive list.
- *
- * We move them the other way if the page is referenced by one or more
- * processes, from rmap.
- *
- * If the pages are mostly unmapped, the processing is fast and it is
- * appropriate to hold zone->lru_lock across the whole operation. But if
- * the pages are mapped, the processing is slow (page_referenced()) so we
- * should drop zone->lru_lock around each page. It's impossible to balance
- * this, so instead we remove the pages from the LRU while processing them.
- * It is safe to rely on PG_active against the non-LRU pages in here because
- * nobody will play with that bit on a non-LRU page.
- *
- * The downside is that we have to touch page->_count against each page.
- * But we had to alter page->flags anyway.
- */
-static void
-refill_inactive_zone(struct zone *zone, struct scan_control *sc)
-{
- int pgmoved;
- int pgdeactivate = 0;
- int pgscanned;
- int nr_pages = sc->nr_to_scan;
- LIST_HEAD(l_hold); /* The pages which were snipped off */
- LIST_HEAD(l_inactive); /* Pages to go onto the inactive_list */
- LIST_HEAD(l_active); /* Pages to go onto the active_list */
- struct page *page;
- struct pagevec pvec;
- int reclaim_mapped = 0;
- long mapped_ratio;
- long distress;
- long swap_tendency;
-
- lru_add_drain();
- spin_lock_irq(&zone->lru_lock);
- pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
- &l_hold, &pgscanned);
- zone->pages_scanned += pgscanned;
- zone->nr_active -= pgmoved;
- spin_unlock_irq(&zone->lru_lock);
-
- /*
- * `distress' is a measure of how much trouble we're having reclaiming
- * pages. 0 -> no problems. 100 -> great trouble.
- */
- distress = 100 >> zone->prev_priority;
-
- /*
- * The point of this algorithm is to decide when to start reclaiming
- * mapped memory instead of just pagecache. Work out how much memory
- * is mapped.
- */
- mapped_ratio = (sc->nr_mapped * 100) / total_memory;
-
- /*
- * Now decide how much we really want to unmap some pages. The mapped
- * ratio is downgraded - just because there's a lot of mapped memory
- * doesn't necessarily mean that page reclaim isn't succeeding.
- *
- * The distress ratio is important - we don't want to start going oom.
- *
- * A 100% value of vm_swappiness overrides this algorithm altogether.
- */
- swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
-
- /*
- * Now use this metric to decide whether to start moving mapped memory
- * onto the inactive list.
- */
- if (swap_tendency >= 100)
- reclaim_mapped = 1;
-
- while (!list_empty(&l_hold)) {
- cond_resched();
- page = lru_to_page(&l_hold);
- list_del(&page->lru);
- if (page_mapped(page)) {
- if (!reclaim_mapped ||
- (total_swap_pages == 0 && PageAnon(page)) ||
- page_referenced(page, 0, sc->priority <= 0)) {
- list_add(&page->lru, &l_active);
- continue;
- }
- }
- list_add(&page->lru, &l_inactive);
- }
-
- pagevec_init(&pvec, 1);
- pgmoved = 0;
- spin_lock_irq(&zone->lru_lock);
- while (!list_empty(&l_inactive)) {
- page = lru_to_page(&l_inactive);
- prefetchw_prev_lru_page(page, &l_inactive, flags);
- if (TestSetPageLRU(page))
- BUG();
- if (!TestClearPageActive(page))
- BUG();
- list_move(&page->lru, &zone->inactive_list);
- pgmoved++;
- if (!pagevec_add(&pvec, page)) {
- zone->nr_inactive += pgmoved;
- spin_unlock_irq(&zone->lru_lock);
- pgdeactivate += pgmoved;
- pgmoved = 0;
- if (buffer_heads_over_limit)
- pagevec_strip(&pvec);
- __pagevec_release(&pvec);
- spin_lock_irq(&zone->lru_lock);
- }
- }
- zone->nr_inactive += pgmoved;
- pgdeactivate += pgmoved;
- if (buffer_heads_over_limit) {
- spin_unlock_irq(&zone->lru_lock);
- pagevec_strip(&pvec);
- spin_lock_irq(&zone->lru_lock);
- }
-
- pgmoved = 0;
- while (!list_empty(&l_active)) {
- page = lru_to_page(&l_active);
- prefetchw_prev_lru_page(page, &l_active, flags);
- if (TestSetPageLRU(page))
- BUG();
- BUG_ON(!PageActive(page));
- list_move(&page->lru, &zone->active_list);
- pgmoved++;
- if (!pagevec_add(&pvec, page)) {
- zone->nr_active += pgmoved;
- pgmoved = 0;
- spin_unlock_irq(&zone->lru_lock);
- __pagevec_release(&pvec);
- spin_lock_irq(&zone->lru_lock);
- }
- }
- zone->nr_active += pgmoved;
- spin_unlock_irq(&zone->lru_lock);
- pagevec_release(&pvec);
-
- mod_page_state_zone(zone, pgrefill, pgscanned);
- mod_page_state(pgdeactivate, pgdeactivate);
-}
-
-/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
*/
static void
shrink_zone(struct zone *zone, struct scan_control *sc)
{
unsigned long nr_active;
- unsigned long nr_inactive;
/*
* Add one to `nr_to_scan' just to make sure that the kernel will
* slowly sift through the active list.
*/
- zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1;
+ zone->nr_scan_active += ((zone->nr_active + zone->nr_inactive) >> sc->priority) + 1;
nr_active = zone->nr_scan_active;
if (nr_active >= sc->swap_cluster_max)
zone->nr_scan_active = 0;
else
nr_active = 0;
- zone->nr_scan_inactive += (zone->nr_inactive >> sc->priority) + 1;
- nr_inactive = zone->nr_scan_inactive;
- if (nr_inactive >= sc->swap_cluster_max)
- zone->nr_scan_inactive = 0;
- else
- nr_inactive = 0;
sc->nr_to_reclaim = sc->swap_cluster_max;
- while (nr_active || nr_inactive) {
- if (nr_active) {
- sc->nr_to_scan = min(nr_active,
- (unsigned long)sc->swap_cluster_max);
- nr_active -= sc->nr_to_scan;
- refill_inactive_zone(zone, sc);
- }
-
- if (nr_inactive) {
- sc->nr_to_scan = min(nr_inactive,
- (unsigned long)sc->swap_cluster_max);
- nr_inactive -= sc->nr_to_scan;
- shrink_cache(zone, sc);
- if (sc->nr_to_reclaim <= 0)
- break;
- }
+ while (nr_active) {
+ sc->nr_to_scan = min(nr_active,
+ (unsigned long)sc->swap_cluster_max);
+ nr_active -= sc->nr_to_scan;
+ shrink_cache(zone, sc);
+ if (sc->nr_to_reclaim <= 0)
+ break;
}
throttle_vm_writeout();
* Re: [RFC][PATCH 0/6] CART Implementation
From: Marcelo Tosatti @ 2005-08-28 0:25 UTC (permalink / raw)
To: a.p.zijlstra; +Cc: linux-mm
Hi Peter,
On Sat, Aug 27, 2005 at 11:57:56PM +0200, a.p.zijlstra@chello.nl wrote:
> Hi All,
>
> (now split as per request)
>
> After another day of hard work I feel I have this CART implementation
> complete.
>
> It survives a pounding and the stats seem pretty stable.
>
> The things that need more work:
> 1) the hash function seems pretty lousy
> 2) __cart_remember() called from shrink_list() needs zone->lru_lock
>
> The whole non-resident code is based on the idea that the hash function
> gives an even spread so that:
>
> B1_j B1
> ------ ~ ----
> B2_j B2
>
> However after a pounding the variance in (B1_j - B2_j) as given by the
> std. deviation: sqrt(<x^2> - <x>^2) is around 10. And this for a bucket
> with 57 slots.
>
> The other issue is that __cart_remember() needs the zone->lru_lock. This
> function is called from shrink_list() where the lock is explicitly
> avoided, so this seems like an issue. Alternatives would be atomic_t for
> zone->nr_q or a per cpu counter delta. Suggestions?
>
> Also I made quite some changes in swap.c and vmscan.c without being an
> expert on the code. Did I foul up too bad?
>
> Then ofcourse I need to benchmark, suggestions?
>
> Some of this code is shamelessly copied from Rik van Riel, other parts
> are inspired by code from Rahul Iyer.
>
> Any comments appreciated.
+/* This function selects the candidate and returns the corresponding
+ * struct page * or returns NULL in case no page can be freed.
+ */
+struct page *__cart_replace(struct zone *zone)
+{
+ struct page *page;
+ int referenced;
+
+ while (!list_empty(list_T2)) {
+ page = list_entry(list_T2->next, struct page, lru);
+
+ if (!page_referenced(page, 0, 0))
+ break;
+
+ del_page_from_inactive_list(zone, page);
+ add_page_to_active_tail(zone, page);
+ SetPageActive(page);
+
+ cart_q_inc(zone);
+ }
If you find an unreferenced page in the T2 list you don't keep a reference
to it while performing the search on the T1 list below? That looks bogus.
Apart from that, both while (!list_empty()) loops are problematic. If there
are tons of referenced pages you simply loop, unlimited? And what about
the lru lock required for dealing with page->lru?
Look at the original algorithm: it grabs SWAP_CLUSTER_MAX pages from the inactive
list, puts them into a CPU local list (on the stack), releases the lru lock,
and works on the isolated pages. You want something similar.
As for testing, STP is really easy:
http://www.osdl.org/lab_activities/kernel_testing/stp
+
+ while (!list_empty(list_T1)) {
+ page = list_entry(list_T1->next, struct page, lru);
+ referenced = page_referenced(page, 0, 0);
* Re: [RFC][PATCH 0/6] CART Implementation
From: Peter Zijlstra @ 2005-08-28 8:03 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: linux-mm
On Sat, 2005-08-27 at 21:25 -0300, Marcelo Tosatti wrote:
>
> +/* This function selects the candidate and returns the corresponding
> + * struct page * or returns NULL in case no page can be freed.
> + */
> +struct page *__cart_replace(struct zone *zone)
> +{
> + struct page *page;
> + int referenced;
> +
> + while (!list_empty(list_T2)) {
> + page = list_entry(list_T2->next, struct page, lru);
> +
> + if (!page_referenced(page, 0, 0))
> + break;
> +
> + del_page_from_inactive_list(zone, page);
> + add_page_to_active_tail(zone, page);
> + SetPageActive(page);
> +
> + cart_q_inc(zone);
> + }
>
> If you find an unreferenced page in the T2 list you don't keep a reference
> to it performing a search on the T1 list below? That looks bogus.
If the loop breaks (unreferenced page) the head page of T2 is the one.
All other pages are moved to the tail of T1, as per the Paper.
> Apart from that, both while (!list_empty(list_T2)) are problematic. If there
> are tons of referenced pages you simply loop, unlimited?
No, at most |T2| times; after that the list is simply empty. As for the
other loop, that can run the initial |T1| times until it encounters the
first page put on the list by the previous loop, or until it has made a
full loop. page_referenced() clears the flag, right?
> And what about
> the lru lock required for dealing with page->lru ?
As the __ prefix in the name suggests, it is run under zone->lru_lock.
I'll add some comments.
> Look at the original algorithm: it grabs SWAP_CLUSTER_MAX pages from the inactive
> list, puts them into a CPU local list (on the stack), releases the lru lock,
> and works on the isolated pages. You want something similar.
I do, look at patch 6 where I put this thing into action.
isolate_lru_pages() is modified to remove nr_to_scan = SWAP_CLUSTER_MAX
pages from the lists. From there on it is similar to the current code.
> As for testing, STP is really easy:
>
> http://www.osdl.org/lab_activities/kernel_testing/stp
>
Thanks, I'll have a look.
Kind regards,
--
Peter Zijlstra <a.p.zijlstra@chello.nl>