From: Andrea Arcangeli <andrea@suse.de>
To: Linus Torvalds <torvalds@transmeta.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.rutgers.edu
Subject: [patch] page-lru against 2.3.13-pre8
Date: Sun, 8 Aug 1999 20:34:58 +0200 (CEST)
Message-ID: <Pine.LNX.4.10.9908082029320.29734-200000@laser.random>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 275 bytes --]

I ported the minimal page-lru code to 2.3.13-pre8 because I noticed that
between 2.3.12 and 2.3.13-pre8 a related change (one from me ;) caused
the original patch to stop applying cleanly.

The new page-lru patch is equivalent to the previous page_lru-2.3.12-H
patch.
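
If you want to see the core idea in isolation, here is a minimal
userspace sketch of what the patch adds: struct page grows a list_head,
and lru_cache_add()/lru_cache_del() keep a global LRU list plus a
counter consistent under one lock. This is illustrative only, not the
kernel code; the pthread mutex stands in for the pagemap_lru_lock
spinlock.

/* cc -pthread lru_sketch.c */
#include <stdio.h>
#include <pthread.h>

struct list_head { struct list_head *next, *prev; };
#define LIST_HEAD_INIT(name) { &(name), &(name) }

static void list_add(struct list_head *new, struct list_head *head)
{
	new->prev = head;
	new->next = head->next;
	head->next->prev = new;
	head->next = new;
}

static void list_del(struct list_head *entry)
{
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
}

struct page {
	struct list_head lru;		/* the field the patch adds */
};

static struct list_head lru_cache = LIST_HEAD_INIT(lru_cache);
static pthread_mutex_t pagemap_lru_lock = PTHREAD_MUTEX_INITIALIZER;
static int nr_lru_pages;

static void lru_cache_add(struct page *page)
{
	pthread_mutex_lock(&pagemap_lru_lock);
	list_add(&page->lru, &lru_cache);	/* head == hot end */
	nr_lru_pages++;
	pthread_mutex_unlock(&pagemap_lru_lock);
}

static void lru_cache_del(struct page *page)
{
	pthread_mutex_lock(&pagemap_lru_lock);
	list_del(&page->lru);
	nr_lru_pages--;
	pthread_mutex_unlock(&pagemap_lru_lock);
}

int main(void)
{
	struct page p[3];

	for (int i = 0; i < 3; i++)
		lru_cache_add(&p[i]);
	lru_cache_del(&p[1]);
	printf("nr_lru_pages = %d\n", nr_lru_pages);	/* prints 2 */
	return 0;
}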

Andrea

[-- Attachment #2: Type: TEXT/PLAIN, Size: 15275 bytes --]

diff -urN 2.3.13-pre8/fs/buffer.c 2.3.13-pre8-lru/fs/buffer.c
--- 2.3.13-pre8/fs/buffer.c	Sun Aug  8 17:21:37 1999
+++ 2.3.13-pre8-lru/fs/buffer.c	Sun Aug  8 20:08:19 1999
@@ -1247,7 +1247,7 @@
 	if (!PageLocked(page))
 		BUG();
 	if (!page->buffers)
-		return 0;
+		return 1;
 
 	head = page->buffers;
 	bh = head;
@@ -1288,10 +1288,13 @@
 	 */
 	if (!offset) {
 		if (!try_to_free_buffers(page))
+		{
 			atomic_add(PAGE_CACHE_SIZE, &buffermem);
+			return 0;
+		}
 	}
 
-	return 0;
+	return 1;
 }
 
 static void create_empty_buffers(struct page *page, struct inode *inode, unsigned long blocksize)
@@ -1899,6 +1902,7 @@
 static int grow_buffers(int size)
 {
 	unsigned long page;
+	struct page * page_map;
 	struct buffer_head *bh, *tmp;
 	struct buffer_head * insert_point;
 	int isize;
@@ -1941,7 +1945,9 @@
 	free_list[isize].list = bh;
 	spin_unlock(&free_list[isize].lock);
 
-	mem_map[MAP_NR(page)].buffers = bh;
+	page_map = mem_map + MAP_NR(page);
+	page_map->buffers = bh;
+	lru_cache_add(page_map);
 	atomic_add(PAGE_SIZE, &buffermem);
 	return 1;
 }
diff -urN 2.3.13-pre8/fs/dcache.c 2.3.13-pre8-lru/fs/dcache.c
--- 2.3.13-pre8/fs/dcache.c	Tue Jul 13 02:01:39 1999
+++ 2.3.13-pre8-lru/fs/dcache.c	Sun Aug  8 20:08:19 1999
@@ -20,6 +20,7 @@
 #include <linux/malloc.h>
 #include <linux/slab.h>
 #include <linux/init.h>
+#include <linux/smp_lock.h>
 
 #include <asm/uaccess.h>
 
@@ -473,9 +474,11 @@
 {
 	if (gfp_mask & __GFP_IO) {
 		int count = 0;
+		lock_kernel();
 		if (priority)
 			count = dentry_stat.nr_unused / priority;
 		prune_dcache(count);
+		unlock_kernel();
 	}
 }
 
diff -urN 2.3.13-pre8/include/linux/mm.h 2.3.13-pre8-lru/include/linux/mm.h
--- 2.3.13-pre8/include/linux/mm.h	Wed Aug  4 12:28:17 1999
+++ 2.3.13-pre8-lru/include/linux/mm.h	Sun Aug  8 20:13:34 1999
@@ -125,6 +125,7 @@
 	struct page *next_hash;
 	atomic_t count;
 	unsigned long flags;	/* atomic flags, some possibly updated asynchronously */
+	struct list_head lru;
 	wait_queue_head_t wait;
 	struct page **pprev_hash;
 	struct buffer_head * buffers;
diff -urN 2.3.13-pre8/include/linux/swap.h 2.3.13-pre8-lru/include/linux/swap.h
--- 2.3.13-pre8/include/linux/swap.h	Sun Aug  8 19:41:44 1999
+++ 2.3.13-pre8-lru/include/linux/swap.h	Sun Aug  8 20:13:34 1999
@@ -64,6 +64,8 @@
 
 extern int nr_swap_pages;
 extern int nr_free_pages;
+extern int nr_lru_pages;
+extern struct list_head lru_cache;
 extern atomic_t nr_async_pages;
 extern struct inode swapper_inode;
 extern atomic_t page_cache_size;
@@ -160,6 +162,27 @@
 		count--;
 	return  count > 1;
 }
+
+extern spinlock_t pagemap_lru_lock;
+
+/*
+ * Helper macros for lru_pages handling.
+ */
+#define	lru_cache_add(page)			\
+do {						\
+	spin_lock(&pagemap_lru_lock);		\
+	list_add(&(page)->lru, &lru_cache);	\
+	nr_lru_pages++;				\
+	spin_unlock(&pagemap_lru_lock);		\
+} while (0)
+
+#define	lru_cache_del(page)			\
+do {						\
+	spin_lock(&pagemap_lru_lock);		\
+	list_del(&(page)->lru);			\
+	nr_lru_pages--;				\
+	spin_unlock(&pagemap_lru_lock);		\
+} while (0)
 
 #endif /* __KERNEL__*/
 
diff -urN 2.3.13-pre8/ipc/shm.c 2.3.13-pre8-lru/ipc/shm.c
--- 2.3.13-pre8/ipc/shm.c	Sun Aug  8 17:21:41 1999
+++ 2.3.13-pre8-lru/ipc/shm.c	Sun Aug  8 20:08:19 1999
@@ -719,10 +719,12 @@
 	int loop = 0;
 	int counter;
 	struct page * page_map;
+	int ret = 0;
 	
+	lock_kernel();
 	counter = shm_rss >> prio;
 	if (!counter || !(swap_nr = get_swap_page()))
-		return 0;
+		goto out_unlock;
 
  check_id:
 	shp = shm_segs[swap_id];
@@ -755,7 +757,7 @@
 	if (--counter < 0) { /* failed */
 		failed:
 		swap_free (swap_nr);
-		return 0;
+		goto out_unlock;
 	}
 	if (page_count(mem_map + MAP_NR(pte_page(page))) != 1)
 		goto check_table;
@@ -768,7 +770,10 @@
 	swap_successes++;
 	shm_swp++;
 	shm_rss--;
-	return 1;
+	ret = 1;
+ out_unlock:
+	unlock_kernel();
+	return ret;
 }
 
 /*
diff -urN 2.3.13-pre8/mm/filemap.c 2.3.13-pre8-lru/mm/filemap.c
--- 2.3.13-pre8/mm/filemap.c	Sun Aug  8 17:21:41 1999
+++ 2.3.13-pre8-lru/mm/filemap.c	Sun Aug  8 20:08:19 1999
@@ -33,6 +33,8 @@
  *
  * finished 'unifying' the page and buffer cache and SMP-threaded the
  * page-cache, 21.05.1999, Ingo Molnar <mingo@redhat.com>
+ *
+ * SMP-threaded pagemap-LRU 1999, Andrea Arcangeli <andrea@suse.de>
  */
 
 atomic_t page_cache_size = ATOMIC_INIT(0);
@@ -40,6 +42,11 @@
 struct page **page_hash_table;
 
 spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;
+/*
+ * NOTE: to avoid deadlocking you must never acquire the pagecache_lock with
+ *       the pagemap_lru_lock held.
+ */
+spinlock_t pagemap_lru_lock = SPIN_LOCK_UNLOCKED;
 
 
 void __add_page_to_hash_queue(struct page * page, struct page **p)
@@ -117,6 +124,7 @@
 		}
 		if (page_count(page) != 2)
 			printk("hm, busy page invalidated? (not necesserily a bug)\n");
+		lru_cache_del(page);
 
 		remove_page_from_inode_queue(page);
 		remove_page_from_hash_queue(page);
@@ -151,8 +159,9 @@
 
 			lock_page(page);
 
-			if (inode->i_op->flushpage)
-				inode->i_op->flushpage(inode, page, 0);
+			if (!inode->i_op->flushpage ||
+			    inode->i_op->flushpage(inode, page, 0))
+				lru_cache_del(page);
 
 			/*
 			 * We remove the page from the page cache
@@ -216,81 +225,61 @@
 
 int shrink_mmap(int priority, int gfp_mask)
 {
-	static unsigned long clock = 0;
-	unsigned long limit = num_physpages << 1;
+	int ret = 0, count;
+	LIST_HEAD(young);
+	LIST_HEAD(old);
+	LIST_HEAD(forget);
+	struct list_head * page_lru, * dispose;
 	struct page * page;
-	int count, users;
 
-	count = limit >> priority;
+	count = nr_lru_pages / (priority+1);
 
-	page = mem_map + clock;
-	do {
-		int referenced;
+	spin_lock(&pagemap_lru_lock);
 
-		/* This works even in the presence of PageSkip because
-		 * the first two entries at the beginning of a hole will
-		 * be marked, not just the first.
-		 */
-		page++;
-		clock++;
-		if (clock >= max_mapnr) {
-			clock = 0;
-			page = mem_map;
-		}
-		if (PageSkip(page)) {
-			/* next_hash is overloaded for PageSkip */
-			page = page->next_hash;
-			clock = page - mem_map;
-		}
-		
-		referenced = test_and_clear_bit(PG_referenced, &page->flags);
+	while (count > 0 && (page_lru = lru_cache.prev) != &lru_cache)
+	{
+		page = list_entry(page_lru, struct page, lru);
+		list_del(page_lru);
+
+		dispose = &lru_cache;
+		if (test_and_clear_bit(PG_referenced, &page->flags))
+			/* Roll the page back to the top of the lru list;
+			 * we could also be more aggressive and put the
+			 * page in the young dispose list, to avoid
+			 * freeing young pages in each pass.
+			 */
+			goto dispose_continue;
 
+		dispose = &old;
+		/* don't count passes over non-DMA pages */
 		if ((gfp_mask & __GFP_DMA) && !PageDMA(page))
-			continue;
+			goto dispose_continue;
 
 		count--;
 
-		/*
-		 * Some common cases that we just short-circuit without
-		 * getting the locks - we need to re-check this once we
-		 * have the lock, but that's fine.
-		 */
-		users = page_count(page);
-		if (!users)
-			continue;
-		if (!page->buffers) {
-			if (!page->inode)
-				continue;
-			if (users > 1)
-				continue;
-		}
-
-		/*
-		 * ok, now the page looks interesting. Re-check things
-		 * and keep the lock.
-		 */
+		dispose = &young;
+		if (TryLockPage(page))
+			goto dispose_continue;
+
+		/* Release the pagemap_lru lock even if the page is not
+		   yet queued in any lru queue, since we have just locked
+		   down the page so nobody else may SMP-race with us
+		   running a lru_cache_del() (lru_cache_del() always runs
+		   with the page locked down ;). */
+		spin_unlock(&pagemap_lru_lock);
+
+		/* avoid unscalable SMP locking */
+		if (!page->buffers && page_count(page) > 1)
+			goto unlock_noput_continue;
+
+		/* Take the pagecache_lock spinlock to prevent other
+		   tasks from noticing the page while we are looking at
+		   its page count. If it's a pagecache page we'll free it
+		   in one atomic transaction after checking its page count. */
 		spin_lock(&pagecache_lock);
-		if (!page->inode && !page->buffers) {
-			spin_unlock(&pagecache_lock);
-			continue;
-		}
-		if (!page_count(page)) {
-			spin_unlock(&pagecache_lock);
-			BUG();
-			continue;
-		}
-		get_page(page);
-		if (TryLockPage(page)) {
-			spin_unlock(&pagecache_lock);
-			goto put_continue;
-		}
 
-		/*
-		 * we keep pagecache_lock locked and unlock it in
-		 * each branch, so that the page->inode case doesnt
-		 * have to re-grab it. Here comes the 'real' logic
-		 * to free memory:
-		 */
+		/* avoid freeing the page while it's locked */
+		get_page(page);
 
 		/* Is it a buffer page? */
 		if (page->buffers) {
@@ -301,7 +290,7 @@
 			if (!page->inode)
 			{
 				atomic_sub(PAGE_CACHE_SIZE, &buffermem);
-				goto made_progress;
+				goto made_buffer_progress;
 			}
 			spin_lock(&pagecache_lock);
 		}
@@ -311,7 +300,7 @@
 		 * (count == 2 because we added one ourselves above).
 		 */
 		if (page_count(page) != 2)
-			goto spin_unlock_continue;
+			goto cache_unlock_continue;
 
 		/*
 		 * Is it a page swap page? If so, we want to
@@ -320,35 +309,68 @@
 		 */
 		if (PageSwapCache(page)) {
 			spin_unlock(&pagecache_lock);
-			if (referenced && swap_count(page->offset) != 2)
-				goto unlock_continue;
 			__delete_from_swap_cache(page);
-			page_cache_release(page);
-			goto made_progress;
+			goto made_inode_progress;
 		}	
 
 		/* is it a page-cache page? */
-		if (!referenced && page->inode && !pgcache_under_min()) {
-			remove_page_from_inode_queue(page);
-			remove_page_from_hash_queue(page);
-			page->inode = NULL;
-			spin_unlock(&pagecache_lock);
-
-			page_cache_release(page);
-			goto made_progress;
+		if (page->inode)
+		{
+			dispose = &old;
+			if (!pgcache_under_min())
+			{
+				remove_page_from_inode_queue(page);
+				remove_page_from_hash_queue(page);
+				page->inode = NULL;
+				spin_unlock(&pagecache_lock);
+				goto made_inode_progress;
+			}
+			goto cache_unlock_continue;
 		}
-spin_unlock_continue:
+
+		dispose = &forget;
+		printk(KERN_ERR "shrink_mmap: unknown LRU page!\n");
+
+cache_unlock_continue:
 		spin_unlock(&pagecache_lock);
 unlock_continue:
 		UnlockPage(page);
-put_continue:
 		put_page(page);
-	} while (count > 0);
-	return 0;
-made_progress:
+dispose_relock_continue:
+		/* even if the dispose list is local, a truncate_inode_page()
+		   may remove a page from its queue, so always
+		   synchronize with the lru lock while accessing the
+		   page->lru field */
+		spin_lock(&pagemap_lru_lock);
+		list_add(page_lru, dispose);
+		continue;
+
+unlock_noput_continue:
+		UnlockPage(page);
+		goto dispose_relock_continue;
+
+dispose_continue:
+		list_add(page_lru, dispose);
+	}
+	goto out;
+
+made_inode_progress:
+	page_cache_release(page);
+made_buffer_progress:
 	UnlockPage(page);
 	put_page(page);
-	return 1;
+	ret = 1;
+	spin_lock(&pagemap_lru_lock);
+	/* nr_lru_pages needs the spinlock */
+	nr_lru_pages--;
+
+out:
+	list_splice(&young, &lru_cache);
+	list_splice(&old, lru_cache.prev);
+
+	spin_unlock(&pagemap_lru_lock);
+
+	return ret;
 }
 
 static inline struct page * __find_page_nolock(struct inode * inode, unsigned long offset, struct page *page)
@@ -465,13 +487,14 @@
 {
 	unsigned long flags;
 
-	flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error));
-	page->flags = flags |  ((1 << PG_locked) | (1 << PG_referenced));
+	flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_referenced));
+	page->flags = flags | (1 << PG_locked);
 	page->owner = current;	/* REMOVEME */
 	get_page(page);
 	page->offset = offset;
 	add_page_to_inode_queue(inode, page);
 	__add_page_to_hash_queue(page, hash);
+	lru_cache_add(page);
 }
 
 void add_to_page_cache(struct page * page, struct inode * inode, unsigned long offset)
diff -urN 2.3.13-pre8/mm/page_alloc.c 2.3.13-pre8-lru/mm/page_alloc.c
--- 2.3.13-pre8/mm/page_alloc.c	Tue Jul 13 02:02:40 1999
+++ 2.3.13-pre8-lru/mm/page_alloc.c	Sun Aug  8 20:08:19 1999
@@ -20,6 +20,8 @@
 
 int nr_swap_pages = 0;
 int nr_free_pages = 0;
+int nr_lru_pages;
+LIST_HEAD(lru_cache);
 
 /*
  * Free area management
@@ -127,7 +129,6 @@
 		if (PageLocked(page))
 			PAGE_BUG(page);
 
-		page->flags &= ~(1 << PG_referenced);
 		free_pages_ok(page - mem_map, 0);
 		return 1;
 	}
@@ -145,7 +146,6 @@
 				PAGE_BUG(map);
 			if (PageLocked(map))
 				PAGE_BUG(map);
-			map->flags &= ~(1 << PG_referenced);
 			free_pages_ok(map_nr, order);
 			return 1;
 		}
@@ -269,8 +269,9 @@
  	unsigned long total = 0;
 
 	printk("Free pages:      %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
-	printk("Free: %d (%d %d %d)\n",
+	printk("Free: %d, lru_cache: %d (%d %d %d)\n",
 		nr_free_pages,
+		nr_lru_pages,
 		freepages.min,
 		freepages.low,
 		freepages.high);
diff -urN 2.3.13-pre8/mm/swap_state.c 2.3.13-pre8-lru/mm/swap_state.c
--- 2.3.13-pre8/mm/swap_state.c	Tue Jul 13 02:02:10 1999
+++ 2.3.13-pre8-lru/mm/swap_state.c	Sun Aug  8 20:08:19 1999
@@ -214,8 +214,6 @@
 		   page_address(page), page_count(page));
 #endif
 	PageClearSwapCache(page);
-	if (inode->i_op->flushpage)
-		inode->i_op->flushpage(inode, page, 0);
 	remove_inode_page(page);
 }
 
@@ -239,6 +237,15 @@
 	swap_free (entry);
 }
 
+static void delete_from_swap_cache_nolock(struct page *page)
+{
+	if (!swapper_inode.i_op->flushpage ||
+	    swapper_inode.i_op->flushpage(&swapper_inode, page, 0))
+		lru_cache_del(page);
+
+	__delete_from_swap_cache(page);
+}
+
 /*
  * This must be called only on pages that have
  * been verified to be in the swap cache.
@@ -247,7 +254,7 @@
 {
 	lock_page(page);
 
-	__delete_from_swap_cache(page);
+	delete_from_swap_cache_nolock(page);
 
 	UnlockPage(page);
 	page_cache_release(page);
@@ -267,9 +274,7 @@
 	 */
 	lock_page(page);
 	if (PageSwapCache(page) && !is_page_shared(page)) {
-		long entry = page->offset;
-		remove_from_swap_cache(page);
-		swap_free(entry);
+		delete_from_swap_cache_nolock(page);
 		page_cache_release(page);
 	}
 	UnlockPage(page);
diff -urN 2.3.13-pre8/mm/vmscan.c 2.3.13-pre8-lru/mm/vmscan.c
--- 2.3.13-pre8/mm/vmscan.c	Sun Aug  8 17:21:41 1999
+++ 2.3.13-pre8-lru/mm/vmscan.c	Sun Aug  8 20:11:42 1999
@@ -319,7 +319,9 @@
 {
 	struct task_struct * p;
 	int counter;
+	int __ret = 0;
 
+	lock_kernel();
 	/* 
 	 * We make one or two passes through the task list, indexed by 
 	 * assign = {0, 1}:
@@ -382,11 +384,13 @@
 
 			if (ret < 0)
 				kill_proc(pid, SIGBUS, 1);
-			return 1;
+			__ret = 1;
+			goto out;
 		}
 	}
 out:
-	return 0;
+	unlock_kernel();
+	return __ret;
 }
 
 /*
@@ -403,8 +407,6 @@
 	int priority;
 	int count = SWAP_CLUSTER_MAX;
 
-	lock_kernel();
-
 	/* Always trim SLAB caches when memory gets low. */
 	kmem_cache_reap(gfp_mask);
 
@@ -432,7 +434,6 @@
 		shrink_dcache_memory(priority, gfp_mask);
 	} while (--priority >= 0);
 done:
-	unlock_kernel();
 
 	return priority >= 0;
 }
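
P.S. For readers following the shrink_mmap() rewrite above, the control
flow boils down to the sketch below: pop pages from the cold (tail) end
of the LRU, divert them onto local dispose lists, and splice the lists
back at the end. Everything here is a simplified userspace stand-in --
the "referenced"/"busy" flags play the role of PG_referenced and a
failed TryLockPage(), and no locking is shown (the real function drops
pagemap_lru_lock before taking pagecache_lock, per the NOTE the patch
adds to mm/filemap.c).

#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };
#define LIST_HEAD(name) struct list_head name = { &(name), &(name) }
#define list_entry(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static void list_add(struct list_head *new, struct list_head *head)
{
	new->prev = head;
	new->next = head->next;
	head->next->prev = new;
	head->next = new;
}

static void list_del(struct list_head *entry)
{
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
}

/* insert @list right after @head, as the kernel's list_splice() does;
 * splicing at lru_cache.prev therefore lands at the cold (tail) end */
static void list_splice(struct list_head *list, struct list_head *head)
{
	struct list_head *first = list->next, *last = list->prev;

	if (first == list)
		return;				/* nothing to splice */
	first->prev = head;
	last->next = head->next;
	head->next->prev = last;
	head->next = first;
}

struct page {
	int id;
	int referenced;		/* stands in for PG_referenced */
	int busy;		/* stands in for TryLockPage() failing */
	struct list_head lru;
};

int main(void)
{
	LIST_HEAD(lru_cache);
	LIST_HEAD(young);
	LIST_HEAD(old);
	struct list_head *page_lru;
	struct page pages[4] = {
		{ 0, 1, 0 }, { 1, 0, 0 }, { 2, 0, 1 }, { 3, 0, 0 },
	};
	int count = 4;

	for (int i = 0; i < 4; i++)
		list_add(&pages[i].lru, &lru_cache);	/* id 3 ends up hottest */

	/* scan from the cold end, as shrink_mmap() does */
	while (count-- > 0 && (page_lru = lru_cache.prev) != &lru_cache) {
		struct page *page = list_entry(page_lru, struct page, lru);
		struct list_head *dispose;

		list_del(page_lru);
		if (page->referenced) {
			page->referenced = 0;
			dispose = &lru_cache;	/* roll back to the hot end */
		} else if (page->busy) {
			dispose = &young;	/* couldn't lock it, retry soon */
		} else {
			dispose = &old;		/* reclaim candidate stays cold */
		}
		list_add(page_lru, dispose);
	}

	list_splice(&young, &lru_cache);	/* young back at the hot end */
	list_splice(&old, lru_cache.prev);	/* old back at the cold end */

	for (page_lru = lru_cache.next; page_lru != &lru_cache;
	     page_lru = page_lru->next)
		printf("page %d\n", list_entry(page_lru, struct page, lru)->id);
	return 0;
}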
