linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Nick Piggin <npiggin@suse.de>
To: linux-arch@vger.kernel.org,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Linux Memory Management List <linux-mm@kvack.org>
Subject: [rfc] optimise unlock_page
Date: Tue, 8 May 2007 13:40:04 +0200	[thread overview]
Message-ID: <20070508114003.GB19294@wotan.suse.de> (raw)
In-Reply-To: <20070508113709.GA19294@wotan.suse.de>

This patch trades a page flag for a significant improvement in the unlock_page
fastpath. Various problems in the previous version were spotted by Hugh and
Ben (and fixed in this one).

Comments?

--

Speed up unlock_page by introducing a new page flag to signal that there are
page waitqueue waiters for PG_locked. This means a memory barrier and a random
waitqueue hash cacheline load can be avoided in the fastpath when there is no
contention.

Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h	2007-05-08 15:30:39.000000000 +1000
+++ linux-2.6/include/linux/page-flags.h	2007-05-08 15:31:00.000000000 +1000
@@ -91,6 +91,8 @@
 #define PG_nosave_free		18	/* Used for system suspend/resume */
 #define PG_buddy		19	/* Page is free, on buddy lists */
 
+#define PG_waiters		20	/* Page has PG_locked waiters */
+
 /* PG_owner_priv_1 users should have descriptive aliases */
 #define PG_checked		PG_owner_priv_1 /* Used by some filesystems */
 
@@ -123,6 +125,10 @@
 #define TestClearPageLocked(page)	\
 		test_and_clear_bit(PG_locked, &(page)->flags)
 
+#define PageWaiters(page)	test_bit(PG_waiters, &(page)->flags)
+#define SetPageWaiters(page)	set_bit(PG_waiters, &(page)->flags)
+#define ClearPageWaiters(page)	clear_bit(PG_waiters, &(page)->flags)
+
 #define PageError(page)		test_bit(PG_error, &(page)->flags)
 #define SetPageError(page)	set_bit(PG_error, &(page)->flags)
 #define ClearPageError(page)	clear_bit(PG_error, &(page)->flags)
Index: linux-2.6/include/linux/pagemap.h
===================================================================
--- linux-2.6.orig/include/linux/pagemap.h	2007-05-08 15:30:39.000000000 +1000
+++ linux-2.6/include/linux/pagemap.h	2007-05-08 16:55:07.000000000 +1000
@@ -133,7 +133,8 @@
 
 extern void FASTCALL(__lock_page(struct page *page));
 extern void FASTCALL(__lock_page_nosync(struct page *page));
-extern void FASTCALL(unlock_page(struct page *page));
+extern void FASTCALL(__wait_on_page_locked(struct page *page));
+extern void FASTCALL(__unlock_page(struct page *page));
 
 static inline int trylock_page(struct page *page)
 {
@@ -160,7 +161,15 @@
 	if (!trylock_page(page))
 		__lock_page_nosync(page);
 }
-	
+
+static inline void unlock_page(struct page *page)
+{
+	VM_BUG_ON(!PageLocked(page));
+	ClearPageLocked_Unlock(page);
+	if (unlikely(PageWaiters(page)))
+		__unlock_page(page);
+}
+
 /*
  * This is exported only for wait_on_page_locked/wait_on_page_writeback.
  * Never use this directly!
@@ -176,8 +185,9 @@
  */
 static inline void wait_on_page_locked(struct page *page)
 {
+	might_sleep();
 	if (PageLocked(page))
-		wait_on_page_bit(page, PG_locked);
+		__wait_on_page_locked(page);
 }
 
 /* 
@@ -185,6 +195,7 @@
  */
 static inline void wait_on_page_writeback(struct page *page)
 {
+	might_sleep();
 	if (PageWriteback(page))
 		wait_on_page_bit(page, PG_writeback);
 }
Index: linux-2.6/mm/filemap.c
===================================================================
--- linux-2.6.orig/mm/filemap.c	2007-05-08 15:30:55.000000000 +1000
+++ linux-2.6/mm/filemap.c	2007-05-08 18:04:20.000000000 +1000
@@ -169,6 +169,7 @@
 	return 0;
 }
 
+
 /**
  * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
  * @mapping:	address space structure to write
@@ -478,12 +479,6 @@
 EXPORT_SYMBOL(__page_cache_alloc);
 #endif
 
-static int __sleep_on_page_lock(void *word)
-{
-	io_schedule();
-	return 0;
-}
-
 /*
  * In order to wait for pages to become available there must be
  * waitqueues associated with pages. By using a hash table of
@@ -516,26 +511,22 @@
 }
 EXPORT_SYMBOL(wait_on_page_bit);
 
-/**
- * unlock_page - unlock a locked page
- * @page: the page
- *
- * Unlocks the page and wakes up sleepers in ___wait_on_page_locked().
- * Also wakes sleepers in wait_on_page_writeback() because the wakeup
- * mechananism between PageLocked pages and PageWriteback pages is shared.
- * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
- *
- * The mb is necessary to enforce ordering between the clear_bit and the read
- * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()).
+/*
+ * If PageWaiters was found to be set at unlock-time, __unlock_page should
+ * be called to actually perform the wakeups of waiters.
  */
-void fastcall unlock_page(struct page *page)
+void fastcall __unlock_page(struct page *page)
 {
-	VM_BUG_ON(!PageLocked(page));
-	ClearPageLocked_Unlock(page);
+	ClearPageWaiters(page);
+ 	/*
+	 * The mb is necessary to enforce ordering between the clear_bit and
+	 * the read of the waitqueue (to avoid SMP races with a parallel
+	 * wait_on_page_locked()
+	 */
 	smp_mb__after_clear_bit();
 	wake_up_page(page, PG_locked);
 }
-EXPORT_SYMBOL(unlock_page);
+EXPORT_SYMBOL(__unlock_page);
 
 /**
  * end_page_writeback - end writeback against a page
@@ -563,10 +554,16 @@
  */
 void fastcall __lock_page(struct page *page)
 {
+	wait_queue_head_t *wq = page_waitqueue(page);
 	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 
-	__wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
-							TASK_UNINTERRUPTIBLE);
+	do {
+		prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		SetPageWaiters(page);
+		if (likely(PageLocked(page)))
+			sync_page(page);
+	} while (!trylock_page(page));
+	finish_wait(wq, &wait.wait);
 }
 EXPORT_SYMBOL(__lock_page);
 
@@ -576,10 +573,39 @@
  */
 void fastcall __lock_page_nosync(struct page *page)
 {
+	wait_queue_head_t *wq = page_waitqueue(page);
 	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
-	__wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
-							TASK_UNINTERRUPTIBLE);
+
+	do {
+		prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		SetPageWaiters(page);
+		if (likely(PageLocked(page)))
+			io_schedule();
+	} while (!trylock_page(page));
+	finish_wait(wq, &wait.wait);
+}
+
+void fastcall __wait_on_page_locked(struct page *page)
+{
+	wait_queue_head_t *wq = page_waitqueue(page);
+	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+
+	do {
+		prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		SetPageWaiters(page);
+		if (likely(PageLocked(page)))
+			sync_page(page);
+	} while (PageLocked(page));
+	finish_wait(wq, &wait.wait);
+
+	/*
+	 * Could skip this, but that would leave PG_waiters dangling
+	 * for random pages. This keeps it tidy.
+	 */
+	if (unlikely(PageWaiters(page)))
+		__unlock_page(page);
 }
+EXPORT_SYMBOL(__wait_on_page_locked);
 
 /**
  * find_get_page - find and get a page reference
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c	2007-05-08 15:30:39.000000000 +1000
+++ linux-2.6/mm/page_alloc.c	2007-05-08 15:31:00.000000000 +1000
@@ -203,7 +203,8 @@
 			1 << PG_slab    |
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_buddy );
+			1 << PG_buddy |
+			1 << PG_waiters );
 	set_page_count(page, 0);
 	reset_page_mapcount(page);
 	page->mapping = NULL;
@@ -438,7 +439,8 @@
 			1 << PG_swapcache |
 			1 << PG_writeback |
 			1 << PG_reserved |
-			1 << PG_buddy ))))
+			1 << PG_buddy |
+			1 << PG_waiters ))))
 		bad_page(page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
@@ -588,7 +590,8 @@
 			1 << PG_swapcache |
 			1 << PG_writeback |
 			1 << PG_reserved |
-			1 << PG_buddy ))))
+			1 << PG_buddy |
+			1 << PG_waiters ))))
 		bad_page(page);
 
 	/*

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

       reply	other threads:[~2007-05-08 11:40 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20070508113709.GA19294@wotan.suse.de>
2007-05-08 11:40 ` Nick Piggin [this message]
2007-05-08 20:08   ` Hugh Dickins
2007-05-08 21:30   ` Benjamin Herrenschmidt
2007-05-08 22:41     ` Nick Piggin
2007-05-08 22:50       ` Nick Piggin
2007-05-09 19:33         ` Hugh Dickins
2007-05-09 21:21           ` Benjamin Herrenschmidt
2007-05-10  3:37           ` Nick Piggin
2007-05-10 19:14             ` Hugh Dickins
2007-05-11  8:54               ` Nick Piggin
2007-05-11 13:15                 ` Hugh Dickins
2007-05-13  3:32                   ` Nick Piggin
2007-05-13  4:39                     ` Hugh Dickins
2007-05-13  6:52                       ` Nick Piggin
2007-05-16 17:54                         ` Hugh Dickins
2007-05-16 18:18                           ` Nick Piggin
2007-05-16 19:28                             ` Hugh Dickins
2007-05-16 19:47                               ` Linus Torvalds
2007-05-17  6:27                                 ` Nick Piggin
2007-05-16 17:21                     ` Hugh Dickins
2007-05-16 17:38                       ` Nick Piggin
2007-05-08 12:13 ` David Howells
2007-05-08 22:35   ` Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070508114003.GB19294@wotan.suse.de \
    --to=npiggin@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=benh@kernel.crashing.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox