From: npiggin@suse.de
To: akpm@linux-foundation.org, torvalds@linux-foundation.org
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
benh@kernel.crashing.org, paulus@samba.org
Subject: [patch 4/7] mm: lockless pagecache
Date: Thu, 05 Jun 2008 19:43:04 +1000 [thread overview]
Message-ID: <20080605094825.807238000@nick.local0.net> (raw)
In-Reply-To: <20080605094300.295184000@nick.local0.net>
[-- Attachment #1: mm-lockless-pagecache-lookups.patch --]
[-- Type: text/plain, Size: 6874 bytes --]
Combine page_cache_get_speculative with lockless radix tree lookups to
introduce lockless page cache lookups (ie. no mapping->tree_lock on
the read-side).
The only atomicity changes this introduces is that the gang pagecache
lookup functions now behave as if they are implemented with multiple
find_get_page calls, rather than operating on a snapshot of the pages.
In practice, this atomicity guarantee is not used anyway, and it is
difficult to see how it could be. Gang pagecache lookups are designed
to replace individual lookups, so these semantics are natural.
Signed-off-by: Nick Piggin <npiggin@suse.de>
---
Index: linux-2.6/mm/filemap.c
===================================================================
--- linux-2.6.orig/mm/filemap.c
+++ linux-2.6/mm/filemap.c
@@ -640,15 +640,35 @@ void __lock_page_nosync(struct page *pag
* Is there a pagecache struct page at the given (mapping, offset) tuple?
* If yes, increment its refcount and return it; if no, return NULL.
*/
-struct page * find_get_page(struct address_space *mapping, pgoff_t offset)
+struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
{
+ void **pagep;
struct page *page;
- read_lock_irq(&mapping->tree_lock);
- page = radix_tree_lookup(&mapping->page_tree, offset);
- if (page)
- page_cache_get(page);
- read_unlock_irq(&mapping->tree_lock);
+ rcu_read_lock();
+repeat:
+ page = NULL;
+ pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
+ if (pagep) {
+ page = radix_tree_deref_slot(pagep);
+ if (unlikely(!page || page == RADIX_TREE_RETRY))
+ goto repeat;
+
+ if (!page_cache_get_speculative(page))
+ goto repeat;
+
+ /*
+ * Has the page moved?
+ * This is part of the lockless pagecache protocol. See
+ * include/linux/pagemap.h for details.
+ */
+ if (unlikely(page != *pagep)) {
+ page_cache_release(page);
+ goto repeat;
+ }
+ }
+ rcu_read_unlock();
+
return page;
}
EXPORT_SYMBOL(find_get_page);
@@ -663,32 +683,22 @@ EXPORT_SYMBOL(find_get_page);
*
* Returns zero if the page was not present. find_lock_page() may sleep.
*/
-struct page *find_lock_page(struct address_space *mapping,
- pgoff_t offset)
+struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
{
struct page *page;
repeat:
- read_lock_irq(&mapping->tree_lock);
- page = radix_tree_lookup(&mapping->page_tree, offset);
+ page = find_get_page(mapping, offset);
if (page) {
- page_cache_get(page);
- if (TestSetPageLocked(page)) {
- read_unlock_irq(&mapping->tree_lock);
- __lock_page(page);
-
- /* Has the page been truncated while we slept? */
- if (unlikely(page->mapping != mapping)) {
- unlock_page(page);
- page_cache_release(page);
- goto repeat;
- }
- VM_BUG_ON(page->index != offset);
- goto out;
+ lock_page(page);
+ /* Has the page been truncated? */
+ if (unlikely(page->mapping != mapping)) {
+ unlock_page(page);
+ page_cache_release(page);
+ goto repeat;
}
+ VM_BUG_ON(page->index != offset);
}
- read_unlock_irq(&mapping->tree_lock);
-out:
return page;
}
EXPORT_SYMBOL(find_lock_page);
@@ -754,13 +764,39 @@ unsigned find_get_pages(struct address_s
{
unsigned int i;
unsigned int ret;
+ unsigned int nr_found;
+
+ rcu_read_lock();
+restart:
+ nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+ (void ***)pages, start, nr_pages);
+ ret = 0;
+ for (i = 0; i < nr_found; i++) {
+ struct page *page;
+repeat:
+ page = radix_tree_deref_slot((void **)pages[i]);
+ if (unlikely(!page))
+ continue;
+ /*
+ * this can only trigger if nr_found == 1, making livelock
+ * a non issue.
+ */
+ if (unlikely(page == RADIX_TREE_RETRY))
+ goto restart;
- read_lock_irq(&mapping->tree_lock);
- ret = radix_tree_gang_lookup(&mapping->page_tree,
- (void **)pages, start, nr_pages);
- for (i = 0; i < ret; i++)
- page_cache_get(pages[i]);
- read_unlock_irq(&mapping->tree_lock);
+ if (!page_cache_get_speculative(page))
+ goto repeat;
+
+ /* Has the page moved? */
+ if (unlikely(page != *((void **)pages[i]))) {
+ page_cache_release(page);
+ goto repeat;
+ }
+
+ pages[ret] = page;
+ ret++;
+ }
+ rcu_read_unlock();
return ret;
}
@@ -781,19 +817,44 @@ unsigned find_get_pages_contig(struct ad
{
unsigned int i;
unsigned int ret;
+ unsigned int nr_found;
+
+ rcu_read_lock();
+restart:
+ nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+ (void ***)pages, index, nr_pages);
+ ret = 0;
+ for (i = 0; i < nr_found; i++) {
+ struct page *page;
+repeat:
+ page = radix_tree_deref_slot((void **)pages[i]);
+ if (unlikely(!page))
+ continue;
+ /*
+ * this can only trigger if nr_found == 1, making livelock
+ * a non issue.
+ */
+ if (unlikely(page == RADIX_TREE_RETRY))
+ goto restart;
- read_lock_irq(&mapping->tree_lock);
- ret = radix_tree_gang_lookup(&mapping->page_tree,
- (void **)pages, index, nr_pages);
- for (i = 0; i < ret; i++) {
- if (pages[i]->mapping == NULL || pages[i]->index != index)
+ if (page->mapping == NULL || page->index != index)
break;
- page_cache_get(pages[i]);
+ if (!page_cache_get_speculative(page))
+ goto repeat;
+
+ /* Has the page moved? */
+ if (unlikely(page != *((void **)pages[i]))) {
+ page_cache_release(page);
+ goto repeat;
+ }
+
+ pages[ret] = page;
+ ret++;
index++;
}
- read_unlock_irq(&mapping->tree_lock);
- return i;
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(find_get_pages_contig);
@@ -813,15 +874,43 @@ unsigned find_get_pages_tag(struct addre
{
unsigned int i;
unsigned int ret;
+ unsigned int nr_found;
+
+ rcu_read_lock();
+restart:
+ nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree,
+ (void ***)pages, *index, nr_pages, tag);
+ ret = 0;
+ for (i = 0; i < nr_found; i++) {
+ struct page *page;
+repeat:
+ page = radix_tree_deref_slot((void **)pages[i]);
+ if (unlikely(!page))
+ continue;
+ /*
+ * this can only trigger if nr_found == 1, making livelock
+ * a non issue.
+ */
+ if (unlikely(page == RADIX_TREE_RETRY))
+ goto restart;
+
+ if (!page_cache_get_speculative(page))
+ goto repeat;
+
+ /* Has the page moved? */
+ if (unlikely(page != *((void **)pages[i]))) {
+ page_cache_release(page);
+ goto repeat;
+ }
+
+ pages[ret] = page;
+ ret++;
+ }
+ rcu_read_unlock();
- read_lock_irq(&mapping->tree_lock);
- ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
- (void **)pages, *index, nr_pages, tag);
- for (i = 0; i < ret; i++)
- page_cache_get(pages[i]);
if (ret)
*index = pages[ret - 1]->index + 1;
- read_unlock_irq(&mapping->tree_lock);
+
return ret;
}
EXPORT_SYMBOL(find_get_pages_tag);
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2008-06-05 9:43 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-06-05 9:43 [patch 0/7] speculative page references, lockless pagecache, lockless gup npiggin
2008-06-05 9:43 ` [patch 1/7] mm: readahead scan lockless npiggin
2008-06-05 9:43 ` [patch 2/7] radix-tree: add gang_lookup_slot, gang_lookup_slot_tag npiggin
2008-06-05 9:43 ` [patch 3/7] mm: speculative page references npiggin
2008-06-06 14:20 ` Peter Zijlstra
2008-06-06 16:26 ` Nick Piggin
2008-06-06 16:27 ` Nick Piggin
2008-06-09 4:48 ` Tim Pepper
2008-06-10 19:08 ` Christoph Lameter
2008-06-11 3:19 ` Nick Piggin
2008-06-05 9:43 ` npiggin [this message]
2008-06-05 9:43 ` [patch 5/7] mm: spinlock tree_lock npiggin
2008-06-05 9:43 ` [patch 6/7] powerpc: implement pte_special npiggin
2008-06-06 4:04 ` Benjamin Herrenschmidt
2008-06-05 9:43 ` [patch 7/7] powerpc: lockless get_user_pages_fast npiggin
2008-06-09 8:32 ` Andrew Morton
2008-06-10 3:15 ` Nick Piggin
2008-06-10 19:00 ` Christoph Lameter
2008-06-11 3:18 ` Nick Piggin
2008-06-11 4:40 ` Christoph Lameter
2008-06-11 4:41 ` Christoph Lameter
2008-06-11 4:49 ` Nick Piggin
2008-06-11 6:06 ` Andrew Morton
2008-06-11 6:24 ` Nick Piggin
2008-06-11 6:50 ` Andrew Morton
2008-06-11 23:20 ` Christoph Lameter
2008-06-11 4:47 ` Nick Piggin
2008-06-05 11:53 ` [patch 0/7] speculative page references, lockless pagecache, lockless gup Nick Piggin
2008-06-05 17:33 ` Linus Torvalds
2008-06-06 0:08 ` Nick Piggin
2008-06-06 21:32 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080605094825.807238000@nick.local0.net \
--to=npiggin@suse.de \
--cc=akpm@linux-foundation.org \
--cc=benh@kernel.crashing.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=paulus@samba.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox