From: Nick Piggin <nickpiggin@yahoo.com.au>
To: Jens Axboe <axboe@suse.de>
Cc: David Chinner <dgc@sgi.com>, Christoph Lameter <clameter@sgi.com>,
Andrew Morton <akpm@osdl.org>,
linux-kernel@vger.kernel.org, npiggin@suse.de,
linux-mm@kvack.org
Subject: Re: Lockless page cache test results
Date: Sun, 30 Apr 2006 21:44:53 +1000 [thread overview]
Message-ID: <4454A335.7070600@yahoo.com.au> (raw)
In-Reply-To: <20060430113905.GF23137@suse.de>
[-- Attachment #1: Type: text/plain, Size: 257 bytes --]
Jens Axboe wrote:
> You killed a lock too many there. I'm sure it'd help scalability,
> though :-)
Ssshh! That was my secret plan :)
Good catch though, thanks. I'll attach a new patch in case David
has a chance to try it out.
--
SUSE Labs, Novell Inc.
[-- Attachment #2: mm-batch-ra-pagecache-add.patch --]
[-- Type: text/plain, Size: 6960 bytes --]
Index: linux-2.6/fs/mpage.c
===================================================================
--- linux-2.6.orig/fs/mpage.c 2006-04-30 19:36:18.000000000 +1000
+++ linux-2.6/fs/mpage.c 2006-04-30 21:42:15.000000000 +1000
@@ -26,6 +26,7 @@
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
+#include <linux/swap.h>
/*
* I/O completion handler for multipage BIOs.
@@ -389,31 +390,57 @@ mpage_readpages(struct address_space *ma
struct bio *bio = NULL;
unsigned page_idx;
sector_t last_block_in_bio = 0;
- struct pagevec lru_pvec;
+ struct pagevec pvec;
struct buffer_head map_bh;
unsigned long first_logical_block = 0;
clear_buffer_mapped(&map_bh);
- pagevec_init(&lru_pvec, 0);
+ pagevec_init(&pvec, 0);
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = list_entry(pages->prev, struct page, lru);
prefetchw(&page->flags);
list_del(&page->lru);
- if (!add_to_page_cache(page, mapping,
- page->index, GFP_KERNEL)) {
- bio = do_mpage_readpage(bio, page,
- nr_pages - page_idx,
- &last_block_in_bio, &map_bh,
- &first_logical_block,
- get_block);
- if (!pagevec_add(&lru_pvec, page))
- __pagevec_lru_add(&lru_pvec);
- } else {
- page_cache_release(page);
+
+ if (!pagevec_add(&pvec, page) || page_idx == nr_pages-1) {
+ int i = 0, in_cache;
+
+ if (radix_tree_preload(GFP_KERNEL))
+ goto pagevec_error;
+
+ write_lock_irq(&mapping->tree_lock);
+ for (; i < pagevec_count(&pvec); i++) {
+ struct page *page = pvec.pages[i];
+ unsigned long offset = page->index;
+
+ if (__add_to_page_cache(page, mapping, offset))
+ break; /* error */
+ }
+ write_unlock_irq(&mapping->tree_lock);
+ radix_tree_preload_end();
+
+ in_cache = i;
+ for (i = 0; i < in_cache; i++) {
+ struct page *page = pvec.pages[i];
+
+ bio = do_mpage_readpage(bio, page,
+ nr_pages - page_idx,
+ &last_block_in_bio, &map_bh,
+ &first_logical_block,
+ get_block);
+ lru_cache_add(page);
+ }
+
+pagevec_error:
+ for (; i < pagevec_count(&pvec); i++) {
+ struct page *page = pvec.pages[i];
+ page_cache_release(page);
+ }
+
+ pagevec_reinit(&pvec);
}
}
- pagevec_lru_add(&lru_pvec);
+
BUG_ON(!list_empty(pages));
if (bio)
mpage_bio_submit(READ, bio);
Index: linux-2.6/mm/filemap.c
===================================================================
--- linux-2.6.orig/mm/filemap.c 2006-04-30 19:36:18.000000000 +1000
+++ linux-2.6/mm/filemap.c 2006-04-30 21:42:42.000000000 +1000
@@ -394,6 +394,21 @@ int filemap_write_and_wait_range(struct
return err;
}
+int __add_to_page_cache(struct page *page, struct address_space *mapping,
+ pgoff_t offset)
+{
+ int error = radix_tree_insert(&mapping->page_tree, offset, page);
+ if (!error) {
+ page_cache_get(page);
+ SetPageLocked(page);
+ page->mapping = mapping;
+ page->index = offset;
+ mapping->nrpages++;
+ pagecache_acct(1);
+ }
+ return error;
+}
+
/*
* This function is used to add newly allocated pagecache pages:
* the page is new, so we can just run SetPageLocked() against it.
@@ -408,18 +423,11 @@ int add_to_page_cache(struct page *page,
if (error == 0) {
write_lock_irq(&mapping->tree_lock);
- error = radix_tree_insert(&mapping->page_tree, offset, page);
- if (!error) {
- page_cache_get(page);
- SetPageLocked(page);
- page->mapping = mapping;
- page->index = offset;
- mapping->nrpages++;
- pagecache_acct(1);
- }
+ error = __add_to_page_cache(page, mapping, offset);
write_unlock_irq(&mapping->tree_lock);
radix_tree_preload_end();
}
+
return error;
}
Index: linux-2.6/mm/readahead.c
===================================================================
--- linux-2.6.orig/mm/readahead.c 2006-04-30 19:36:18.000000000 +1000
+++ linux-2.6/mm/readahead.c 2006-04-30 21:42:15.000000000 +1000
@@ -14,6 +14,7 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
+#include <linux/swap.h>
void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
@@ -164,37 +165,60 @@ int read_cache_pages(struct address_spac
EXPORT_SYMBOL(read_cache_pages);
-static int read_pages(struct address_space *mapping, struct file *filp,
+static void __pagevec_read_pages(struct file *filp,
+ struct address_space *mapping, struct pagevec *pvec)
+{
+ int i = 0, in_cache;
+
+ if (radix_tree_preload(GFP_KERNEL))
+ goto out_error;
+
+ write_lock_irq(&mapping->tree_lock);
+ for (; i < pagevec_count(pvec); i++) {
+ struct page *page = pvec->pages[i];
+ unsigned long offset = page->index;
+
+ if (__add_to_page_cache(page, mapping, offset))
+ break; /* error */
+ }
+ write_unlock_irq(&mapping->tree_lock);
+ radix_tree_preload_end();
+
+ in_cache = i;
+ for (i = 0; i < in_cache; i++) {
+ struct page *page = pvec->pages[i];
+ mapping->a_ops->readpage(filp, page);
+ lru_cache_add(page);
+ }
+
+out_error:
+ for (; i < pagevec_count(pvec); i++) {
+ struct page *page = pvec->pages[i];
+ page_cache_release(page);
+ }
+
+ pagevec_reinit(pvec);
+}
+
+static void read_pages(struct address_space *mapping, struct file *filp,
struct list_head *pages, unsigned nr_pages)
{
- unsigned page_idx;
- struct pagevec lru_pvec;
- int ret;
+ unsigned i;
+ struct pagevec pvec;
if (mapping->a_ops->readpages) {
- ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
- goto out;
+ mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
+ return;
}
- pagevec_init(&lru_pvec, 0);
- for (page_idx = 0; page_idx < nr_pages; page_idx++) {
+ pagevec_init(&pvec, 0);
+ for (i = 0; i < nr_pages; i++) {
struct page *page = list_to_page(pages);
list_del(&page->lru);
- if (!add_to_page_cache(page, mapping,
- page->index, GFP_KERNEL)) {
- ret = mapping->a_ops->readpage(filp, page);
- if (ret != AOP_TRUNCATED_PAGE) {
- if (!pagevec_add(&lru_pvec, page))
- __pagevec_lru_add(&lru_pvec);
- continue;
- } /* else fall through to release */
- }
- page_cache_release(page);
+
+ if (!pagevec_add(&pvec, page) || i == nr_pages-1)
+ __pagevec_read_pages(filp, mapping, &pvec);
}
- pagevec_lru_add(&lru_pvec);
- ret = 0;
-out:
- return ret;
}
/*
Index: linux-2.6/include/linux/pagemap.h
===================================================================
--- linux-2.6.orig/include/linux/pagemap.h 2006-04-30 19:36:18.000000000 +1000
+++ linux-2.6/include/linux/pagemap.h 2006-04-30 21:42:16.000000000 +1000
@@ -97,6 +97,8 @@ extern struct page * read_cache_page(str
extern int read_cache_pages(struct address_space *mapping,
struct list_head *pages, filler_t *filler, void *data);
+int __add_to_page_cache(struct page *page, struct address_space *mapping,
+ unsigned long index);
int add_to_page_cache(struct page *page, struct address_space *mapping,
unsigned long index, gfp_t gfp_mask);
int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
next prev parent reply other threads:[~2006-04-30 11:44 UTC|newest]
Thread overview: 52+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-04-26 13:53 Jens Axboe
2006-04-26 14:43 ` Nick Piggin
2006-04-26 19:46 ` Jens Axboe
2006-04-27 5:39 ` Chen, Kenneth W
2006-04-27 6:07 ` Nick Piggin
2006-04-27 6:15 ` Andi Kleen
2006-04-27 7:51 ` Chen, Kenneth W
2006-04-26 16:55 ` Andrew Morton
2006-04-26 17:42 ` Jens Axboe
2006-04-26 18:10 ` Andrew Morton
2006-04-26 18:23 ` Jens Axboe
2006-04-26 18:46 ` Andrew Morton
2006-04-26 19:21 ` Jens Axboe
2006-04-27 5:58 ` Nick Piggin
2006-04-26 18:34 ` Christoph Lameter
2006-04-26 18:47 ` Andrew Morton
2006-04-26 18:48 ` Christoph Lameter
2006-04-26 18:49 ` Jens Axboe
2006-04-26 20:31 ` Christoph Lameter
2006-04-28 14:01 ` David Chinner
2006-04-28 14:10 ` David Chinner
2006-04-30 9:49 ` Nick Piggin
2006-04-30 11:20 ` Nick Piggin
2006-04-30 11:39 ` Jens Axboe
2006-04-30 11:44 ` Nick Piggin [this message]
2006-04-26 18:58 ` Christoph Hellwig
2006-04-26 19:02 ` Jens Axboe
2006-04-26 19:00 ` Linus Torvalds
2006-04-26 19:15 ` Jens Axboe
2006-04-26 20:12 ` Andrew Morton
2006-04-27 7:45 ` Jens Axboe
2006-04-27 7:47 ` Jens Axboe
2006-04-27 7:57 ` Nick Piggin
2006-04-27 8:02 ` Nick Piggin
2006-04-27 9:00 ` Jens Axboe
2006-04-27 13:36 ` Nick Piggin
2006-04-27 8:36 ` Jens Axboe
[not found] ` <20060428112835.GA8072@mail.ustc.edu.cn>
2006-04-28 11:28 ` Wu Fengguang
2006-04-27 5:49 ` Nick Piggin
2006-04-27 15:12 ` Linus Torvalds
2006-04-28 4:54 ` Nick Piggin
2006-04-28 5:34 ` Linus Torvalds
2006-04-27 9:35 ` Jens Axboe
2006-04-27 5:22 ` Nick Piggin
2006-04-26 18:57 ` Jens Axboe
2006-04-27 2:19 ` KAMEZAWA Hiroyuki
2006-04-27 8:03 ` Jens Axboe
2006-04-27 11:16 ` Jens Axboe
2006-04-27 11:41 ` KAMEZAWA Hiroyuki
2006-04-27 11:45 ` Jens Axboe
2006-04-28 9:10 ` Pavel Machek
2006-04-28 9:21 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4454A335.7070600@yahoo.com.au \
--to=nickpiggin@yahoo.com.au \
--cc=akpm@osdl.org \
--cc=axboe@suse.de \
--cc=clameter@sgi.com \
--cc=dgc@sgi.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=npiggin@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox