From: Ying Han <yinghan@google.com>
To: linux-mm@kvack.org, linux-kernel <linux-kernel@vger.kernel.org>,
akpm <akpm@linux-foundation.org>,
torvalds@linux-foundation.org, "Ingo Molnar" <mingo@elte.hu>,
"Mike Waychison" <mikew@google.com>,
"Rohit Seth" <rohitseth@google.com>,
"Hugh Dickins" <hugh@veritas.com>,
"Peter Zijlstra" <a.p.zijlstra@chello.nl>,
"H. Peter Anvin" <hpa@zytor.com>,
"Török Edwin" <edwintorok@gmail.com>,
"Lee Schermerhorn" <lee.schermerhorn@hp.com>,
"Nick Piggin" <npiggin@suse.de>,
"Wu Fengguang" <fengguang.wu@intel.com>
Subject: [PATCH][1/2]page_fault retry with NOPAGE_RETRY
Date: Wed, 8 Apr 2009 13:06:23 -0700 [thread overview]
Message-ID: <604427e00904081306vd7f2671x7b5ec8baf2b9bf58@mail.gmail.com> (raw)
support for FAULT_FLAG_RETRY with no user change:
Signed-off-by: Ying Han <yinghan@google.com>
Mike Waychison <mikew@google.com>
include/linux/fs.h | 2 +-
include/linux/mm.h | 2 +
mm/filemap.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++--
mm/memory.c | 33 +++++++++++++++++------
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4a853ef..29c2c39 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -793,7 +793,7 @@ struct file_ra_state {
there are only # of pages ahead */
unsigned int ra_pages; /* Maximum readahead window */
- int mmap_miss; /* Cache miss stat for mmap accesses */
+ unsigned int mmap_miss; /* Cache miss stat for mmap accesses */
loff_t prev_pos; /* Cache last read() position */
};
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ffee2f7..5a134a9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -144,6 +144,7 @@ extern pgprot_t protection_map[16];
#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */
#define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */
+#define FAULT_FLAG_RETRY 0x04 /* Retry major fault */
/*
@@ -690,6 +691,7 @@ static inline int page_mapped(struct page *page)
#define VM_FAULT_MINOR 0 /* For backwards compat. Remove me quickly. */
+#define VM_FAULT_RETRY 0x0010
#define VM_FAULT_OOM 0x0001
#define VM_FAULT_SIGBUS 0x0002
#define VM_FAULT_MAJOR 0x0004
diff --git a/mm/filemap.c b/mm/filemap.c
index f3e5f89..6eb7c36 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -714,6 +714,58 @@ repeat:
EXPORT_SYMBOL(find_lock_page);
/**
+ * find_lock_page_retry - locate, pin and lock a pagecache page
+ * @mapping: the address_space to search
+ * @offset: the page index
+ * @vma: vma in which the fault was taken
+ * @ppage: zero if page not present, otherwise point to the page in pagecache
+ * @retry: 1 indicate caller tolerate a retry.
+ *
+ * If retry flag is on, and page is already locked by someone else, return
+ * a hint of retry and leave *ppage untouched.
+ *
+ * Return *ppage==NULL if page is not in pagecache. Otherwise return *ppage
+ * points to the page in the pagecache with ret=VM_FAULT_RETRY indicate a
+ * hint to caller for retry, or ret=0 which means page is succefully
+ * locked.
+ */
+unsigned find_lock_page_retry(struct address_space *mapping, pgoff_t offset,
+ struct vm_area_struct *vma, struct page **ppage,
+ int retry)
+{
+ unsigned int ret = 0;
+ struct page *page;
+
+repeat:
+ page = find_get_page(mapping, offset);
+ if (page) {
+ if (!retry)
+ lock_page(page);
+ else {
+ if (!trylock_page(page)) {
+ struct mm_struct *mm = vma->vm_mm;
+
+ up_read(&mm->mmap_sem);
+ wait_on_page_locked(page);
+ down_read(&mm->mmap_sem);
+
+ page_cache_release(page);
+ return VM_FAULT_RETRY;
+ }
+ }
+ if (unlikely(page->mapping != mapping)) {
+ unlock_page(page);
+ page_cache_release(page);
+ goto repeat;
+ }
+ VM_BUG_ON(page->index != offset);
+ }
+ *ppage = page;
+ return ret;
+}
+EXPORT_SYMBOL(find_lock_page_retry);
+
+/**
* find_or_create_page - locate or add a pagecache page
* @mapping: the page's address_space
* @index: the page's index into the mapping
@@ -1444,6 +1496,8 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_
pgoff_t size;
int did_readaround = 0;
int ret = 0;
+ int retry_flag = vmf->flags & FAULT_FLAG_RETRY;
+ int retry_ret;
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if (vmf->pgoff >= size)
@@ -1458,6 +1512,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_
*/
retry_find:
page = find_lock_page(mapping, vmf->pgoff);
+
/*
* For sequential accesses, we use the generic readahead logic.
*/
@@ -1465,7 +1520,13 @@ retry_find:
if (!page) {
page_cache_sync_readahead(mapping, ra, file,
vmf->pgoff, 1);
- page = find_lock_page(mapping, vmf->pgoff);
+ retry_ret = find_lock_page_retry(mapping, vmf->pgoff,
+ vma, &page, retry_flag);
+ if (retry_ret == VM_FAULT_RETRY) {
+ /* counteract the followed retry hit */
+ ra->mmap_miss++;
+ return retry_ret;
+ }
if (!page)
goto no_cached_page;
}
@@ -1504,7 +1565,14 @@ retry_find:
start = vmf->pgoff - ra_pages / 2;
do_page_cache_readahead(mapping, file, start, ra_pages);
}
- page = find_lock_page(mapping, vmf->pgoff);
+retry_find_retry:
+ retry_ret = find_lock_page_retry(mapping, vmf->pgoff,
+ vma, &page, retry_flag);
+ if (retry_ret == VM_FAULT_RETRY) {
+ /* counteract the followed retry hit */
+ ra->mmap_miss++;
+ return retry_ret;
+ }
if (!page)
goto no_cached_page;
}
@@ -1548,7 +1616,7 @@ no_cached_page:
* meantime, we'll just come back here and read it again.
*/
if (error >= 0)
- goto retry_find;
+ goto retry_find_retry;
/*
* An error return from page_cache_read can result if the
diff --git a/mm/memory.c b/mm/memory.c
index 164951c..5e215c9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2467,6 +2467,13 @@ static int __do_fault(struct mm_struct *mm, struct vm_a
vmf.page = NULL;
ret = vma->vm_ops->fault(vma, &vmf);
+
+ /* page may be available, but we have to restart the process
+ * because mmap_sem was dropped during the ->fault
+ */
+ if (ret & VM_FAULT_RETRY)
+ return ret;
+
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
return ret;
@@ -2611,8 +2618,10 @@ static int do_linear_fault(struct mm_struct *mm, struct
{
pgoff_t pgoff = (((address & PAGE_MASK)
- vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
- unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0);
+ int write = write_access & ~FAULT_FLAG_RETRY;
+ unsigned int flags = (write ? FAULT_FLAG_WRITE : 0);
+ flags |= (write_access & FAULT_FLAG_RETRY);
pte_unmap(page_table);
return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
}
@@ -2726,26 +2735,32 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_ar
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
+ int ret;
__set_current_state(TASK_RUNNING);
- count_vm_event(PGFAULT);
-
- if (unlikely(is_vm_hugetlb_page(vma)))
- return hugetlb_fault(mm, vma, address, write_access);
+ if (unlikely(is_vm_hugetlb_page(vma))) {
+ ret = hugetlb_fault(mm, vma, address, write_access);
+ goto out;
+ }
+ ret = VM_FAULT_OOM;
pgd = pgd_offset(mm, address);
pud = pud_alloc(mm, pgd, address);
if (!pud)
- return VM_FAULT_OOM;
+ goto out;
pmd = pmd_alloc(mm, pud, address);
if (!pmd)
- return VM_FAULT_OOM;
+ goto out;
pte = pte_alloc_map(mm, pmd, address);
if (!pte)
- return VM_FAULT_OOM;
+ goto out;
- return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
+ ret = handle_pte_fault(mm, vma, address, pte, pmd, write_access);
+out:
+ if (!(ret & VM_FAULT_RETRY))
+ count_vm_event(PGFAULT);
+ return ret;
}
#ifndef __PAGETABLE_PUD_FOLDED
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next reply other threads:[~2009-04-08 20:06 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-04-08 20:06 Ying Han [this message]
-- strict thread matches above, loose matches on Subject: below --
2009-04-08 20:02 Ying Han
2009-04-09 7:47 ` Wu Fengguang
2009-04-09 16:21 ` Ying Han
2009-04-10 0:25 ` Wu Fengguang
2009-04-10 6:02 ` Andrew Morton
2009-04-10 6:32 ` Ying Han
2009-04-10 6:48 ` Wu Fengguang
2009-04-10 7:30 ` Wu Fengguang
2009-04-10 16:01 ` Linus Torvalds
2009-04-10 17:57 ` Ying Han
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=604427e00904081306vd7f2671x7b5ec8baf2b9bf58@mail.gmail.com \
--to=yinghan@google.com \
--cc=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=edwintorok@gmail.com \
--cc=fengguang.wu@intel.com \
--cc=hpa@zytor.com \
--cc=hugh@veritas.com \
--cc=lee.schermerhorn@hp.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mikew@google.com \
--cc=mingo@elte.hu \
--cc=npiggin@suse.de \
--cc=rohitseth@google.com \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox