From: Nick Piggin <npiggin@suse.de>
To: Andrew Morton <akpm@linux-foundation.org>,
cotte@de.ibm.com, borntraeger@de.ibm.com,
Jared Hulbert <jaredeh@gmail.com>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
Linux Memory Management List <linux-mm@kvack.org>,
Linus Torvalds <torvalds@linux-foundation.org>,
Hugh Dickins <hugh@veritas.com>
Subject: [patch] mm: xip fix fault vs sparse page invalidate race
Date: Mon, 18 Aug 2008 07:44:09 +0200 [thread overview]
Message-ID: <20080818054409.GB3011@wotan.suse.de> (raw)
In-Reply-To: <20080818053821.GA3011@wotan.suse.de>
XIP has a race between sparse pages being inserted into page tables, and
sparse pages being zapped when its time to put a non-sparse page in.
What can happen is that a process can be left with a dangling sparse page
in a MAP_SHARED mapping, while the rest of the world sees the non-sparse
version. Ie. data corruption.
Guard these operations with a seqlock, making fault-in-sparse-pages
the slowpath, and try-to-unmap-sparse-pages the fastpath.
Signed-off-by: Nick Piggin <npiggin@suse.de>
---
Index: linux-2.6/mm/filemap_xip.c
===================================================================
--- linux-2.6.orig/mm/filemap_xip.c 2008-08-18 13:43:53.000000000 +1000
+++ linux-2.6/mm/filemap_xip.c 2008-08-18 15:02:29.000000000 +1000
@@ -15,6 +15,8 @@
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
+#include <linux/seqlock.h>
+#include <linux/mutex.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
@@ -22,22 +24,18 @@
* We do use our own empty page to avoid interference with other users
* of ZERO_PAGE(), such as /dev/zero
*/
+static DEFINE_MUTEX(xip_sparse_mutex);
+static seqcount_t xip_sparse_seq = SEQCNT_ZERO;
static struct page *__xip_sparse_page;
+/* called under xip_sparse_mutex */
static struct page *xip_sparse_page(void)
{
if (!__xip_sparse_page) {
struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
- if (page) {
- static DEFINE_SPINLOCK(xip_alloc_lock);
- spin_lock(&xip_alloc_lock);
- if (!__xip_sparse_page)
- __xip_sparse_page = page;
- else
- __free_page(page);
- spin_unlock(&xip_alloc_lock);
- }
+ if (page)
+ __xip_sparse_page = page;
}
return __xip_sparse_page;
}
@@ -174,11 +172,16 @@ __xip_unmap (struct address_space * mapp
pte_t pteval;
spinlock_t *ptl;
struct page *page;
+ unsigned count;
+ int locked = 0;
+
+ count = read_seqcount_begin(&xip_sparse_seq);
page = __xip_sparse_page;
if (!page)
return;
+retry:
spin_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
mm = vma->vm_mm;
@@ -198,6 +201,14 @@ __xip_unmap (struct address_space * mapp
}
}
spin_unlock(&mapping->i_mmap_lock);
+
+ if (locked) {
+ mutex_unlock(&xip_sparse_mutex);
+ } else if (read_seqcount_retry(&xip_sparse_seq, count)) {
+ mutex_lock(&xip_sparse_mutex);
+ locked = 1;
+ goto retry;
+ }
}
/*
@@ -218,7 +229,7 @@ static int xip_file_fault(struct vm_area
int error;
/* XXX: are VM_FAULT_ codes OK? */
-
+again:
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if (vmf->pgoff >= size)
return VM_FAULT_SIGBUS;
@@ -245,6 +256,7 @@ static int xip_file_fault(struct vm_area
__xip_unmap(mapping, vmf->pgoff);
found:
+ printk("%s insert %lx@%lx\n", current->comm, (unsigned long)vmf->virtual_address, xip_pfn);
err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
xip_pfn);
if (err == -ENOMEM)
@@ -252,14 +264,34 @@ found:
BUG_ON(err);
return VM_FAULT_NOPAGE;
} else {
+ int err, ret = VM_FAULT_OOM;
+
+ mutex_lock(&xip_sparse_mutex);
+ write_seqcount_begin(&xip_sparse_seq);
+ error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
+ &xip_mem, &xip_pfn);
+ if (unlikely(!error)) {
+ write_seqcount_end(&xip_sparse_seq);
+ mutex_unlock(&xip_sparse_mutex);
+ goto again;
+ }
+ if (error != -ENODATA)
+ goto out;
/* not shared and writable, use xip_sparse_page() */
page = xip_sparse_page();
if (!page)
- return VM_FAULT_OOM;
+ goto out;
+ err = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
+ page);
+ if (err == -ENOMEM)
+ goto out;
- page_cache_get(page);
- vmf->page = page;
- return 0;
+ ret = VM_FAULT_NOPAGE;
+out:
+ write_seqcount_end(&xip_sparse_seq);
+ mutex_unlock(&xip_sparse_mutex);
+
+ return ret;
}
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2008-08-18 5:44 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-08-18 5:38 [patch] mm: dirty page tracking race fix Nick Piggin
2008-08-18 5:44 ` Nick Piggin [this message]
2008-08-18 6:03 ` [patch] mm: xip/ext2 fix block allocation race Nick Piggin
2008-08-18 10:51 ` Carsten Otte
2008-08-18 10:50 ` [patch] mm: xip fix fault vs sparse page invalidate race Carsten Otte
2008-08-18 7:49 ` [patch] mm: dirty page tracking race fix Peter Zijlstra
2008-08-18 8:03 ` Nick Piggin
2008-08-18 8:07 ` Peter Zijlstra
2008-08-18 8:12 ` Nick Piggin
2008-08-19 9:11 ` Andrew Morton
2008-08-19 10:19 ` Nick Piggin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080818054409.GB3011@wotan.suse.de \
--to=npiggin@suse.de \
--cc=akpm@linux-foundation.org \
--cc=borntraeger@de.ibm.com \
--cc=cotte@de.ibm.com \
--cc=hugh@veritas.com \
--cc=jaredeh@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox