From: Dave McCracken <dmccr@us.ibm.com>
To: Andrew Morton <akpm@digeo.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH] Fix vmtruncate race and distributed filesystem race
Date: Thu, 12 Jun 2003 17:56:50 -0500 [thread overview]
Message-ID: <184910000.1055458610@baldur.austin.ibm.com> (raw)
In-Reply-To: <20030612144418.49f75066.akpm@digeo.com>
[-- Attachment #1: Type: text/plain, Size: 1124 bytes --]
--On Thursday, June 12, 2003 14:44:18 -0700 Andrew Morton <akpm@digeo.com>
wrote:
> Well it is not "worse". Futzing with i_sem in do_no_page() is pretty
> gross. You could add vm_ops->prevalidate() or something if it worries you.
Actually I've been studying the logic. I don't think we need to serialize
with i_sem at all. i_size has already been changed, so just doing a retry
immediately will be safe. Distributed filesystems should also be safe as
long as they mark the page invalid before they call invalidate_mmap_range().
I like your idea of doing an atomic_t instead of a seqlock. The original
idea from Andrea implied there was a range of time it was unstable, but
with this scheme a single increment is sufficient.
I also think if we can solve both the vmtruncate and the distributed file
system races without adding any vm_ops, we should.
Here's a new patch. Does this look better?
Dave
======================================================================
Dave McCracken IBM Linux Base Kernel Team 1-512-838-3059
dmccr@us.ibm.com T/L 678-3059
[-- Attachment #2: trunc-2.5.70-mm8-2.diff --]
[-- Type: text/plain, Size: 3966 bytes --]
--- 2.5.70-mm8/./drivers/mtd/devices/blkmtd.c 2003-05-26 20:00:21.000000000 -0500
+++ 2.5.70-mm8-trunc/./drivers/mtd/devices/blkmtd.c 2003-06-12 16:57:52.000000000 -0500
@@ -1189,6 +1189,7 @@ static int __init init_blkmtd(void)
INIT_LIST_HEAD(&mtd_rawdevice->as.locked_pages);
mtd_rawdevice->as.host = NULL;
init_MUTEX(&(mtd_rawdevice->as.i_shared_sem));
+ atomic_set(&(mtd_rawdevice->as.truncate_count), 0);
mtd_rawdevice->as.a_ops = &blkmtd_aops;
INIT_LIST_HEAD(&mtd_rawdevice->as.i_mmap);
--- 2.5.70-mm8/./include/linux/fs.h 2003-06-12 13:37:28.000000000 -0500
+++ 2.5.70-mm8-trunc/./include/linux/fs.h 2003-06-12 17:00:04.000000000 -0500
@@ -323,6 +323,7 @@ struct address_space {
struct list_head i_mmap; /* list of private mappings */
struct list_head i_mmap_shared; /* list of shared mappings */
struct semaphore i_shared_sem; /* protect both above lists */
+ atomic_t truncate_count; /* Cover race condition with truncate */
unsigned long dirtied_when; /* jiffies of first page dirtying */
int gfp_mask; /* how to allocate the pages */
struct backing_dev_info *backing_dev_info; /* device readahead, etc */
--- 2.5.70-mm8/./fs/inode.c 2003-06-12 13:37:22.000000000 -0500
+++ 2.5.70-mm8-trunc/./fs/inode.c 2003-06-12 16:59:30.000000000 -0500
@@ -185,6 +185,7 @@ void inode_init_once(struct inode *inode
INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
spin_lock_init(&inode->i_data.page_lock);
init_MUTEX(&inode->i_data.i_shared_sem);
+ atomic_set(&inode->i_data.truncate_count, 0);
INIT_LIST_HEAD(&inode->i_data.private_list);
spin_lock_init(&inode->i_data.private_lock);
INIT_LIST_HEAD(&inode->i_data.i_mmap);
--- 2.5.70-mm8/./mm/swap_state.c 2003-05-26 20:00:39.000000000 -0500
+++ 2.5.70-mm8-trunc/./mm/swap_state.c 2003-06-12 16:59:53.000000000 -0500
@@ -44,6 +44,7 @@ struct address_space swapper_space = {
.i_mmap = LIST_HEAD_INIT(swapper_space.i_mmap),
.i_mmap_shared = LIST_HEAD_INIT(swapper_space.i_mmap_shared),
.i_shared_sem = __MUTEX_INITIALIZER(swapper_space.i_shared_sem),
+ .truncate_count = ATOMIC_INIT(0),
.private_lock = SPIN_LOCK_UNLOCKED,
.private_list = LIST_HEAD_INIT(swapper_space.private_list),
};
--- 2.5.70-mm8/./mm/memory.c 2003-06-12 13:37:31.000000000 -0500
+++ 2.5.70-mm8-trunc/./mm/memory.c 2003-06-12 17:51:55.000000000 -0500
@@ -1138,6 +1138,8 @@ invalidate_mmap_range(struct address_spa
hlen = ULONG_MAX - hba + 1;
}
down(&mapping->i_shared_sem);
+ /* Protect against page fault */
+ atomic_inc(&mapping->truncate_count);
if (unlikely(!list_empty(&mapping->i_mmap)))
invalidate_mmap_range_list(&mapping->i_mmap, hba, hlen);
if (unlikely(!list_empty(&mapping->i_mmap_shared)))
@@ -1390,8 +1392,10 @@ do_no_page(struct mm_struct *mm, struct
unsigned long address, int write_access, pte_t *page_table, pmd_t *pmd)
{
struct page * new_page;
+ struct address_space *mapping;
pte_t entry;
struct pte_chain *pte_chain;
+ unsigned sequence;
int ret;
if (!vma->vm_ops || !vma->vm_ops->nopage)
@@ -1400,6 +1404,9 @@ do_no_page(struct mm_struct *mm, struct
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
+ mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
+retry:
+ sequence = atomic_read(&mapping->truncate_count);
new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0);
/* no page was available -- either SIGBUS or OOM */
@@ -1428,6 +1435,16 @@ do_no_page(struct mm_struct *mm, struct
}
spin_lock(&mm->page_table_lock);
+ /*
+ * For a file-backed vma, someone could have truncated or otherwise
+ * invalidated this page. If invalidate_mmap_range got called,
+ * retry getting the page.
+ */
+ if (unlikely(sequence != atomic_read(&mapping->truncate_count))) {
+ spin_unlock(&mm->page_table_lock);
+ page_cache_release(new_page);
+ goto retry;
+ }
page_table = pte_offset_map(pmd, address);
/*
next prev parent reply other threads:[~2003-06-12 22:56 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-06-12 20:16 Dave McCracken
2003-06-12 20:49 ` Andrew Morton
2003-06-12 21:00 ` Andrew Morton
2003-06-12 21:08 ` Dave McCracken
2003-06-12 21:44 ` Andrew Morton
2003-06-12 22:56 ` Dave McCracken [this message]
2003-06-12 23:07 ` Andrew Morton
2003-06-20 0:17 ` Andrea Arcangeli
2003-06-23 3:28 ` Paul E. McKenney
2003-06-23 6:29 ` Andrea Arcangeli
2003-06-23 6:32 ` Andrew Morton
2003-06-23 7:43 ` Andrea Arcangeli
2003-06-23 7:56 ` Andrew Morton
2003-06-23 8:10 ` Andrea Arcangeli
2003-06-24 1:37 ` Paul E. McKenney
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=184910000.1055458610@baldur.austin.ibm.com \
--to=dmccr@us.ibm.com \
--cc=akpm@digeo.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox