linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Andrea Arcangeli <andrea@suse.de>
To: Linus Torvalds <torvalds@osdl.org>
Cc: Matthew Wilcox <willy@debian.org>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Andrew Morton <akpm@osdl.org>,
	Linux Kernel list <linux-kernel@vger.kernel.org>,
	Ingo Molnar <mingo@elte.hu>, Ben LaHaise <bcrl@kvack.org>,
	linux-mm@kvack.org,
	Architectures Group <linux-arch@vger.kernel.org>
Subject: Re: [PATCH] ppc64: Fix possible race with set_pte on a present PTE
Date: Tue, 25 May 2004 23:44:50 +0200	[thread overview]
Message-ID: <20040525214450.GH29378@dualathlon.random> (raw)
In-Reply-To: <20040525212720.GG29378@dualathlon.random>

In the previous email I attached a slightly older version of the patch
that didn't yet optimize the spurious TLB flush away from do_wp_page.
This is the latest version I tested:

Index: linux-2.5/include/asm-alpha/pgtable.h
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/include/asm-alpha/pgtable.h,v
retrieving revision 1.22
diff -u -p -r1.22 pgtable.h
--- linux-2.5/include/asm-alpha/pgtable.h	23 May 2004 05:02:25 -0000	1.22
+++ linux-2.5/include/asm-alpha/pgtable.h	25 May 2004 20:34:11 -0000
@@ -267,6 +267,28 @@ extern inline pte_t pte_mkexec(pte_t pte
 extern inline pte_t pte_mkdirty(pte_t pte)	{ pte_val(pte) |= __DIRTY_BITS; return pte; }
 extern inline pte_t pte_mkyoung(pte_t pte)	{ pte_val(pte) |= __ACCESS_BITS; return pte; }
 
+static inline void ptep_handle_young_page_fault(pte_t *ptep)
+{
+	/*
+	 * WARNING: this is safe only because the dirty bit
+	 * cannot change transparently in hardware in the pte.
+	 * If the dirty bit in the pte would be set transparently
+	 * by the CPU this should be implemented with set_bit()
+	 * or with a new set_mask64() implementing an atomic "or".
+	 */
+	set_pte(ptep, pte_mkyoung(*ptep));
+}
+
+static inline void ptep_handle_dirty_page_fault(pte_t *ptep)
+{
+	/*
+	 * This can run without atomic instructions even if
+	 * the young bit is set in hardware by the architecture.
+	 * Losing the young bit is not important.
+	 */
+	set_pte(ptep, pte_mkdirty(*ptep));
+}
+
 #define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address))
 
 /* to find an entry in a kernel page-table-directory */
Index: linux-2.5/include/asm-generic/pgtable.h
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/include/asm-generic/pgtable.h,v
retrieving revision 1.4
diff -u -p -r1.4 pgtable.h
--- linux-2.5/include/asm-generic/pgtable.h	19 Jan 2004 18:43:03 -0000	1.4
+++ linux-2.5/include/asm-generic/pgtable.h	25 May 2004 20:38:39 -0000
@@ -12,6 +12,7 @@
  */
 #define ptep_establish(__vma, __address, __ptep, __entry)		\
 do {									\
+	BUG_ON(!pte_dirty(__entry));					\
 	set_pte(__ptep, __entry);					\
 	flush_tlb_page(__vma, __address);				\
 } while (0)
Index: linux-2.5/include/asm-i386/pgtable.h
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/include/asm-i386/pgtable.h,v
retrieving revision 1.42
diff -u -p -r1.42 pgtable.h
--- linux-2.5/include/asm-i386/pgtable.h	23 May 2004 05:02:25 -0000	1.42
+++ linux-2.5/include/asm-i386/pgtable.h	25 May 2004 20:27:00 -0000
@@ -220,7 +220,19 @@ static inline pte_t pte_mkdirty(pte_t pt
 static inline pte_t pte_mkyoung(pte_t pte)	{ (pte).pte_low |= _PAGE_ACCESSED; return pte; }
 static inline pte_t pte_mkwrite(pte_t pte)	{ (pte).pte_low |= _PAGE_RW; return pte; }
 
+/*
+ * This is used only to set the dirty bit during a "dirty" page fault.
+ * Nothing to do on x86 since it's set by the hardware transparently
+ * and it cannot generate page faults.
+ */
+#define ptep_handle_dirty_page_fault(ptep) do { } while (0)
 static inline  int ptep_test_and_clear_dirty(pte_t *ptep)	{ return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); }
+/*
+ * This is used only to set the young bit during a "young" page fault.
+ * Nothing to do on x86 since it's set by the hardware transparently
+ * and it cannot generate page faults.
+ */
+#define ptep_handle_young_page_fault(ptep) do { } while (0)
 static inline  int ptep_test_and_clear_young(pte_t *ptep)	{ return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low); }
 static inline void ptep_set_wrprotect(pte_t *ptep)		{ clear_bit(_PAGE_BIT_RW, &ptep->pte_low); }
 static inline void ptep_mkdirty(pte_t *ptep)			{ set_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); }
Index: linux-2.5/include/asm-x86_64/pgtable.h
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/include/asm-x86_64/pgtable.h,v
retrieving revision 1.27
diff -u -p -r1.27 pgtable.h
--- linux-2.5/include/asm-x86_64/pgtable.h	23 May 2004 05:02:25 -0000	1.27
+++ linux-2.5/include/asm-x86_64/pgtable.h	25 May 2004 20:27:28 -0000
@@ -262,7 +262,19 @@ extern inline pte_t pte_mkexec(pte_t pte
 extern inline pte_t pte_mkdirty(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; }
 extern inline pte_t pte_mkyoung(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; }
 extern inline pte_t pte_mkwrite(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; }
+/*
+ * This is used only to set the dirty bit during a "dirty" page fault.
+ * Nothing to do on x86-64 since it's set by the hardware transparently
+ * and it cannot generate page faults.
+ */
+#define ptep_handle_dirty_page_fault(ptep) do { } while (0)
 static inline  int ptep_test_and_clear_dirty(pte_t *ptep)	{ return test_and_clear_bit(_PAGE_BIT_DIRTY, ptep); }
+/*
+ * This is used only to set the young bit during a "young" page fault.
+ * Nothing to do on x86-64 since it's set by the hardware transparently
+ * and it cannot generate page faults.
+ */
+#define ptep_handle_young_page_fault(ptep) do { } while(0)
 static inline  int ptep_test_and_clear_young(pte_t *ptep)	{ return test_and_clear_bit(_PAGE_BIT_ACCESSED, ptep); }
 static inline void ptep_set_wrprotect(pte_t *ptep)		{ clear_bit(_PAGE_BIT_RW, ptep); }
 static inline void ptep_mkdirty(pte_t *ptep)			{ set_bit(_PAGE_BIT_DIRTY, ptep); }
Index: linux-2.5/mm/memory.c
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/mm/memory.c,v
retrieving revision 1.169
diff -u -p -r1.169 memory.c
--- linux-2.5/mm/memory.c	23 May 2004 05:10:11 -0000	1.169
+++ linux-2.5/mm/memory.c	25 May 2004 21:20:11 -0000
@@ -1056,7 +1056,7 @@ static int do_wp_page(struct mm_struct *
 			flush_cache_page(vma, address);
 			entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
 					      vma);
-			ptep_establish(vma, address, page_table, entry);
+			set_pte(page_table, entry);
 			update_mmu_cache(vma, address, entry);
 			pte_unmap(page_table);
 			spin_unlock(&mm->page_table_lock);
@@ -1643,10 +1643,9 @@ static inline int handle_pte_fault(struc
 		if (!pte_write(entry))
 			return do_wp_page(mm, vma, address, pte, pmd, entry);
 
-		entry = pte_mkdirty(entry);
+		ptep_handle_dirty_page_fault(pte);
 	}
-	entry = pte_mkyoung(entry);
-	ptep_establish(vma, address, pte, entry);
+	ptep_handle_young_page_fault(pte);
 	update_mmu_cache(vma, address, entry);
 	pte_unmap(pte);
 	spin_unlock(&mm->page_table_lock);
Signed-off-by: Andrea Arcangeli <andrea@suse.de>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

  parent reply	other threads:[~2004-05-25 21:44 UTC|newest]

Thread overview: 71+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <1085369393.15315.28.camel@gaston>
     [not found] ` <Pine.LNX.4.58.0405232046210.25502@ppc970.osdl.org>
     [not found]   ` <1085371988.15281.38.camel@gaston>
     [not found]     ` <Pine.LNX.4.58.0405232134480.25502@ppc970.osdl.org>
     [not found]       ` <1085373839.14969.42.camel@gaston>
2004-05-24  5:10         ` Linus Torvalds
2004-05-24  5:34           ` Benjamin Herrenschmidt
2004-05-24  5:38             ` Benjamin Herrenschmidt
2004-05-24  5:52               ` Benjamin Herrenschmidt
2004-05-24  7:39           ` Ingo Molnar
2004-05-24  5:39             ` Benjamin Herrenschmidt
2004-05-25  3:43           ` Andrea Arcangeli
2004-05-25  4:00             ` Linus Torvalds
2004-05-25  4:17               ` Benjamin Herrenschmidt
2004-05-25  4:37                 ` Andrea Arcangeli
2004-05-25  4:40                   ` Benjamin Herrenschmidt
2004-05-25  4:20               ` Andrea Arcangeli
2004-05-25  4:39                 ` Linus Torvalds
2004-05-25  4:44                   ` Linus Torvalds
2004-05-25  4:59                     ` Andrea Arcangeli
2004-05-25  5:09                       ` Andrea Arcangeli
2004-05-25  4:50                   ` Andrea Arcangeli
2004-05-25  4:59                     ` Linus Torvalds
2004-05-25  4:43                 ` David Mosberger
2004-05-25  4:53                   ` Andrea Arcangeli
2004-05-27 21:56                     ` David Mosberger
2004-05-27 22:00                       ` Benjamin Herrenschmidt
2004-05-27 22:12                         ` David Mosberger
2004-05-25 11:44               ` Matthew Wilcox
2004-05-25 14:48                 ` Linus Torvalds
2004-05-25 15:35                   ` Keith M Wesolowski
2004-05-25 16:19                     ` Linus Torvalds
2004-05-25 17:25                       ` David S. Miller
2004-05-25 17:49                         ` Linus Torvalds
2004-05-25 17:54                           ` David S. Miller
2004-05-25 18:05                             ` Linus Torvalds
2004-05-25 20:30                               ` Linus Torvalds
2004-05-25 20:35                               ` David S. Miller
2004-05-25 20:49                                 ` Linus Torvalds
2004-05-25 20:57                                   ` David S. Miller
2004-05-26  6:20                                   ` Keith M Wesolowski
2004-05-25 21:40                               ` Benjamin Herrenschmidt
2004-05-25 21:54                                 ` Linus Torvalds
2004-05-25 22:00                                   ` Linus Torvalds
2004-05-25 22:07                                     ` Benjamin Herrenschmidt
2004-05-25 22:14                                       ` Linus Torvalds
2004-05-26  0:21                                         ` Benjamin Herrenschmidt
2004-05-26  0:50                                           ` Linus Torvalds
2004-05-26  3:25                                             ` Benjamin Herrenschmidt
2004-05-26  4:08                                               ` Linus Torvalds
2004-05-26  4:12                                                 ` Benjamin Herrenschmidt
2004-05-26  4:18                                                   ` Benjamin Herrenschmidt
2004-05-26  4:50                                                     ` Linus Torvalds
2004-05-26  4:49                                                       ` Benjamin Herrenschmidt
2004-05-26  4:28                                                   ` Linus Torvalds
2004-05-26  4:46                                                 ` Benjamin Herrenschmidt
2004-05-26  4:54                                                   ` Linus Torvalds
2004-05-26  4:55                                                     ` Benjamin Herrenschmidt
2004-05-26  5:41                                                     ` Benjamin Herrenschmidt
2004-05-26  5:59                                                     ` [PATCH] (signoff) " Benjamin Herrenschmidt
2004-05-26  6:55                                                       ` Benjamin Herrenschmidt
2004-05-25 22:05                                   ` [PATCH] " Benjamin Herrenschmidt
2004-05-25 22:09                                 ` Linus Torvalds
2004-05-25 22:19                                   ` Benjamin Herrenschmidt
2004-05-25 22:24                                     ` Linus Torvalds
2004-05-25 21:27                   ` Andrea Arcangeli
2004-05-25 21:43                     ` Linus Torvalds
2004-05-25 21:55                       ` Andrea Arcangeli
2004-05-25 22:01                         ` Linus Torvalds
2004-05-25 22:18                           ` Ivan Kokshaysky
2004-05-25 22:42                             ` Andrea Arcangeli
2004-05-26  2:26                               ` Linus Torvalds
2004-05-26  7:06                                 ` Andrea Arcangeli
2004-05-25 21:44                     ` Andrea Arcangeli [this message]
2004-06-01 12:04 Martin Schwidefsky
2004-06-01 12:10 Martin Schwidefsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040525214450.GH29378@dualathlon.random \
    --to=andrea@suse.de \
    --cc=akpm@osdl.org \
    --cc=bcrl@kvack.org \
    --cc=benh@kernel.crashing.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@elte.hu \
    --cc=torvalds@osdl.org \
    --cc=willy@debian.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox