From: Nick Piggin <nickpiggin@yahoo.com.au>
To: Linux Memory Management <linux-mm@kvack.org>
Subject: Re: [PATCH 0/7] abstract pagetable locking and pte updates
Date: Fri, 29 Oct 2004 21:45:57 +1000
Message-ID: <41822D75.3090802@yahoo.com.au>
In-Reply-To: <4181EF2D.5000407@yahoo.com.au>

[-- Attachment #1: Type: text/plain, Size: 710 bytes --]

Nick Piggin wrote:
> Hello,
> 
> Following are patches that abstract page table operations to
> allow lockless implementations by using cmpxchg or per-pte locks.
> 

One more patch - this one provides a generic framework for pte
locks, plus a basic i386 reference implementation (which simply
ifdefs out the cmpxchg version). It boots, runs, and has survived
some stress testing.
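
For anyone skimming before diving into the full diff: the i386
implementation repurposes a software-available pte bit (bit 9,
previously _PAGE_UNUSED1) as a per-pte spinlock. A minimal sketch
of the idea, condensed from the ptep_lock / ptep_unlock macros in
the attached patch (the my_ptep_* function names are purely for
illustration; the patch defines these as macros):

	#define _PAGE_BIT_LOCKED	9	/* software-available on i386 */

	static inline void my_ptep_lock(pte_t *ptep)
	{
		preempt_disable();
		/* spin until we own the pte's lock bit */
		while (test_and_set_bit(_PAGE_BIT_LOCKED, &ptep->pte_low))
			cpu_relax();
	}

	static inline void my_ptep_unlock(pte_t *ptep)
	{
		/* unlocking a pte we don't hold locked is a bug */
		if (!test_and_clear_bit(_PAGE_BIT_LOCKED, &ptep->pte_low))
			BUG();
		preempt_enable();
	}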

I should have sorted this out before sending the patches for
RFC. The generic code did need a few lines of changes, but not
many, as you can see. It still needs some tidying up; I only
wrote it in a few minutes.
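
To make the shape of the generic framework concrete, a caller of
the modify interface ends up looking roughly like this (an
illustrative fragment, not lifted from any real call site - the
real users are the mm/memory.c changes in the earlier patches):

	struct pte_modify pmod;
	pte_t entry;

	entry = ptep_begin_modify(&pmod, mm, ptep);	/* locks the pte */
	entry = pte_mkdirty(pte_mkyoung(entry));
	ptep_commit(&pmod, mm, ptep, entry);		/* stores, unlocks */

With __HAVE_ARCH_PTEP_LOCK, ptep_begin_modify spins on the pte's
lock bit, and ptep_commit writes the new pte with _PAGE_LOCKED
still set before ptep_unlock clears it, so the lock is never
dropped early. With __HAVE_ARCH_PTEP_CMPXCHG, the same calls
should reduce to a plain read and a cmpxchg.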

And now before anyone gets a chance to shoot down the whole thing,
I just have to say

	"look ma, no page_table_lock!"

[-- Attachment #2: vm-i386-locked-pte.patch --]
[-- Type: text/x-patch, Size: 11031 bytes --]

---

 linux-2.6-npiggin/include/asm-generic/pgtable.h |  128 +++++++++++++++++++++++-
 linux-2.6-npiggin/include/asm-i386/pgtable.h    |   33 ++++++
 linux-2.6-npiggin/include/linux/mm.h            |    7 -
 linux-2.6-npiggin/kernel/futex.c                |    5 
 linux-2.6-npiggin/mm/memory.c                   |   13 +-
 5 files changed, 174 insertions(+), 12 deletions(-)

diff -puN include/asm-i386/pgtable.h~vm-i386-locked-pte include/asm-i386/pgtable.h
--- linux-2.6/include/asm-i386/pgtable.h~vm-i386-locked-pte	2004-10-29 19:12:15.000000000 +1000
+++ linux-2.6-npiggin/include/asm-i386/pgtable.h	2004-10-29 20:38:38.000000000 +1000
@@ -106,6 +106,8 @@ void paging_init(void);
 #define _PAGE_BIT_UNUSED3	11
 #define _PAGE_BIT_NX		63
 
+#define _PAGE_BIT_LOCKED	9
+
 #define _PAGE_PRESENT	0x001
 #define _PAGE_RW	0x002
 #define _PAGE_USER	0x004
@@ -119,6 +121,8 @@ void paging_init(void);
 #define _PAGE_UNUSED2	0x400
 #define _PAGE_UNUSED3	0x800
 
+#define _PAGE_LOCKED	0x200
+
 #define _PAGE_FILE	0x040	/* set:pagecache unset:swap */
 #define _PAGE_PROTNONE	0x080	/* If not present */
 #ifdef CONFIG_X86_PAE
@@ -231,11 +235,13 @@ static inline pte_t pte_exprotect(pte_t 
 static inline pte_t pte_mkclean(pte_t pte)	{ (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkold(pte_t pte)	{ (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
 static inline pte_t pte_wrprotect(pte_t pte)	{ (pte).pte_low &= ~_PAGE_RW; return pte; }
+static inline pte_t pte_mkunlocked(pte_t pte)	{ (pte).pte_low &= ~_PAGE_LOCKED; return pte; }
 static inline pte_t pte_mkread(pte_t pte)	{ (pte).pte_low |= _PAGE_USER; return pte; }
 static inline pte_t pte_mkexec(pte_t pte)	{ (pte).pte_low |= _PAGE_USER; return pte; }
 static inline pte_t pte_mkdirty(pte_t pte)	{ (pte).pte_low |= _PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkyoung(pte_t pte)	{ (pte).pte_low |= _PAGE_ACCESSED; return pte; }
 static inline pte_t pte_mkwrite(pte_t pte)	{ (pte).pte_low |= _PAGE_RW; return pte; }
+static inline pte_t pte_mklocked(pte_t pte)	{ (pte).pte_low |= _PAGE_LOCKED; return pte; }
 
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
@@ -398,7 +404,32 @@ extern pte_t *lookup_address(unsigned lo
 		}							  \
 	} while (0)
 
-#define __HAVE_ARCH_PTEP_CMPXCHG
+#define __HAVE_ARCH_PTEP_LOCK
+#define ptep_xchg(__ptep, __newval)					\
+({									\
+ 	pte_t ret;							\
+	/* Just need to make sure we keep the _PAGE_BIT_LOCKED bit */	\
+	ret.pte_low = xchg(&(__ptep)->pte_low, (__newval).pte_low);	\
+	ret.pte_high = (__ptep)->pte_high;				\
+	(__ptep)->pte_high = (__newval).pte_high;			\
+	ret;								\
+})
+
+#define ptep_lock(__ptep)						\
+do {									\
+	preempt_disable();						\
+	while (unlikely(test_and_set_bit(_PAGE_BIT_LOCKED, &(__ptep)->pte_low))) \
+		cpu_relax();						\
+} while (0)
+
+#define ptep_unlock(__ptep)						\
+do {									\
+	if (unlikely(!test_and_clear_bit(_PAGE_BIT_LOCKED, &(__ptep)->pte_low))) \
+		BUG();							\
+	preempt_enable();						\
+} while (0)
+
+//#define __HAVE_ARCH_PTEP_CMPXCHG
 
 #ifdef CONFIG_X86_PAE
 #define __HAVE_ARCH_PTEP_ATOMIC_READ
diff -puN include/asm-generic/pgtable.h~vm-i386-locked-pte include/asm-generic/pgtable.h
--- linux-2.6/include/asm-generic/pgtable.h~vm-i386-locked-pte	2004-10-29 19:35:14.000000000 +1000
+++ linux-2.6-npiggin/include/asm-generic/pgtable.h	2004-10-29 20:54:56.000000000 +1000
@@ -135,7 +135,7 @@ static inline void ptep_mkdirty(pte_t *p
 #endif
 
 #ifndef __ASSEMBLY__
-#ifdef __HAVE_ARCH_PTEP_CMPXCHG
+#if defined(__HAVE_ARCH_PTEP_CMPXCHG)
 #define mm_lock_page_table(__mm)					\
 do {									\
 } while (0);
@@ -254,7 +254,130 @@ do {} while (0)
 #define ptep_verify_finish(__pmod, __mm, __ptep)			\
 	ptep_verify(__pmod, __mm, __ptep)
 
-#else /* __HAVE_ARCH_PTEP_CMPXCHG */ /* GENERIC_PTEP_LOCKING follows */
+#elif defined(__HAVE_ARCH_PTEP_LOCK)
+
+#define mm_lock_page_table(__mm)					\
+do {									\
+} while (0);
+
+#define mm_unlock_page_table(__mm)					\
+do {									\
+} while (0);
+
+#define mm_pin_pages(__mm)						\
+do {									\
+} while (0)
+
+#define mm_unpin_pages(__mm)						\
+do {									\
+} while (0)
+
+#define ptep_pin_pages(__mm, __ptep)					\
+do {									\
+	ptep_lock(__ptep);						\
+} while (0)
+
+#define ptep_unpin_pages(__mm, __ptep)					\
+do {									\
+	ptep_unlock(__ptep);						\
+} while (0)
+
+/* mm_lock_page_table doesn't actually take a lock, so this can be 0 */
+#define MM_RELOCK_CHECK 0
+
+struct pte_modify {
+};
+
+#ifndef __HAVE_ARCH_PTEP_ATOMIC_READ
+#define ptep_atomic_read(__ptep)					\
+({									\
+	*__ptep;							\
+})
+#endif
+
+#define ptep_begin_modify(__pmod, __mm, __ptep)				\
+({									\
+ 	(void)__pmod;							\
+ 	(void)__mm;							\
+ 	ptep_lock(__ptep);						\
+ 	pte_mkunlocked(*(__ptep));					\
+})
+
+#define ptep_abort(__pmod, __mm, __ptep)				\
+do { ptep_unlock(__ptep); } while (0)
+
+#define ptep_commit(__pmod, __mm, __ptep, __newval)			\
+({									\
+ 	*(__ptep) = pte_mklocked(__newval);				\
+ 	ptep_unlock(__ptep);						\
+ 	0;								\
+})
+
+#define ptep_commit_flush(__pmod, __mm, __vma, __address, __ptep, __newval) \
+({									\
+ 	ptep_commit(__pmod, __mm, __ptep, __newval);			\
+	flush_tlb_page(__vma, __address);				\
+ 	0;								\
+})
+
+#define ptep_commit_access_flush(__pmod, __mm, __vma, __address, __ptep, __newval, __dirty) \
+({									\
+ 	ptep_set_access_flags(__vma, __address, __ptep,			\
+				pte_mklocked(__newval), __dirty);	\
+ 	ptep_unlock(__ptep);						\
+	flush_tlb_page(__vma, __address);				\
+ 	0;								\
+})
+
+#define ptep_commit_establish_flush(__pmod, __mm, __vma, __address, __ptep, __newval) \
+({									\
+ 	ptep_establish(__vma, __address, __ptep, pte_mklocked(__newval)); \
+ 	ptep_unlock(__ptep);						\
+	flush_tlb_page(__vma, __address);				\
+	0;								\
+})
+
+#define ptep_commit_clear(__pmod, __mm, __ptep, __newval, __oldval) 	\
+({									\
+ 	__oldval = ptep_xchg(__ptep, pte_mklocked(__newval));		\
+ 	__oldval = pte_mkunlocked(__oldval);				\
+ 	ptep_unlock(__ptep);						\
+ 	0;								\
+})
+
+#define ptep_commit_clear_flush(__pmod, __mm, __vma, __address, __ptep, __newval, __oldval) \
+({									\
+ 	ptep_commit_clear(__pmod, __mm, __ptep, __newval, __oldval);	\
+	flush_tlb_page(__vma, __address);				\
+ 	0;								\
+})
+
+#define ptep_commit_clear_flush_young(__pmod, __mm, __vma, __address, __ptep, __young) \
+({									\
+ 	*__young = ptep_clear_flush_young(__vma, __address, __ptep);    \
+ 	ptep_unlock(__ptep);						\
+ 	0;								\
+})
+
+#define ptep_commit_clear_flush_dirty(__pmod, __mm, __vma, __address, __ptep, __dirty) \
+({									\
+ 	*__dirty = ptep_clear_flush_dirty(__vma, __address, __ptep);    \
+ 	ptep_unlock(__ptep);						\
+ 	0;								\
+})
+
+#define ptep_verify(__pmod, __mm, __ptep)				\
+({									\
+ 	0;								\
+})
+
+#define ptep_verify_finish(__pmod, __mm, __ptep)			\
+({									\
+ 	ptep_unlock(__ptep);						\
+ 	0;								\
+})
+
+#else /* __HAVE_ARCH_PTEP_LOCK */ /* GENERIC_PTEP_LOCKING follows */
 /* Use the generic mm->page_table_lock serialised scheme */
 /*
  * XXX: can we make use of this?
@@ -339,6 +462,7 @@ struct pte_modify {
 ({									\
  	(void)__pmod;							\
  	(void)__mm;							\
+ 	/* XXX: needn't be atomic? */					\
  	ptep_atomic_read(__ptep);					\
 })
 
diff -puN mm/memory.c~vm-i386-locked-pte mm/memory.c
--- linux-2.6/mm/memory.c~vm-i386-locked-pte	2004-10-29 20:01:32.000000000 +1000
+++ linux-2.6-npiggin/mm/memory.c	2004-10-29 21:18:31.000000000 +1000
@@ -689,8 +689,9 @@ void zap_page_range(struct vm_area_struc
 	unmap_vmas(mm, vma, address, end, &nr_accounted, details);
 }
 
-void follow_page_finish(struct mm_struct *mm, unsigned long address)
+void follow_page_finish(struct mm_struct *mm, pte_t *p, unsigned long address)
 {
+	ptep_unpin_pages(mm, p);
 	mm_unpin_pages(mm);
 	mm_unlock_page_table(mm);
 }
@@ -699,7 +700,7 @@ void follow_page_finish(struct mm_struct
  * Do a quick page-table lookup for a single page.
  */
 struct page *
-follow_page(struct mm_struct *mm, unsigned long address, int write) 
+follow_page(struct mm_struct *mm, pte_t **p, unsigned long address, int write)
 {
 	pgd_t *pgd;
 	pmd_t *pmd;
@@ -732,6 +733,7 @@ follow_page(struct mm_struct *mm, unsign
 	 * page with get_page? 
 	 */
 	mm_pin_pages(mm);
+	ptep_pin_pages(mm, ptep);
 	pte = ptep_atomic_read(ptep);
 	pte_unmap(ptep);
 
@@ -744,11 +746,13 @@ follow_page(struct mm_struct *mm, unsign
 			if (write && !pte_dirty(pte) && !PageDirty(page))
 				set_page_dirty(page);
 			mark_page_accessed(page);
+			*p = ptep;
 			return page;
 		}
 	}
 
 out_unpin:
+	ptep_unpin_pages(mm, ptep);
 	mm_unpin_pages(mm);
 out:
 	mm_unlock_page_table(mm);
@@ -850,9 +854,10 @@ int get_user_pages(struct task_struct *t
 			continue;
 		}
 		do {
+			pte_t *p;
 			struct page *page;
 			int lookup_write = write;
-			while (!(page = follow_page(mm, start, lookup_write))) {
+			while (!(page = follow_page(mm, &p, start, lookup_write))) {
 				/*
 				 * Shortcut for anonymous pages. We don't want
 				 * to force the creation of pages tables for
@@ -896,7 +901,7 @@ int get_user_pages(struct task_struct *t
 					page_cache_get(page);
 			}
 			if (page)
-				follow_page_finish(mm, start);
+				follow_page_finish(mm, p, start);
 set_vmas:
 			if (vmas)
 				vmas[i] = vma;
diff -puN kernel/futex.c~vm-i386-locked-pte kernel/futex.c
--- linux-2.6/kernel/futex.c~vm-i386-locked-pte	2004-10-29 21:13:50.000000000 +1000
+++ linux-2.6-npiggin/kernel/futex.c	2004-10-29 21:18:11.000000000 +1000
@@ -144,6 +144,7 @@ static int get_futex_key(unsigned long u
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	struct page *page;
+	pte_t *p;
 	int err;
 
 	/*
@@ -204,11 +205,11 @@ static int get_futex_key(unsigned long u
 	/*
 	 * Do a quick atomic lookup first - this is the fastpath.
 	 */
-	page = follow_page(mm, uaddr, 0);
+	page = follow_page(mm, &p, uaddr, 0);
 	if (likely(page != NULL)) {
 		key->shared.pgoff =
 			page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-		follow_page_finish(mm, uaddr);
+		follow_page_finish(mm, p, uaddr);
 		return 0;
 	}
 
diff -puN include/linux/mm.h~vm-i386-locked-pte include/linux/mm.h
--- linux-2.6/include/linux/mm.h~vm-i386-locked-pte	2004-10-29 21:14:05.000000000 +1000
+++ linux-2.6-npiggin/include/linux/mm.h	2004-10-29 21:17:48.000000000 +1000
@@ -756,9 +756,10 @@ static inline unsigned long vma_pages(st
 extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
 
 extern struct page * vmalloc_to_page(void *addr);
-extern struct page * follow_page(struct mm_struct *mm, unsigned long address,
-		int write);
-extern void follow_page_finish(struct mm_struct *mm, unsigned long address);
+extern struct page * follow_page(struct mm_struct *mm, pte_t **p,
+				unsigned long address, int write);
+extern void follow_page_finish(struct mm_struct *mm, pte_t *p,
+				unsigned long address);
 int remap_pfn_range(struct vm_area_struct *, unsigned long,
 		unsigned long, unsigned long, pgprot_t);
 

_
