linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
To: 'Jan Beulich' <JBeulich@novell.com>,
	Nishanth Aravamudan <nacc@us.ibm.com>
Cc: david@gibson.dropbear.id.au, linux-mm@kvack.org,
	Andreas Kleen <ak@suse.de>,
	agl@us.ibm.com, discuss@x86-64.org
Subject: RE: [discuss] Re: BUG in x86_64 hugepage support
Date: Wed, 15 Mar 2006 02:03:00 -0800	[thread overview]
Message-ID: <200603151003.k2FA30g14232@unix-os.sc.intel.com> (raw)
In-Reply-To: <4417E359.76F0.0078.0@novell.com>

Nishanth Aravamudan wrote on Tuesday, March 14, 2006 11:31 PM
> Description: We currently fail mprotect testing in libhugetlbfs because
> the PSE bit in the hugepage PTEs gets unset. In the case where we know
> that a filled hugetlb PTE is going to have its protection changed, make
> sure it stays a hugetlb PTE by setting the PSE bit in the new protection
> flags.

Jan Beulich wrote on Wednesday, March 15, 2006 12:50 AM
> This is architecture independent code - you shouldn't be using
> _PAGE_PSE here. Probably x86-64 (and then likely also i386) should
> define their own set_huge_pte_at(), and use that to or in the
> needed flag?


Yeah, that will do.  i386 and x86_64 should also clean up the pte_mkhuge() macro.
The unconditional setting of the _PAGE_PRESENT bit is a leftover from
the good old days of pre-faulting hugetlb pages.



[patch] fix i386/x86-64 _PAGE_PSE bit when changing page protection

On i386 and x86-64, the pte flag _PAGE_PSE collides with _PAGE_PROTNONE.
The identity of a hugetlb pte is lost when changing page protection
via mprotect. A page fault that occurs later will trigger a bug check in
huge_pte_alloc().

The fix is to always make the new pte a hugetlb pte and also to clean up
legacy code where _PAGE_PRESENT was forced on in the pre-faulting days.


Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>


diff -Nurp linux-2.6.15/include/asm-i386/pgtable.h linux-2.6.15-mm/include/asm-i386/pgtable.h
--- linux-2.6.15/include/asm-i386/pgtable.h	2006-01-02 19:21:10.000000000 -0800
+++ linux-2.6.15-mm/include/asm-i386/pgtable.h	2006-03-15 00:35:03.000000000 -0800
@@ -219,13 +219,12 @@ extern unsigned long pg0[];
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
  */
-#define __LARGE_PTE (_PAGE_PSE | _PAGE_PRESENT)
 static inline int pte_user(pte_t pte)		{ return (pte).pte_low & _PAGE_USER; }
 static inline int pte_read(pte_t pte)		{ return (pte).pte_low & _PAGE_USER; }
 static inline int pte_dirty(pte_t pte)		{ return (pte).pte_low & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte)		{ return (pte).pte_low & _PAGE_ACCESSED; }
 static inline int pte_write(pte_t pte)		{ return (pte).pte_low & _PAGE_RW; }
-static inline int pte_huge(pte_t pte)		{ return ((pte).pte_low & __LARGE_PTE) == __LARGE_PTE; }
+static inline int pte_huge(pte_t pte)		{ return (pte).pte_low & _PAGE_PSE; }
 
 /*
  * The following only works if pte_present() is not true.
@@ -242,7 +241,7 @@ static inline pte_t pte_mkexec(pte_t pte
 static inline pte_t pte_mkdirty(pte_t pte)	{ (pte).pte_low |= _PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkyoung(pte_t pte)	{ (pte).pte_low |= _PAGE_ACCESSED; return pte; }
 static inline pte_t pte_mkwrite(pte_t pte)	{ (pte).pte_low |= _PAGE_RW; return pte; }
-static inline pte_t pte_mkhuge(pte_t pte)	{ (pte).pte_low |= __LARGE_PTE; return pte; }
+static inline pte_t pte_mkhuge(pte_t pte)	{ (pte).pte_low |= _PAGE_PSE; return pte; }
 
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
diff -Nurp linux-2.6.15/include/asm-ia64/pgtable.h linux-2.6.15-mm/include/asm-ia64/pgtable.h
--- linux-2.6.15/include/asm-ia64/pgtable.h	2006-03-15 00:46:18.000000000 -0800
+++ linux-2.6.15-mm/include/asm-ia64/pgtable.h	2006-03-14 21:53:00.000000000 -0800
@@ -314,7 +314,7 @@ ia64_phys_addr_valid (unsigned long addr
 #define pte_mkyoung(pte)	(__pte(pte_val(pte) | _PAGE_A))
 #define pte_mkclean(pte)	(__pte(pte_val(pte) & ~_PAGE_D))
 #define pte_mkdirty(pte)	(__pte(pte_val(pte) | _PAGE_D))
-#define pte_mkhuge(pte)		(__pte(pte_val(pte) | _PAGE_P))
+#define pte_mkhuge(pte)		(__pte(pte_val(pte)))
 
 /*
  * Macro to a page protection value as "uncacheable".  Note that "protection" is really a
diff -Nurp linux-2.6.15/include/asm-x86_64/pgtable.h linux-2.6.15-mm/include/asm-x86_64/pgtable.h
--- linux-2.6.15/include/asm-x86_64/pgtable.h	2006-03-15 00:30:16.000000000 -0800
+++ linux-2.6.15-mm/include/asm-x86_64/pgtable.h	2006-03-15 00:35:55.000000000 -0800
@@ -273,7 +272,7 @@ static inline int pte_dirty(pte_t pte)		
 static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
 static inline int pte_write(pte_t pte)		{ return pte_val(pte) & _PAGE_RW; }
 static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
-static inline int pte_huge(pte_t pte)		{ return (pte_val(pte) & __LARGE_PTE) == __LARGE_PTE; }
+static inline int pte_huge(pte_t pte)		{ return pte_val(pte) & _PAGE_PSE; }
 
 static inline pte_t pte_rdprotect(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; }
 static inline pte_t pte_exprotect(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; }
@@ -285,7 +284,7 @@ static inline pte_t pte_mkexec(pte_t pte
 static inline pte_t pte_mkdirty(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; }
 static inline pte_t pte_mkyoung(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; }
 static inline pte_t pte_mkwrite(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; }
-static inline pte_t pte_mkhuge(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | __LARGE_PTE)); return pte; }
+static inline pte_t pte_mkhuge(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | _PAGE_PSE)); return pte; }
 
 struct vm_area_struct;
 
diff -Nurp linux-2.6.15/mm/hugetlb.c linux-2.6.15-mm/mm/hugetlb.c
--- linux-2.6.15/mm/hugetlb.c	2006-03-15 00:30:20.000000000 -0800
+++ linux-2.6.15-mm/mm/hugetlb.c	2006-03-14 23:49:55.000000000 -0800
@@ -731,7 +731,7 @@ void hugetlb_change_protection(struct vm
 			continue;
 		if (!pte_none(*ptep)) {
 			pte = huge_ptep_get_and_clear(mm, address, ptep);
-			pte = pte_modify(pte, newprot);
+			pte = pte_mkhuge(pte_modify(pte, newprot));
 			set_huge_pte_at(mm, address, ptep, pte);
 			lazy_mmu_prot_update(pte);
 		}

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2006-03-15 10:03 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-03-15  1:20 Nishanth Aravamudan
2006-03-15  4:03 ` Chen, Kenneth W
2006-03-15  4:35   ` Nishanth Aravamudan
2006-03-15  7:08     ` Chen, Kenneth W
2006-03-15  7:30       ` Nishanth Aravamudan
2006-03-15  8:50         ` [discuss] " Jan Beulich
2006-03-15 10:03           ` Chen, Kenneth W [this message]
2006-03-15 15:14             ` Nishanth Aravamudan
2006-03-15 15:56             ` Nishanth Aravamudan
2006-03-15 15:13           ` Nishanth Aravamudan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200603151003.k2FA30g14232@unix-os.sc.intel.com \
    --to=kenneth.w.chen@intel.com \
    --cc=JBeulich@novell.com \
    --cc=agl@us.ibm.com \
    --cc=ak@suse.de \
    --cc=david@gibson.dropbear.id.au \
    --cc=discuss@x86-64.org \
    --cc=linux-mm@kvack.org \
    --cc=nacc@us.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox