linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Mel Gorman <mel@csn.ul.ie>
To: Andrea Arcangeli <aarcange@redhat.com>
Cc: linux-mm@kvack.org, Marcelo Tosatti <mtosatti@redhat.com>,
	Adam Litke <agl@us.ibm.com>, Avi Kivity <avi@redhat.com>,
	Izik Eidus <ieidus@redhat.com>,
	Hugh Dickins <hugh.dickins@tiscali.co.uk>,
	Nick Piggin <npiggin@suse.de>, Rik van Riel <riel@redhat.com>,
	Andi Kleen <andi@firstfloor.org>,
	Dave Hansen <dave@linux.vnet.ibm.com>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Ingo Molnar <mingo@elte.hu>, Mike Travis <travis@sgi.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Christoph Lameter <cl@linux-foundation.org>,
	Chris Wright <chrisw@sous-sol.org>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: Re: [PATCH 10 of 28] add pmd mangling functions to x86
Date: Fri, 18 Dec 2009 18:56:02 +0000	[thread overview]
Message-ID: <20091218185602.GD21194@csn.ul.ie> (raw)
In-Reply-To: <a77787d44f25abf69338.1261076413@v2.random>

(As a side-note, I am going off-line until after the new years fairly soon.
I'm not doing a proper review at the moment, just taking a first read to
see what's here. Sorry I didn't get a chance to read V1)

On Thu, Dec 17, 2009 at 07:00:13PM -0000, Andrea Arcangeli wrote:
> From: Andrea Arcangeli <aarcange@redhat.com>
> 
> Add needed pmd mangling functions with simmetry with their pte counterparts.

Silly question, this assumes the bits used in the PTE are not being used in
the PMD for something else, right? Is that guaranteed to be safe? According
to the AMD manual, it's fine but is it typically true on other architectures?

> pmdp_freeze_flush is the only exception only present on the pmd side and it's
> needed to serialize the VM against split_huge_page, it simply atomically clears
> the present bit in the same way pmdp_clear_flush_young atomically clears the
> accessed bit (and both need to flush the tlb to make it effective, which is
> mandatory to happen synchronously for pmdp_freeze_flush).
> 
> Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>

One minorish nit below.

> ---
> 
> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
> --- a/arch/x86/include/asm/pgtable.h
> +++ b/arch/x86/include/asm/pgtable.h
> @@ -95,11 +95,21 @@ static inline int pte_young(pte_t pte)
>  	return pte_flags(pte) & _PAGE_ACCESSED;
>  }
>  
> +static inline int pmd_young(pmd_t pmd)
> +{
> +	return pmd_flags(pmd) & _PAGE_ACCESSED;
> +}
> +
>  static inline int pte_write(pte_t pte)
>  {
>  	return pte_flags(pte) & _PAGE_RW;
>  }
>  
> +static inline int pmd_write(pmd_t pmd)
> +{
> +	return pmd_flags(pmd) & _PAGE_RW;
> +}
> +
>  static inline int pte_file(pte_t pte)
>  {
>  	return pte_flags(pte) & _PAGE_FILE;
> @@ -150,6 +160,13 @@ static inline pte_t pte_set_flags(pte_t 
>  	return native_make_pte(v | set);
>  }
>  
> +static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
> +{
> +	pmdval_t v = native_pmd_val(pmd);
> +
> +	return native_make_pmd(v | set);
> +}
> +
>  static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
>  {
>  	pteval_t v = native_pte_val(pte);
> @@ -157,6 +174,13 @@ static inline pte_t pte_clear_flags(pte_
>  	return native_make_pte(v & ~clear);
>  }
>  
> +static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
> +{
> +	pmdval_t v = native_pmd_val(pmd);
> +
> +	return native_make_pmd(v & ~clear);
> +}
> +
>  static inline pte_t pte_mkclean(pte_t pte)
>  {
>  	return pte_clear_flags(pte, _PAGE_DIRTY);
> @@ -167,11 +191,21 @@ static inline pte_t pte_mkold(pte_t pte)
>  	return pte_clear_flags(pte, _PAGE_ACCESSED);
>  }
>  
> +static inline pmd_t pmd_mkold(pmd_t pmd)
> +{
> +	return pmd_clear_flags(pmd, _PAGE_ACCESSED);
> +}
> +
>  static inline pte_t pte_wrprotect(pte_t pte)
>  {
>  	return pte_clear_flags(pte, _PAGE_RW);
>  }
>  
> +static inline pmd_t pmd_wrprotect(pmd_t pmd)
> +{
> +	return pmd_clear_flags(pmd, _PAGE_RW);
> +}
> +
>  static inline pte_t pte_mkexec(pte_t pte)
>  {
>  	return pte_clear_flags(pte, _PAGE_NX);
> @@ -182,16 +216,36 @@ static inline pte_t pte_mkdirty(pte_t pt
>  	return pte_set_flags(pte, _PAGE_DIRTY);
>  }
>  
> +static inline pmd_t pmd_mkdirty(pmd_t pmd)
> +{
> +	return pmd_set_flags(pmd, _PAGE_DIRTY);
> +}
> +
> +static inline pmd_t pmd_mkhuge(pmd_t pmd)
> +{
> +	return pmd_set_flags(pmd, _PAGE_PSE);
> +}
> +
>  static inline pte_t pte_mkyoung(pte_t pte)
>  {
>  	return pte_set_flags(pte, _PAGE_ACCESSED);
>  }
>  
> +static inline pmd_t pmd_mkyoung(pmd_t pmd)
> +{
> +	return pmd_set_flags(pmd, _PAGE_ACCESSED);
> +}
> +
>  static inline pte_t pte_mkwrite(pte_t pte)
>  {
>  	return pte_set_flags(pte, _PAGE_RW);
>  }
>  
> +static inline pmd_t pmd_mkwrite(pmd_t pmd)
> +{
> +	return pmd_set_flags(pmd, _PAGE_RW);
> +}
> +
>  static inline pte_t pte_mkhuge(pte_t pte)
>  {
>  	return pte_set_flags(pte, _PAGE_PSE);
> @@ -320,6 +374,11 @@ static inline int pte_same(pte_t a, pte_
>  	return a.pte == b.pte;
>  }
>  
> +static inline int pmd_same(pmd_t a, pmd_t b)
> +{
> +	return a.pmd == b.pmd;
> +}
> +
>  static inline int pte_present(pte_t a)
>  {
>  	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
> @@ -351,7 +410,7 @@ static inline unsigned long pmd_page_vad
>   * Currently stuck as a macro due to indirect forward reference to
>   * linux/mmzone.h's __section_mem_map_addr() definition:
>   */
> -#define pmd_page(pmd)	pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
> +#define pmd_page(pmd)	pfn_to_page((pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT)
>  

Why is the masking with PTE_PFN_MASK now necessary?

>  /*
>   * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
> @@ -372,6 +431,7 @@ static inline unsigned long pmd_index(un
>   * to linux/mm.h:page_to_nid())
>   */
>  #define mk_pte(page, pgprot)   pfn_pte(page_to_pfn(page), (pgprot))
> +#define mk_pmd(page, pgprot)   pfn_pmd(page_to_pfn(page), (pgprot))
>  
>  /*
>   * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
> @@ -568,14 +628,21 @@ struct vm_area_struct;
>  extern int ptep_set_access_flags(struct vm_area_struct *vma,
>  				 unsigned long address, pte_t *ptep,
>  				 pte_t entry, int dirty);
> +extern int pmdp_set_access_flags(struct vm_area_struct *vma,
> +				 unsigned long address, pmd_t *pmdp,
> +				 pmd_t entry, int dirty);
>  
>  #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
>  extern int ptep_test_and_clear_young(struct vm_area_struct *vma,
>  				     unsigned long addr, pte_t *ptep);
> +extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
> +				     unsigned long addr, pmd_t *pmdp);
>  
>  #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
>  extern int ptep_clear_flush_young(struct vm_area_struct *vma,
>  				  unsigned long address, pte_t *ptep);
> +extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
> +				  unsigned long address, pmd_t *pmdp);
>  
>  #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
>  static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
> @@ -586,6 +653,14 @@ static inline pte_t ptep_get_and_clear(s
>  	return pte;
>  }
>  
> +static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, unsigned long addr,
> +				       pmd_t *pmdp)
> +{
> +	pmd_t pmd = native_pmdp_get_and_clear(pmdp);
> +	pmd_update(mm, addr, pmdp);
> +	return pmd;
> +}
> +
>  #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
>  static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
>  					    unsigned long addr, pte_t *ptep,
> @@ -612,6 +687,16 @@ static inline void ptep_set_wrprotect(st
>  	pte_update(mm, addr, ptep);
>  }
>  
> +static inline void pmdp_set_wrprotect(struct mm_struct *mm,
> +				      unsigned long addr, pmd_t *pmdp)
> +{
> +	clear_bit(_PAGE_BIT_RW, (unsigned long *)&pmdp->pmd);
> +	pmd_update(mm, addr, pmd);
> +}
> +
> +extern void pmdp_splitting_flush(struct vm_area_struct *vma,
> +				 unsigned long addr, pmd_t *pmdp);
> +
>  /*
>   * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
>   *
> diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
> --- a/arch/x86/include/asm/pgtable_64.h
> +++ b/arch/x86/include/asm/pgtable_64.h
> @@ -71,6 +71,18 @@ static inline pte_t native_ptep_get_and_
>  	return ret;
>  #endif
>  }
> +static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
> +{
> +#ifdef CONFIG_SMP
> +	return native_make_pmd(xchg(&xp->pmd, 0));
> +#else
> +	/* native_local_pmdp_get_and_clear,
> +	   but duplicated because of cyclic dependency */
> +	pmd_t ret = *xp;
> +	native_pmd_clear(NULL, 0, xp);
> +	return ret;
> +#endif
> +}
>  
>  static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
>  {
> diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
> --- a/arch/x86/mm/pgtable.c
> +++ b/arch/x86/mm/pgtable.c
> @@ -288,6 +288,23 @@ int ptep_set_access_flags(struct vm_area
>  	return changed;
>  }
>  
> +int pmdp_set_access_flags(struct vm_area_struct *vma,
> +			  unsigned long address, pmd_t *pmdp,
> +			  pmd_t entry, int dirty)
> +{
> +	int changed = !pmd_same(*pmdp, entry);
> +
> +	VM_BUG_ON(address & ~HPAGE_MASK);
> +

On the use of HPAGE_MASK, did you intend to use the PMD mask? Granted,
it works out as being the same thing in this context but if there is
ever support for 1GB pages at the next page table level, it could get
confusing.

> +	if (changed && dirty) {
> +		*pmdp = entry;
> +		pmd_update_defer(vma->vm_mm, address, pmdp);
> +		flush_tlb_range(vma, address, address + HPAGE_SIZE);
> +	}
> +
> +	return changed;
> +}
> +
>  int ptep_test_and_clear_young(struct vm_area_struct *vma,
>  			      unsigned long addr, pte_t *ptep)
>  {
> @@ -303,6 +320,21 @@ int ptep_test_and_clear_young(struct vm_
>  	return ret;
>  }
>  
> +int pmdp_test_and_clear_young(struct vm_area_struct *vma,
> +			      unsigned long addr, pmd_t *pmdp)
> +{
> +	int ret = 0;
> +
> +	if (pmd_young(*pmdp))
> +		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
> +					 (unsigned long *) &pmdp->pmd);
> +
> +	if (ret)
> +		pmd_update(vma->vm_mm, addr, pmdp);
> +
> +	return ret;
> +}
> +
>  int ptep_clear_flush_young(struct vm_area_struct *vma,
>  			   unsigned long address, pte_t *ptep)
>  {
> @@ -315,6 +347,34 @@ int ptep_clear_flush_young(struct vm_are
>  	return young;
>  }
>  
> +int pmdp_clear_flush_young(struct vm_area_struct *vma,
> +			   unsigned long address, pmd_t *pmdp)
> +{
> +	int young;
> +
> +	VM_BUG_ON(address & ~HPAGE_MASK);
> +
> +	young = pmdp_test_and_clear_young(vma, address, pmdp);
> +	if (young)
> +		flush_tlb_range(vma, address, address + HPAGE_SIZE);
> +
> +	return young;
> +}
> +
> +void pmdp_splitting_flush(struct vm_area_struct *vma,
> +			  unsigned long address, pmd_t *pmdp)
> +{
> +	int set;
> +	VM_BUG_ON(address & ~HPAGE_MASK);
> +	set = !test_and_set_bit(_PAGE_BIT_SPLITTING,
> +				(unsigned long *)&pmdp->pmd);
> +	if (set) {
> +		pmd_update(vma->vm_mm, address, pmdp);
> +		/* need tlb flush only to serialize against gup-fast */
> +		flush_tlb_range(vma, address, address + HPAGE_SIZE);
> +	}
> +}
> +
>  /**
>   * reserve_top_address - reserves a hole in the top of kernel address space
>   * @reserve - size of hole to reserve
> 

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2009-12-18 18:56 UTC|newest]

Thread overview: 89+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-12-17 19:00 [PATCH 00 of 28] Transparent Hugepage support #2 Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 01 of 28] compound_lock Andrea Arcangeli
2009-12-17 19:46   ` Christoph Lameter
2009-12-18 14:27     ` Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 02 of 28] alter compound get_page/put_page Andrea Arcangeli
2009-12-17 19:50   ` Christoph Lameter
2009-12-18 14:30     ` Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 03 of 28] clear compound mapping Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 04 of 28] add native_set_pmd_at Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 05 of 28] add pmd paravirt ops Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 06 of 28] no paravirt version of pmd ops Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 07 of 28] export maybe_mkwrite Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 08 of 28] comment reminder in destroy_compound_page Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 09 of 28] config_transparent_hugepage Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 10 of 28] add pmd mangling functions to x86 Andrea Arcangeli
2009-12-18 18:56   ` Mel Gorman [this message]
2009-12-19 15:27     ` Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 11 of 28] add pmd mangling generic functions Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 12 of 28] special pmd_trans_* functions Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 13 of 28] bail out gup_fast on freezed pmd Andrea Arcangeli
2009-12-18 18:59   ` Mel Gorman
2009-12-19 15:48     ` Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 14 of 28] pte alloc trans splitting Andrea Arcangeli
2009-12-18 19:03   ` Mel Gorman
2009-12-19 15:59     ` Andrea Arcangeli
2009-12-21 19:57       ` Mel Gorman
2009-12-17 19:00 ` [PATCH 15 of 28] add pmd mmu_notifier helpers Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 16 of 28] clear page compound Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 17 of 28] add pmd_huge_pte to mm_struct Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 18 of 28] ensure mapcount is taken on head pages Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 19 of 28] split_huge_page_mm/vma Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 20 of 28] split_huge_page paging Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 21 of 28] pmd_trans_huge migrate bugcheck Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 22 of 28] clear_huge_page fix Andrea Arcangeli
2009-12-18 19:16   ` Mel Gorman
2009-12-17 19:00 ` [PATCH 23 of 28] clear_copy_huge_page Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 24 of 28] kvm mmu transparent hugepage support Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 25 of 28] transparent hugepage core Andrea Arcangeli
2009-12-18 20:03   ` Mel Gorman
2009-12-19 16:41     ` Andrea Arcangeli
2009-12-21 20:31       ` Mel Gorman
2009-12-23  0:06         ` Andrea Arcangeli
2009-12-23  6:09           ` Paul Mundt
2010-01-03 18:38           ` Mel Gorman
2010-01-04 15:49             ` Andrea Arcangeli
2010-01-04 16:58             ` Christoph Lameter
2010-01-04  6:16   ` Daisuke Nishimura
2010-01-04 16:04     ` Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 26 of 28] madvise(MADV_HUGEPAGE) Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 27 of 28] memcg compound Andrea Arcangeli
2009-12-18  1:27   ` KAMEZAWA Hiroyuki
2009-12-18 16:02     ` Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 28 of 28] memcg huge memory Andrea Arcangeli
2009-12-18  1:33   ` KAMEZAWA Hiroyuki
2009-12-18 16:04     ` Andrea Arcangeli
2009-12-18 23:06       ` KAMEZAWA Hiroyuki
2009-12-20 18:39         ` Andrea Arcangeli
2009-12-21  0:26           ` KAMEZAWA Hiroyuki
2009-12-21  1:24             ` Daisuke Nishimura
2009-12-21  3:52               ` KAMEZAWA Hiroyuki
2009-12-21  4:33                 ` Daisuke Nishimura
2009-12-25  4:17                   ` Daisuke Nishimura
2009-12-25  4:37                     ` KAMEZAWA Hiroyuki
2009-12-24 10:00   ` Balbir Singh
2009-12-24 11:40     ` Andrea Arcangeli
2009-12-24 12:07       ` Balbir Singh
2009-12-17 19:54 ` [PATCH 00 of 28] Transparent Hugepage support #2 Christoph Lameter
2009-12-17 19:58   ` Rik van Riel
2009-12-17 20:09     ` Christoph Lameter
2009-12-18  5:12       ` Ingo Molnar
2009-12-18  6:18         ` KOSAKI Motohiro
2009-12-18 18:28         ` Christoph Lameter
2009-12-18 18:41           ` Dave Hansen
2009-12-18 19:17             ` Mike Travis
2009-12-18 19:28               ` Swap on flash SSDs Dave Hansen
2009-12-18 19:38                 ` Andi Kleen
2009-12-18 19:39                 ` Ingo Molnar
2009-12-18 20:13                   ` Linus Torvalds
2009-12-18 20:31                     ` Ingo Molnar
2009-12-19 18:38                   ` Jörn Engel
2009-12-18 14:05       ` [PATCH 00 of 28] Transparent Hugepage support #2 Andrea Arcangeli
2009-12-18 18:33         ` Christoph Lameter
2009-12-19 15:09           ` Andrea Arcangeli
2009-12-17 20:47     ` Mike Travis
2009-12-18  3:28       ` Rik van Riel
2009-12-18 14:12       ` Andrea Arcangeli
2009-12-18 12:52     ` Avi Kivity
2009-12-18 18:47 ` Dave Hansen
2009-12-19 15:20   ` Andrea Arcangeli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091218185602.GD21194@csn.ul.ie \
    --to=mel@csn.ul.ie \
    --cc=aarcange@redhat.com \
    --cc=agl@us.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=avi@redhat.com \
    --cc=benh@kernel.crashing.org \
    --cc=chrisw@sous-sol.org \
    --cc=cl@linux-foundation.org \
    --cc=dave@linux.vnet.ibm.com \
    --cc=hugh.dickins@tiscali.co.uk \
    --cc=ieidus@redhat.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-mm@kvack.org \
    --cc=mingo@elte.hu \
    --cc=mtosatti@redhat.com \
    --cc=npiggin@suse.de \
    --cc=riel@redhat.com \
    --cc=travis@sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox