linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
To: Mike Kravetz <mike.kravetz@oracle.com>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	"Jérôme Glisse" <jglisse@redhat.com>,
	"Vlastimil Babka" <vbabka@suse.cz>,
	"Davidlohr Bueso" <dave@stgolabs.net>,
	"Michal Hocko" <mhocko@kernel.org>,
	"Andrew Morton" <akpm@linux-foundation.org>
Subject: Re: [PATCH v6 2/2] hugetlb: take PMD sharing into account when flushing tlb/caches
Date: Fri, 24 Aug 2018 03:07:46 +0000	[thread overview]
Message-ID: <20180824030746.GB31674@hori1.linux.bs1.fc.nec.co.jp> (raw)
In-Reply-To: <20180823205917.16297-3-mike.kravetz@oracle.com>

On Thu, Aug 23, 2018 at 01:59:17PM -0700, Mike Kravetz wrote:
> When fixing an issue with PMD sharing and migration, it was discovered
> via code inspection that other callers of huge_pmd_unshare potentially
> have an issue with cache and tlb flushing.
> 
> Use the routine adjust_range_if_pmd_sharing_possible() to calculate
> worst case ranges for mmu notifiers.  Ensure that this range is flushed
> if huge_pmd_unshare succeeds and unmaps a PUD_SUZE area.

s/PUD_SUZE/PUD_SIZE/

> 
> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>

Looks good to me.

Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>

> ---
>  mm/hugetlb.c | 53 +++++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 44 insertions(+), 9 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index a73c5728e961..082cddf46b4f 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3333,8 +3333,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  	struct page *page;
>  	struct hstate *h = hstate_vma(vma);
>  	unsigned long sz = huge_page_size(h);
> -	const unsigned long mmun_start = start;	/* For mmu_notifiers */
> -	const unsigned long mmun_end   = end;	/* For mmu_notifiers */
> +	unsigned long mmun_start = start;	/* For mmu_notifiers */
> +	unsigned long mmun_end   = end;		/* For mmu_notifiers */
>  
>  	WARN_ON(!is_vm_hugetlb_page(vma));
>  	BUG_ON(start & ~huge_page_mask(h));
> @@ -3346,6 +3346,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  	 */
>  	tlb_remove_check_page_size_change(tlb, sz);
>  	tlb_start_vma(tlb, vma);
> +
> +	/*
> +	 * If sharing possible, alert mmu notifiers of worst case.
> +	 */
> +	adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end);
>  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
>  	address = start;
>  	for (; address < end; address += sz) {
> @@ -3356,6 +3361,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  		ptl = huge_pte_lock(h, mm, ptep);
>  		if (huge_pmd_unshare(mm, &address, ptep)) {
>  			spin_unlock(ptl);
> +			/*
> +			 * We just unmapped a page of PMDs by clearing a PUD.
> +			 * The caller's TLB flush range should cover this area.
> +			 */
>  			continue;
>  		}
>  
> @@ -3438,12 +3447,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
>  {
>  	struct mm_struct *mm;
>  	struct mmu_gather tlb;
> +	unsigned long tlb_start = start;
> +	unsigned long tlb_end = end;
> +
> +	/*
> +	 * If shared PMDs were possibly used within this vma range, adjust
> +	 * start/end for worst case tlb flushing.
> +	 * Note that we can not be sure if PMDs are shared until we try to
> +	 * unmap pages.  However, we want to make sure TLB flushing covers
> +	 * the largest possible range.
> +	 */
> +	adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end);
>  
>  	mm = vma->vm_mm;
>  
> -	tlb_gather_mmu(&tlb, mm, start, end);
> +	tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end);
>  	__unmap_hugepage_range(&tlb, vma, start, end, ref_page);
> -	tlb_finish_mmu(&tlb, start, end);
> +	tlb_finish_mmu(&tlb, tlb_start, tlb_end);
>  }
>  
>  /*
> @@ -4309,11 +4329,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
>  	pte_t pte;
>  	struct hstate *h = hstate_vma(vma);
>  	unsigned long pages = 0;
> +	unsigned long f_start = start;
> +	unsigned long f_end = end;
> +	bool shared_pmd = false;
> +
> +	/*
> +	 * In the case of shared PMDs, the area to flush could be beyond
> +	 * start/end.  Set f_start/f_end to cover the maximum possible
> +	 * range if PMD sharing is possible.
> +	 */
> +	adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end);
>  
>  	BUG_ON(address >= end);
> -	flush_cache_range(vma, address, end);
> +	flush_cache_range(vma, f_start, f_end);
>  
> -	mmu_notifier_invalidate_range_start(mm, start, end);
> +	mmu_notifier_invalidate_range_start(mm, f_start, f_end);
>  	i_mmap_lock_write(vma->vm_file->f_mapping);
>  	for (; address < end; address += huge_page_size(h)) {
>  		spinlock_t *ptl;
> @@ -4324,6 +4354,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
>  		if (huge_pmd_unshare(mm, &address, ptep)) {
>  			pages++;
>  			spin_unlock(ptl);
> +			shared_pmd = true;
>  			continue;
>  		}
>  		pte = huge_ptep_get(ptep);
> @@ -4359,9 +4390,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
>  	 * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
>  	 * may have cleared our pud entry and done put_page on the page table:
>  	 * once we release i_mmap_rwsem, another task can do the final put_page
> -	 * and that page table be reused and filled with junk.
> +	 * and that page table be reused and filled with junk.  If we actually
> +	 * did unshare a page of pmds, flush the range corresponding to the pud.
>  	 */
> -	flush_hugetlb_tlb_range(vma, start, end);
> +	if (shared_pmd)
> +		flush_hugetlb_tlb_range(vma, f_start, f_end);
> +	else
> +		flush_hugetlb_tlb_range(vma, start, end);
>  	/*
>  	 * No need to call mmu_notifier_invalidate_range() we are downgrading
>  	 * page table protection not changing it to point to a new page.
> @@ -4369,7 +4404,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
>  	 * See Documentation/vm/mmu_notifier.rst
>  	 */
>  	i_mmap_unlock_write(vma->vm_file->f_mapping);
> -	mmu_notifier_invalidate_range_end(mm, start, end);
> +	mmu_notifier_invalidate_range_end(mm, f_start, f_end);
>  
>  	return pages << h->order;
>  }
> -- 
> 2.17.1
> 
> 

  reply	other threads:[~2018-08-24  3:19 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-23 20:59 [PATCH v6 0/2] huge_pmd_unshare migration and flushing Mike Kravetz
2018-08-23 20:59 ` [PATCH v6 1/2] mm: migration: fix migration of huge PMD shared pages Mike Kravetz
2018-08-24  2:59   ` Naoya Horiguchi
2018-08-24  8:41   ` Michal Hocko
2018-08-24 18:08     ` Mike Kravetz
2018-08-27  7:46       ` Michal Hocko
2018-08-27 13:46         ` Jerome Glisse
2018-08-27 19:09           ` Michal Hocko
2018-08-29 17:24           ` Mike Kravetz
2018-08-29 18:14             ` Jerome Glisse
2018-08-29 18:39               ` Michal Hocko
2018-08-29 21:11                 ` Jerome Glisse
2018-08-30  0:40                   ` Mike Kravetz
2018-08-30 10:56                   ` Michal Hocko
2018-08-30 14:08                     ` Jerome Glisse
2018-08-30 16:19                       ` Michal Hocko
2018-08-30 16:57                         ` Jerome Glisse
2018-08-30 18:05                           ` Mike Kravetz
2018-08-30 18:39                             ` Jerome Glisse
2018-09-03  5:56                               ` Michal Hocko
2018-09-04 14:00                                 ` Jerome Glisse
2018-09-04 17:55                                   ` Mike Kravetz
2018-09-05  6:57                                   ` Michal Hocko
2018-08-27 16:42         ` Mike Kravetz
2018-08-27 19:11       ` Michal Hocko
2018-08-24  9:25   ` Michal Hocko
2018-08-23 20:59 ` [PATCH v6 2/2] hugetlb: take PMD sharing into account when flushing tlb/caches Mike Kravetz
2018-08-24  3:07   ` Naoya Horiguchi [this message]
2018-08-24 11:35 ` [PATCH v6 0/2] huge_pmd_unshare migration and flushing Kirill A. Shutemov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180824030746.GB31674@hori1.linux.bs1.fc.nec.co.jp \
    --to=n-horiguchi@ah.jp.nec.com \
    --cc=akpm@linux-foundation.org \
    --cc=dave@stgolabs.net \
    --cc=jglisse@redhat.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=mike.kravetz@oracle.com \
    --cc=vbabka@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.