From: Ankur Arora <ankur.a.arora@oracle.com>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org
Cc: torvalds@linux-foundation.org, akpm@linux-foundation.org,
mike.kravetz@oracle.com, mingo@kernel.org, luto@kernel.org,
tglx@linutronix.de, bp@alien8.de, peterz@infradead.org,
ak@linux.intel.com, arnd@arndb.de, jgg@nvidia.com,
jon.grimm@amd.com, boris.ostrovsky@oracle.com,
konrad.wilk@oracle.com, joao.m.martins@oracle.com,
ankur.a.arora@oracle.com
Subject: [PATCH v3 17/21] clear_huge_page: use non-cached clearing
Date: Mon, 6 Jun 2022 20:37:21 +0000
Message-ID: <20220606203725.1313715-13-ankur.a.arora@oracle.com>
In-Reply-To: <20220606202109.1306034-1-ankur.a.arora@oracle.com>

Non-caching stores are suitable when the destination region is unlikely
to be read again soon, or is large enough that there is no expectation
that the data will still be found in the cache.

Add a new parameter to clear_user_extent() to select the non-caching
clearing path for huge and gigantic pages. This path needs a final
clear_page_make_coherent() operation, since non-cached clearing
typically uses weakly ordered stores that are incoherent with respect
to other operations in the memory hierarchy.

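To make the coherence requirement concrete, below is a minimal
user-space sketch of the kind of store sequence the incoherent path
relies on, assuming x86 non-temporal stores. The function names are
illustrative only, not the kernel primitives added by this series:

#include <immintrin.h>
#include <stddef.h>

/* Zero len bytes with weakly ordered, cache-bypassing stores (MOVNTI). */
static void clear_incoherent(void *dst, size_t len)
{
	long long *p = dst;
	size_t i;

	for (i = 0; i < len / sizeof(*p); i++)
		_mm_stream_si64(&p[i], 0);
}

/*
 * The "make coherent" step: a store fence orders the non-temporal
 * stores above before any subsequent stores, such as a PTE update
 * or __SetPageUptodate().
 */
static void clear_make_coherent(void)
{
	_mm_sfence();
}
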
This path is always used for gigantic pages; for huge pages it is used
only if pages_per_huge_page exceeds an architectural threshold, or if
the caller gives an explicit hint (for instance, when this call is part
of a larger clearing operation).

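Concretely, the selection reduces to a predicate of roughly the
following shape. This is a sketch only: the helper name and the 8MB
cut-off are made up here; the real threshold comes from
clear_page_non_caching_threshold(), added earlier in this series:

/* Illustrative policy: an explicit hint, or a large enough extent. */
static bool want_non_cached(unsigned int pages_per_huge_page, bool hinted)
{
	unsigned long extent = (unsigned long)pages_per_huge_page << PAGE_SHIFT;

	return hinted || extent >= (8UL << 20);
}
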
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 include/linux/mm.h |  3 ++-
 mm/huge_memory.c   |  3 ++-
 mm/hugetlb.c       |  3 ++-
 mm/memory.c        | 50 +++++++++++++++++++++++++++++++++++++++-------
 4 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5084571b2fb6..a9b0c1889348 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3302,7 +3302,8 @@ enum mf_action_page_type {
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
 extern void clear_huge_page(struct page *page,
			    unsigned long addr_hint,
-			    unsigned int pages_per_huge_page);
+			    unsigned int pages_per_huge_page,
+			    bool non_cached);
 extern void copy_user_huge_page(struct page *dst, struct page *src,
				unsigned long addr_hint,
				struct vm_area_struct *vma,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a77c78a2b6b5..73654db77a1c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -594,6 +594,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
	pgtable_t pgtable;
	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
	vm_fault_t ret = 0;
+	bool non_cached = false;
 
	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
@@ -611,7 +612,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
		goto release;
	}
 
-	clear_huge_page(page, vmf->address, HPAGE_PMD_NR);
+	clear_huge_page(page, vmf->address, HPAGE_PMD_NR, non_cached);
	/*
	 * The memory barrier inside __SetPageUptodate makes sure that
	 * clear_huge_page writes become visible before the set_pmd_at()
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7c468ac1d069..0c4a31b5c1e9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5481,6 +5481,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
	spinlock_t *ptl;
	unsigned long haddr = address & huge_page_mask(h);
	bool new_page, new_pagecache_page = false;
+	bool non_cached = false;
 
	/*
	 * Currently, we are forced to kill the process in the event the
@@ -5536,7 +5537,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
			spin_unlock(ptl);
			goto out;
		}
-		clear_huge_page(page, address, pages_per_huge_page(h));
+		clear_huge_page(page, address, pages_per_huge_page(h), non_cached);
		__SetPageUptodate(page);
		new_page = true;
diff --git a/mm/memory.c b/mm/memory.c
index b78b32a3e915..0638dc56828f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5606,11 +5606,18 @@ bool clear_page_prefer_non_caching(unsigned long extent)
  *
  * With ARCH_MAX_CLEAR_PAGES == 1, clear_user_highpages() drops down
  * to page-at-a-time mode. Or, funnels through to clear_user_pages().
+ *
+ * With coherent == false, we use incoherent stores and the caller is
+ * responsible for making the region coherent again by calling
+ * clear_page_make_coherent().
  */
 static void clear_user_extent(struct page *start_page, unsigned long vaddr,
-			      unsigned int npages)
+			      unsigned int npages, bool coherent)
 {
-	clear_user_highpages(start_page, vaddr, npages);
+	if (coherent)
+		clear_user_highpages(start_page, vaddr, npages);
+	else
+		clear_user_highpages_incoherent(start_page, vaddr, npages);
 }
 
 struct subpage_arg {
@@ -5709,6 +5716,13 @@ static void clear_gigantic_page(struct page *page,
 {
	int i;
	struct page *p = page;
+	bool coherent;
+
+	/*
+	 * Gigantic pages are large enough that there are no cache
+	 * expectations. Use the incoherent path.
+	 */
+	coherent = false;
 
	might_sleep();
	for (i = 0; i < pages_per_huge_page;
@@ -5718,9 +5732,16 @@ static void clear_gigantic_page(struct page *page,
		 * guarantees that p[0] and p[clear_page_unit-1]
		 * never straddle a mem_map discontiguity.
		 */
-		clear_user_extent(p, base_addr + i * PAGE_SIZE, clear_page_unit);
+		clear_user_extent(p, base_addr + i * PAGE_SIZE,
+				  clear_page_unit, coherent);
		cond_resched();
	}
+
+	/*
+	 * We need to make sure that writes above are ordered before
+	 * updating the PTE and marking SetPageUptodate().
+	 */
+	clear_page_make_coherent();
 }
 
 static void clear_subpages(struct subpage_arg *sa,
@@ -5736,15 +5757,16 @@ static void clear_subpages(struct subpage_arg *sa,
 
		n = min(clear_page_unit, remaining);
 
-		clear_user_extent(page + i, base_addr + i * PAGE_SIZE, n);
+		clear_user_extent(page + i, base_addr + i * PAGE_SIZE,
+				  n, true);
 
		i += n;
		cond_resched();
	}
 }
 
-void clear_huge_page(struct page *page,
-		     unsigned long addr_hint, unsigned int pages_per_huge_page)
+void clear_huge_page(struct page *page, unsigned long addr_hint,
+		     unsigned int pages_per_huge_page, bool non_cached)
 {
	unsigned long addr = addr_hint &
		~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1);
@@ -5755,7 +5777,21 @@ void clear_huge_page(struct page *page,
		.page_unit = clear_page_unit,
	};
 
-	if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) {
+	/*
+	 * The non-caching path is typically slower for small extents so use
+	 * it only if the caller explicitly hints it or if the extent is
+	 * large enough that there are no cache expectations.
+	 *
+	 * We let the gigantic page path handle the details.
+	 */
+	non_cached |=
+		clear_page_prefer_non_caching(pages_per_huge_page * PAGE_SIZE);
+
+	if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES || non_cached)) {
+		/*
+		 * Gigantic page clearing always uses incoherent clearing
+		 * internally.
+		 */
		clear_gigantic_page(page, addr, pages_per_huge_page);
		return;
	}
--
2.31.1