linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Yu Zhao <yuzhao@google.com>
To: Janosch Frank <frankja@linux.ibm.com>,
	kernel test robot <oliver.sang@intel.com>
Cc: oe-lkp@lists.linux.dev, lkp@intel.com,
	 Linux Memory Management List <linux-mm@kvack.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	 Muchun Song <muchun.song@linux.dev>,
	David Hildenbrand <david@redhat.com>,
	 Frank van der Linden <fvdl@google.com>,
	Matthew Wilcox <willy@infradead.org>,
	Peter Xu <peterx@redhat.com>,
	 Yang Shi <yang@os.amperecomputing.com>,
	linux-kernel@vger.kernel.org,  ying.huang@intel.com,
	feng.tang@intel.com, fengwei.yin@intel.com,
	 Christian Borntraeger <borntraeger@linux.ibm.com>,
	Claudio Imbrenda <imbrenda@linux.ibm.com>,
	 Marc Hartmayer <mhartmay@linux.ibm.com>,
	Heiko Carstens <hca@linux.ibm.com>
Subject: Re: [linux-next:master] [mm/hugetlb_vmemmap] 875fa64577: vm-scalability.throughput -34.3% regression
Date: Wed, 17 Jul 2024 02:36:37 -0600	[thread overview]
Message-ID: <CAOUHufYvCeiGGa+3PbPDfFx__aV2XxyS7TMHMicws4aNTg4qmQ@mail.gmail.com> (raw)
In-Reply-To: <3acefad9-96e5-4681-8014-827d6be71c7a@linux.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 746 bytes --]

Hi Janosch and Oliver,

On Wed, Jul 17, 2024 at 1:57 AM Janosch Frank <frankja@linux.ibm.com> wrote:
>
> On 7/9/24 07:11, kernel test robot wrote:
> > Hello,
> >
> > kernel test robot noticed a -34.3% regression of vm-scalability.throughput on:
> >
> >
> > commit: 875fa64577da9bc8e9963ee14fef8433f20653e7 ("mm/hugetlb_vmemmap: fix race with speculative PFN walkers")
> > https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master
> >
> > [still regression on linux-next/master 0b58e108042b0ed28a71cd7edf5175999955b233]
> >
> This has hit s390 huge page backed KVM guests as well.
> Our simple start/stop test case went from ~5 to over 50 seconds of runtime.

Could you try the attached patch please? Thank you.

[-- Attachment #2: hugetlb-fix.patch --]
[-- Type: application/octet-stream, Size: 3952 bytes --]

diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 8193906515c6..9e6fc4ce8d2b 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -43,6 +43,8 @@ struct vmemmap_remap_walk {
 #define VMEMMAP_SPLIT_NO_TLB_FLUSH	BIT(0)
 /* Skip the TLB flush when we remap the PTE */
 #define VMEMMAP_REMAP_NO_TLB_FLUSH	BIT(1)
+/* synchronize_rcu() to avoid writes from page_ref_add_unless() */
+#define VMEMMAP_SYNCHRONIZE_RCU		BIT(2)
 	unsigned long		flags;
 };
 
@@ -451,6 +453,9 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
 	if (!folio_test_hugetlb_vmemmap_optimized(folio))
 		return 0;
 
+	if (flags & VMEMMAP_SYNCHRONIZE_RCU)
+		synchronize_rcu();
+
 	vmemmap_end	= vmemmap_start + hugetlb_vmemmap_size(h);
 	vmemmap_reuse	= vmemmap_start;
 	vmemmap_start	+= HUGETLB_VMEMMAP_RESERVE_SIZE;
@@ -483,10 +488,7 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
  */
 int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio)
 {
-	/* avoid writes from page_ref_add_unless() while unfolding vmemmap */
-	synchronize_rcu();
-
-	return __hugetlb_vmemmap_restore_folio(h, folio, 0);
+	return __hugetlb_vmemmap_restore_folio(h, folio, VMEMMAP_SYNCHRONIZE_RCU);
 }
 
 /**
@@ -509,14 +511,13 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h,
 	struct folio *folio, *t_folio;
 	long restored = 0;
 	long ret = 0;
-
-	/* avoid writes from page_ref_add_unless() while unfolding vmemmap */
-	synchronize_rcu();
+	unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH | VMEMMAP_SYNCHRONIZE_RCU;
 
 	list_for_each_entry_safe(folio, t_folio, folio_list, lru) {
 		if (folio_test_hugetlb_vmemmap_optimized(folio)) {
-			ret = __hugetlb_vmemmap_restore_folio(h, folio,
-							      VMEMMAP_REMAP_NO_TLB_FLUSH);
+			ret = __hugetlb_vmemmap_restore_folio(h, folio, flags);
+			flags &= VMEMMAP_SYNCHRONIZE_RCU;
+
 			if (ret)
 				break;
 			restored++;
@@ -564,6 +565,9 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
 		return ret;
 
 	static_branch_inc(&hugetlb_optimize_vmemmap_key);
+
+	if (flags & VMEMMAP_SYNCHRONIZE_RCU)
+		synchronize_rcu();
 	/*
 	 * Very Subtle
 	 * If VMEMMAP_REMAP_NO_TLB_FLUSH is set, TLB flushing is not performed
@@ -611,10 +615,7 @@ void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio)
 {
 	LIST_HEAD(vmemmap_pages);
 
-	/* avoid writes from page_ref_add_unless() while folding vmemmap */
-	synchronize_rcu();
-
-	__hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, 0);
+	__hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, VMEMMAP_SYNCHRONIZE_RCU);
 	free_vmemmap_page_list(&vmemmap_pages);
 }
 
@@ -641,6 +642,7 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
 {
 	struct folio *folio;
 	LIST_HEAD(vmemmap_pages);
+	unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH | VMEMMAP_SYNCHRONIZE_RCU;
 
 	list_for_each_entry(folio, folio_list, lru) {
 		int ret = hugetlb_vmemmap_split_folio(h, folio);
@@ -657,14 +659,11 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
 
 	flush_tlb_all();
 
-	/* avoid writes from page_ref_add_unless() while folding vmemmap */
-	synchronize_rcu();
-
 	list_for_each_entry(folio, folio_list, lru) {
 		int ret;
 
-		ret = __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages,
-						       VMEMMAP_REMAP_NO_TLB_FLUSH);
+		ret = __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, flags);
+		flags &= ~VMEMMAP_SYNCHRONIZE_RCU;
 
 		/*
 		 * Pages to be freed may have been accumulated.  If we
@@ -678,8 +677,7 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
 			flush_tlb_all();
 			free_vmemmap_page_list(&vmemmap_pages);
 			INIT_LIST_HEAD(&vmemmap_pages);
-			__hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages,
-							 VMEMMAP_REMAP_NO_TLB_FLUSH);
+			__hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, flags);
 		}
 	}
 

  parent reply	other threads:[~2024-07-17  8:37 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-09  5:11 kernel test robot
2024-07-10  6:22 ` Yu Zhao
2024-07-14 12:26   ` Oliver Sang
2024-07-15  2:40     ` Muchun Song
2024-07-15  4:08       ` Oliver Sang
2024-07-17  7:52 ` Janosch Frank
2024-07-17  7:59   ` Christian Borntraeger
2024-07-17  8:36   ` Yu Zhao [this message]
2024-07-17 15:44     ` Yu Zhao
2024-07-18  9:23       ` Marc Hartmayer
2024-07-19  8:42       ` Oliver Sang
2024-07-19 16:06         ` Yu Zhao
2024-08-03 22:07           ` Yu Zhao
2024-08-06  3:01             ` Oliver Sang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAOUHufYvCeiGGa+3PbPDfFx__aV2XxyS7TMHMicws4aNTg4qmQ@mail.gmail.com \
    --to=yuzhao@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=borntraeger@linux.ibm.com \
    --cc=david@redhat.com \
    --cc=feng.tang@intel.com \
    --cc=fengwei.yin@intel.com \
    --cc=frankja@linux.ibm.com \
    --cc=fvdl@google.com \
    --cc=hca@linux.ibm.com \
    --cc=imbrenda@linux.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lkp@intel.com \
    --cc=mhartmay@linux.ibm.com \
    --cc=muchun.song@linux.dev \
    --cc=oe-lkp@lists.linux.dev \
    --cc=oliver.sang@intel.com \
    --cc=peterx@redhat.com \
    --cc=willy@infradead.org \
    --cc=yang@os.amperecomputing.com \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox