From: Lance Yang <lance.yang@linux.dev>
To: akpm@linux-foundation.org
Cc: david@kernel.org, dave.hansen@intel.com,
dave.hansen@linux.intel.com, will@kernel.org,
aneesh.kumar@kernel.org, npiggin@gmail.com, peterz@infradead.org,
tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
x86@kernel.org, hpa@zytor.com, arnd@arndb.de,
lorenzo.stoakes@oracle.com, ziy@nvidia.com,
baolin.wang@linux.alibaba.com, Liam.Howlett@oracle.com,
npache@redhat.com, ryan.roberts@arm.com, dev.jain@arm.com,
baohua@kernel.org, shy828301@gmail.com, riel@surriel.com,
jannh@google.com, linux-arch@vger.kernel.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org, ioworker0@gmail.com,
Lance Yang <lance.yang@linux.dev>
Subject: [PATCH RESEND v3 2/2] mm: introduce pmdp_collapse_flush_sync() to skip redundant IPI
Date: Tue, 6 Jan 2026 20:03:03 +0800 [thread overview]
Message-ID: <20260106120303.38124-3-lance.yang@linux.dev> (raw)
In-Reply-To: <20260106120303.38124-1-lance.yang@linux.dev>
From: Lance Yang <lance.yang@linux.dev>
pmdp_collapse_flush() may already send IPIs to flush TLBs, and then
callers send another IPI via tlb_remove_table_sync_one() or
pmdp_get_lockless_sync() to synchronize with concurrent GUP-fast walkers.
However, since GUP-fast runs with IRQs disabled, the TLB flush IPI already
provides the necessary synchronization. We can avoid the redundant second
IPI.
Introduce pmdp_collapse_flush_sync() which combines flush and sync:
- For architectures using the generic pmdp_collapse_flush() implementation
(e.g., x86): Use mmu_gather to track IPI sends. If the TLB flush sent
an IPI, tlb_gather_remove_table_sync_one() will skip the redundant one.
- For architectures with custom pmdp_collapse_flush() (s390, riscv,
powerpc): Fall back to calling pmdp_collapse_flush() followed by
tlb_remove_table_sync_one(). No behavior change.
Update khugepaged to use pmdp_collapse_flush_sync() instead of separate
flush and sync calls. Remove the now-unused pmdp_get_lockless_sync() macro.
Suggested-by: David Hildenbrand (Red Hat) <david@kernel.org>
Signed-off-by: Lance Yang <lance.yang@linux.dev>
---
include/linux/pgtable.h | 13 +++++++++----
mm/khugepaged.c | 9 +++------
mm/pgtable-generic.c | 34 ++++++++++++++++++++++++++++++++++
3 files changed, 46 insertions(+), 10 deletions(-)
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index eb8aacba3698..69e290dab450 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -755,7 +755,6 @@ static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
return pmd;
}
#define pmdp_get_lockless pmdp_get_lockless
-#define pmdp_get_lockless_sync() tlb_remove_table_sync_one()
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
#endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */
@@ -774,9 +773,6 @@ static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
{
return pmdp_get(pmdp);
}
-static inline void pmdp_get_lockless_sync(void)
-{
-}
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -1174,6 +1170,8 @@ static inline void pudp_set_wrprotect(struct mm_struct *mm,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
+extern pmd_t pmdp_collapse_flush_sync(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
#else
static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
unsigned long address,
@@ -1182,6 +1180,13 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
BUILD_BUG();
return *pmdp;
}
+static inline pmd_t pmdp_collapse_flush_sync(struct vm_area_struct *vma,
+ unsigned long address,
+ pmd_t *pmdp)
+{
+ BUILD_BUG();
+ return *pmdp;
+}
#define pmdp_collapse_flush pmdp_collapse_flush
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 9f790ec34400..0a98afc85c50 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1177,10 +1177,9 @@ static enum scan_result collapse_huge_page(struct mm_struct *mm, unsigned long a
* Parallel GUP-fast is fine since GUP-fast will back off when
* it detects PMD is changed.
*/
- _pmd = pmdp_collapse_flush(vma, address, pmd);
+ _pmd = pmdp_collapse_flush_sync(vma, address, pmd);
spin_unlock(pmd_ptl);
mmu_notifier_invalidate_range_end(&range);
- tlb_remove_table_sync_one();
pte = pte_offset_map_lock(mm, &_pmd, address, &pte_ptl);
if (pte) {
@@ -1663,8 +1662,7 @@ static enum scan_result try_collapse_pte_mapped_thp(struct mm_struct *mm, unsign
}
}
}
- pgt_pmd = pmdp_collapse_flush(vma, haddr, pmd);
- pmdp_get_lockless_sync();
+ pgt_pmd = pmdp_collapse_flush_sync(vma, haddr, pmd);
pte_unmap_unlock(start_pte, ptl);
if (ptl != pml)
spin_unlock(pml);
@@ -1817,8 +1815,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* races against the prior checks.
*/
if (likely(file_backed_vma_is_retractable(vma))) {
- pgt_pmd = pmdp_collapse_flush(vma, addr, pmd);
- pmdp_get_lockless_sync();
+ pgt_pmd = pmdp_collapse_flush_sync(vma, addr, pmd);
success = true;
}
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index d3aec7a9926a..be2ee82e6fc4 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -233,6 +233,40 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
return pmd;
}
+
+pmd_t pmdp_collapse_flush_sync(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+{
+ struct mmu_gather tlb;
+ pmd_t pmd;
+
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ VM_BUG_ON(pmd_trans_huge(*pmdp));
+
+ tlb_gather_mmu(&tlb, vma->vm_mm);
+ pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
+
+ flush_tlb_mm_range(vma->vm_mm, address, address + HPAGE_PMD_SIZE,
+ PAGE_SHIFT, true, &tlb);
+
+ /*
+ * Synchronize with GUP-fast. If the flush sent IPIs, skip the
+ * redundant sync IPI.
+ */
+ tlb_gather_remove_table_sync_one(&tlb);
+ tlb_finish_mmu(&tlb);
+ return pmd;
+}
+#else
+pmd_t pmdp_collapse_flush_sync(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+{
+ pmd_t pmd;
+
+ pmd = pmdp_collapse_flush(vma, address, pmdp);
+ tlb_remove_table_sync_one();
+ return pmd;
+}
#endif
/* arch define pte_free_defer in asm/pgalloc.h for its own implementation */
--
2.49.0
next prev parent reply other threads:[~2026-01-06 12:03 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-06 12:03 [PATCH RESEND v3 0/2] skip redundant TLB sync IPIs Lance Yang
2026-01-06 12:03 ` [PATCH RESEND v3 1/2] mm/tlb: skip redundant IPI when TLB flush already synchronized Lance Yang
2026-01-06 15:19 ` David Hildenbrand (Red Hat)
2026-01-06 16:10 ` Lance Yang
2026-01-07 6:37 ` Lance Yang
2026-01-09 14:11 ` David Hildenbrand (Red Hat)
2026-01-09 14:13 ` David Hildenbrand (Red Hat)
2026-01-09 15:30 ` Lance Yang
2026-01-09 15:40 ` David Hildenbrand (Red Hat)
2026-01-06 16:24 ` Dave Hansen
2026-01-07 2:47 ` Lance Yang
2026-01-06 12:03 ` Lance Yang [this message]
2026-01-06 15:07 ` [PATCH RESEND v3 2/2] mm: introduce pmdp_collapse_flush_sync() to skip redundant IPI David Hildenbrand (Red Hat)
2026-01-06 15:41 ` Lance Yang
2026-01-07 9:46 ` kernel test robot
2026-01-07 10:52 ` kernel test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260106120303.38124-3-lance.yang@linux.dev \
--to=lance.yang@linux.dev \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=aneesh.kumar@kernel.org \
--cc=arnd@arndb.de \
--cc=baohua@kernel.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=bp@alien8.de \
--cc=dave.hansen@intel.com \
--cc=dave.hansen@linux.intel.com \
--cc=david@kernel.org \
--cc=dev.jain@arm.com \
--cc=hpa@zytor.com \
--cc=ioworker0@gmail.com \
--cc=jannh@google.com \
--cc=linux-arch@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=mingo@redhat.com \
--cc=npache@redhat.com \
--cc=npiggin@gmail.com \
--cc=peterz@infradead.org \
--cc=riel@surriel.com \
--cc=ryan.roberts@arm.com \
--cc=shy828301@gmail.com \
--cc=tglx@linutronix.de \
--cc=will@kernel.org \
--cc=x86@kernel.org \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox