From: Peter Zijlstra <peterz@infradead.org>
To: Rik van Riel <riel@surriel.com>
Cc: x86@kernel.org, linux-kernel@vger.kernel.org, bp@alien8.de,
	dave.hansen@linux.intel.com, zhengqi.arch@bytedance.com,
	nadav.amit@gmail.com, thomas.lendacky@amd.com,
	kernel-team@meta.com, linux-mm@kvack.org,
	akpm@linux-foundation.org, jannh@google.com,
	mhklinux@outlook.com, andrew.cooper3@citrix.com,
	Manali Shukla <Manali.Shukla@amd.com>,
	David.Kaplan@amd.com
Subject: Re: [PATCH v8 10/12] x86/mm: do targeted broadcast flushing from tlbbatch code
Date: Wed, 5 Feb 2025 14:51:56 +0100	[thread overview]
Message-ID: <20250205135156.GI14028@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <20250205014033.3626204-11-riel@surriel.com>

On Tue, Feb 04, 2025 at 08:39:59PM -0500, Rik van Riel wrote:

> @@ -1657,12 +1655,65 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
>  		local_irq_enable();
>  	}
>  
> +	/*
> +	 * If we issued (asynchronous) INVLPGB flushes, wait for them here.
> +	 * The cpumask above contains only CPUs that were running tasks
> +	 * not using broadcast TLB flushing.
> +	 */
> +	if (cpu_feature_enabled(X86_FEATURE_INVLPGB) && batch->used_invlpgb) {
> +		tlbsync();
> +		migrate_enable();
> +		batch->used_invlpgb = false;
> +	}
> +
>  	cpumask_clear(&batch->cpumask);
>  
>  	put_flush_tlb_info();
>  	put_cpu();
>  }
>  
> +void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
> +					     struct mm_struct *mm,
> +					     unsigned long uaddr)
> +{
> +	u16 asid = mm_global_asid(mm);
> +
> +	if (asid) {
> +		/*
> +		 * Queue up an asynchronous invalidation. The corresponding
> +		 * TLBSYNC is done in arch_tlbbatch_flush(), and must be done
> +		 * on the same CPU.
> +		 */
> +		if (!batch->used_invlpgb) {
> +			batch->used_invlpgb = true;
> +			migrate_disable();
> +		}

How about we do something like this instead?

This keeps all the TLBSYNC in the same task as the INVLPGB, without
making things complicated or allowing random CR3 writes in between
them -- which makes my head hurt.
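
Roughly, the idea (using the names from the patch below, so this is just
an illustration of the invariant, not the patch itself): the _nosync
flushes mark this CPU as having an outstanding broadcast flush, and
tlbsync() drains that mark before the CPU can move on to another mm:

	/* After queueing an INVLPGB on this CPU: */
	this_cpu_write(cpu_tlbstate.need_tlbsync, true);

	/* In switch_mm_irqs_off(), enter_lazy_tlb() and arch_tlbbatch_flush(): */
	if (this_cpu_read(cpu_tlbstate.need_tlbsync)) {
		__tlbsync();
		this_cpu_write(cpu_tlbstate.need_tlbsync, false);
	}

So the TLBSYNC always runs on the CPU that issued the INVLPGB, and the
batch code no longer needs migrate_disable()/migrate_enable() around it.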

---
--- a/arch/x86/include/asm/tlbbatch.h
+++ b/arch/x86/include/asm/tlbbatch.h
@@ -10,7 +10,6 @@ struct arch_tlbflush_unmap_batch {
 	 * the PFNs being flushed..
 	 */
 	struct cpumask cpumask;
-	bool used_invlpgb;
 };
 
 #endif /* _ARCH_X86_TLBBATCH_H */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -106,6 +106,7 @@ struct tlb_state {
 	 * need to be invalidated.
 	 */
 	bool invalidate_other;
+	bool need_tlbsync;
 
 #ifdef CONFIG_ADDRESS_MASKING
 	/*
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -266,6 +266,37 @@ static void choose_new_asid(struct mm_st
 	*need_flush = true;
 }
 
+static inline void tlbsync(void)
+{
+	if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+		return;
+	__tlbsync();
+	this_cpu_write(cpu_tlbstate.need_tlbsync, false);
+}
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+						unsigned long addr,
+						u16 nr, bool pmd_stride)
+{
+	__invlpgb_flush_user_nr(pcid, addr, nr, pmd_stride);
+	if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+		this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+	__invlpgb_flush_single_pcid(pcid);
+	if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+		this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+	__invlpgb_flush_addr(addr, nr);
+	if (!this_cpu_read(cpu_tlbstate.need_tlbsync))
+		this_cpu_write(cpu_tlbstate.need_tlbsync, true);
+}
+
 #ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
 /*
  * Logic for broadcast TLB invalidation.
@@ -793,6 +824,8 @@ void switch_mm_irqs_off(struct mm_struct
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
 		WARN_ON_ONCE(!irqs_disabled());
 
+	tlbsync();
+
 	/*
 	 * Verify that CR3 is what we think it is.  This will catch
 	 * hypothetical buggy code that directly switches to swapper_pg_dir
@@ -968,6 +1001,8 @@ void switch_mm_irqs_off(struct mm_struct
  */
 void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
+	tlbsync();
+
 	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
 		return;
 
@@ -1623,11 +1658,8 @@ void arch_tlbbatch_flush(struct arch_tlb
 	 * The cpumask above contains only CPUs that were running tasks
 	 * not using broadcast TLB flushing.
 	 */
-	if (cpu_feature_enabled(X86_FEATURE_INVLPGB) && batch->used_invlpgb) {
+	if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
 		tlbsync();
-		migrate_enable();
-		batch->used_invlpgb = false;
-	}
 
 	cpumask_clear(&batch->cpumask);
 
@@ -1647,10 +1679,6 @@ void arch_tlbbatch_add_pending(struct ar
 		 * TLBSYNC is done in arch_tlbbatch_flush(), and must be done
 		 * on the same CPU.
 		 */
-		if (!batch->used_invlpgb) {
-			batch->used_invlpgb = true;
-			migrate_disable();
-		}
 		invlpgb_flush_user_nr_nosync(kern_pcid(asid), uaddr, 1, false);
 		/* Do any CPUs supporting INVLPGB need PTI? */
 		if (static_cpu_has(X86_FEATURE_PTI))
--- a/arch/x86/include/asm/invlpgb.h
+++ b/arch/x86/include/asm/invlpgb.h
@@ -3,6 +3,7 @@
 #define _ASM_X86_INVLPGB
 
 #include <linux/kernel.h>
+#include <asm/page_types.h>
 #include <vdso/bits.h>
 #include <vdso/page.h>
 
@@ -31,9 +32,8 @@ static inline void __invlpgb(unsigned lo
 }
 
 /* Wait for INVLPGB originated by this CPU to complete. */
-static inline void tlbsync(void)
+static inline void __tlbsync(void)
 {
-	cant_migrate();
 	/* TLBSYNC: supported in binutils >= 0.36. */
 	asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
 }
@@ -61,19 +61,19 @@ static inline void invlpgb_flush_user(un
 				      unsigned long addr)
 {
 	__invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA);
-	tlbsync();
+	__tlbsync();
 }
 
-static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
-						unsigned long addr,
-						u16 nr,
-						bool pmd_stride)
+static inline void __invlpgb_flush_user_nr(unsigned long pcid,
+					   unsigned long addr,
+					   u16 nr,
+					   bool pmd_stride)
 {
 	__invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
 }
 
 /* Flush all mappings for a given PCID, not including globals. */
-static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+static inline void __invlpgb_flush_single_pcid(unsigned long pcid)
 {
 	__invlpgb(0, pcid, 0, 0, 0, INVLPGB_PCID);
 }
@@ -82,11 +82,11 @@ static inline void invlpgb_flush_single_
 static inline void invlpgb_flush_all(void)
 {
 	__invlpgb(0, 0, 0, 0, 0, INVLPGB_INCLUDE_GLOBAL);
-	tlbsync();
+	__tlbsync();
 }
 
 /* Flush addr, including globals, for all PCIDs. */
-static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+static inline void __invlpgb_flush_addr(unsigned long addr, u16 nr)
 {
 	__invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL);
 }
@@ -95,7 +95,7 @@ static inline void invlpgb_flush_addr_no
 static inline void invlpgb_flush_all_nonglobals(void)
 {
 	__invlpgb(0, 0, 0, 0, 0, 0);
-	tlbsync();
+	__tlbsync();
 }
 
 #endif /* _ASM_X86_INVLPGB */
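
For completeness, roughly how I picture the reclaim batch path ending up
(read off the above, not a separate change):

	arch_tlbbatch_add_pending()
		invlpgb_flush_user_nr_nosync();	/* queue INVLPGB, set need_tlbsync */

	/*
	 * Preemption/migration in between is harmless: switch_mm_irqs_off()
	 * already does tlbsync() on the CPU that issued the INVLPGB.
	 */

	arch_tlbbatch_flush()
		tlbsync();	/* no-op on this CPU if nothing is pending */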

