linux-mm.kvack.org archive mirror
From: Byungchul Park <byungchul@sk.com>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: kernel_team@skhynix.com, akpm@linux-foundation.org,
	vernhao@tencent.com, mgorman@techsingularity.net,
	hughd@google.com, willy@infradead.org, david@redhat.com,
	peterz@infradead.org, luto@kernel.org, tglx@linutronix.de,
	mingo@redhat.com, bp@alien8.de, rjgolo@gmail.com
Subject: [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 17/25]  x86/tlb, riscv/tlb, arm64/tlbflush, mm: remove cpus from tlb shootdown that already have been done
Date: Wed, 26 Feb 2025 21:01:24 +0900	[thread overview]
Message-ID: <20250226120132.28469-17-byungchul@sk.com> (raw)
In-Reply-To: <20250226120132.28469-1-byungchul@sk.com>

The luf mechanism performs tlb shootdown lazily for mappings that have
already been unmapped.  However, it does not need to target cpus whose
tlb has already been flushed by others since the moment the shootdown
was requested.

Since luf already introduced its own generation number used as a global
timestamp, luf_ugen, it is possible to selectively pick out the cpus
whose required tlb flush has already been performed.

Introduce APIs that use the generation number to select and remove
those cpus, so that tlb shootdown can be performed with a smaller
cpumask, for all the CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH archs:
x86, riscv, and arm64.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 arch/arm64/include/asm/tlbflush.h |  26 +++++++
 arch/riscv/include/asm/tlbflush.h |   4 ++
 arch/riscv/mm/tlbflush.c          | 108 ++++++++++++++++++++++++++++++
 arch/x86/include/asm/tlbflush.h   |   4 ++
 arch/x86/mm/tlb.c                 | 108 ++++++++++++++++++++++++++++++
 include/linux/sched.h             |   1 +
 mm/internal.h                     |   4 ++
 mm/page_alloc.c                   |  32 +++++++--
 mm/rmap.c                         |  46 ++++++++++++-
 9 files changed, 327 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index f7036cd33e35c..ae3c981fcc218 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -347,6 +347,32 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 	dsb(ish);
 }
 
+static inline bool arch_tlbbatch_check_done(struct arch_tlbflush_unmap_batch *batch, unsigned long ugen)
+{
+	/*
+	 * Nothing is needed in this architecture.
+	 */
+	return true;
+}
+
+static inline bool arch_tlbbatch_diet(struct arch_tlbflush_unmap_batch *batch, unsigned long ugen)
+{
+	/*
+	 * Nothing is needed in this architecture.
+	 */
+	return true;
+}
+
+static inline void arch_tlbbatch_mark_ugen(struct arch_tlbflush_unmap_batch *batch, unsigned long ugen)
+{
+	/* nothing to do */
+}
+
+static inline void arch_mm_mark_ugen(struct mm_struct *mm, unsigned long ugen)
+{
+	/* nothing to do */
+}
+
 static inline void arch_tlbbatch_clear(struct arch_tlbflush_unmap_batch *batch)
 {
 	/* nothing to do */
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index cecd8e7e2a3bd..936bf9ce0abd9 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -64,6 +64,10 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
 		struct mm_struct *mm, unsigned long start, unsigned long end);
 void arch_flush_tlb_batched_pending(struct mm_struct *mm);
 void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+bool arch_tlbbatch_check_done(struct arch_tlbflush_unmap_batch *batch, unsigned long ugen);
+bool arch_tlbbatch_diet(struct arch_tlbflush_unmap_batch *batch, unsigned long ugen);
+void arch_tlbbatch_mark_ugen(struct arch_tlbflush_unmap_batch *batch, unsigned long ugen);
+void arch_mm_mark_ugen(struct mm_struct *mm, unsigned long ugen);
 
 static inline void arch_tlbbatch_clear(struct arch_tlbflush_unmap_batch *batch)
 {
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 38f4bea8a964a..6ce44370a8e11 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -201,3 +201,111 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 	__flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0,
 			  FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
 }
+
+static DEFINE_PER_CPU(atomic_long_t, ugen_done);
+
+static int __init luf_init_arch(void)
+{
+	int cpu;
+
+	for_each_cpu(cpu, cpu_possible_mask)
+		atomic_long_set(per_cpu_ptr(&ugen_done, cpu), LUF_UGEN_INIT - 1);
+
+	return 0;
+}
+early_initcall(luf_init_arch);
+
+/*
+ * batch will not be updated.
+ */
+bool arch_tlbbatch_check_done(struct arch_tlbflush_unmap_batch *batch,
+			unsigned long ugen)
+{
+	int cpu;
+
+	if (!ugen)
+		goto out;
+
+	for_each_cpu(cpu, &batch->cpumask) {
+		unsigned long done;
+
+		done = atomic_long_read(per_cpu_ptr(&ugen_done, cpu));
+		if (ugen_before(done, ugen))
+			return false;
+	}
+	return true;
+out:
+	return cpumask_empty(&batch->cpumask);
+}
+
+bool arch_tlbbatch_diet(struct arch_tlbflush_unmap_batch *batch,
+			unsigned long ugen)
+{
+	int cpu;
+
+	if (!ugen)
+		goto out;
+
+	for_each_cpu(cpu, &batch->cpumask) {
+		unsigned long done;
+
+		done = atomic_long_read(per_cpu_ptr(&ugen_done, cpu));
+		if (!ugen_before(done, ugen))
+			cpumask_clear_cpu(cpu, &batch->cpumask);
+	}
+out:
+	return cpumask_empty(&batch->cpumask);
+}
+
+void arch_tlbbatch_mark_ugen(struct arch_tlbflush_unmap_batch *batch,
+			     unsigned long ugen)
+{
+	int cpu;
+
+	if (!ugen)
+		return;
+
+	for_each_cpu(cpu, &batch->cpumask) {
+		atomic_long_t *done = per_cpu_ptr(&ugen_done, cpu);
+		unsigned long old = atomic_long_read(done);
+
+		/*
+		 * This check is racy.  The race can only leave ugen_done
+		 * smaller than it should be, which causes an unnecessary
+		 * tlb flush but keeps the behavior correct.
+		 */
+		if (!ugen_before(old, ugen))
+			continue;
+
+		/*
+		 * Optimization: on cmpxchg failure, just skip rather than retry.
+		 */
+		atomic_long_cmpxchg(done, old, ugen);
+	}
+}
+
+void arch_mm_mark_ugen(struct mm_struct *mm, unsigned long ugen)
+{
+	int cpu;
+
+	if (!ugen)
+		return;
+
+	for_each_cpu(cpu, mm_cpumask(mm)) {
+		atomic_long_t *done = per_cpu_ptr(&ugen_done, cpu);
+		unsigned long old = atomic_long_read(done);
+
+		/*
+		 * This check is racy.  The race can only leave ugen_done
+		 * smaller than it should be, which causes an unnecessary
+		 * tlb flush but keeps the behavior correct.
+		 */
+		if (!ugen_before(old, ugen))
+			continue;
+
+		/*
+		 * Optimization: on cmpxchg failure, just skip rather than retry.
+		 */
+		atomic_long_cmpxchg(done, old, ugen);
+	}
+}
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 52c54ca68ca9e..58ad7e6989bb1 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -293,6 +293,10 @@ static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
 }
 
 extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+extern bool arch_tlbbatch_check_done(struct arch_tlbflush_unmap_batch *batch, unsigned long ugen);
+extern bool arch_tlbbatch_diet(struct arch_tlbflush_unmap_batch *batch, unsigned long ugen);
+extern void arch_tlbbatch_mark_ugen(struct arch_tlbflush_unmap_batch *batch, unsigned long ugen);
+extern void arch_mm_mark_ugen(struct mm_struct *mm, unsigned long ugen);
 
 static inline void arch_tlbbatch_clear(struct arch_tlbflush_unmap_batch *batch)
 {
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 523e8bb6fba1f..be6068b60c32d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1270,6 +1270,114 @@ void __flush_tlb_all(void)
 }
 EXPORT_SYMBOL_GPL(__flush_tlb_all);
 
+static DEFINE_PER_CPU(atomic_long_t, ugen_done);
+
+static int __init luf_init_arch(void)
+{
+	int cpu;
+
+	for_each_cpu(cpu, cpu_possible_mask)
+		atomic_long_set(per_cpu_ptr(&ugen_done, cpu), LUF_UGEN_INIT - 1);
+
+	return 0;
+}
+early_initcall(luf_init_arch);
+
+/*
+ * batch will not be updated.
+ */
+bool arch_tlbbatch_check_done(struct arch_tlbflush_unmap_batch *batch,
+			unsigned long ugen)
+{
+	int cpu;
+
+	if (!ugen)
+		goto out;
+
+	for_each_cpu(cpu, &batch->cpumask) {
+		unsigned long done;
+
+		done = atomic_long_read(per_cpu_ptr(&ugen_done, cpu));
+		if (ugen_before(done, ugen))
+			return false;
+	}
+	return true;
+out:
+	return cpumask_empty(&batch->cpumask);
+}
+
+bool arch_tlbbatch_diet(struct arch_tlbflush_unmap_batch *batch,
+			unsigned long ugen)
+{
+	int cpu;
+
+	if (!ugen)
+		goto out;
+
+	for_each_cpu(cpu, &batch->cpumask) {
+		unsigned long done;
+
+		done = atomic_long_read(per_cpu_ptr(&ugen_done, cpu));
+		if (!ugen_before(done, ugen))
+			cpumask_clear_cpu(cpu, &batch->cpumask);
+	}
+out:
+	return cpumask_empty(&batch->cpumask);
+}
+
+void arch_tlbbatch_mark_ugen(struct arch_tlbflush_unmap_batch *batch,
+			     unsigned long ugen)
+{
+	int cpu;
+
+	if (!ugen)
+		return;
+
+	for_each_cpu(cpu, &batch->cpumask) {
+		atomic_long_t *done = per_cpu_ptr(&ugen_done, cpu);
+		unsigned long old = atomic_long_read(done);
+
+		/*
+		 * This check is racy.  The race can only leave ugen_done
+		 * smaller than it should be, which causes an unnecessary
+		 * tlb flush but keeps the behavior correct.
+		 */
+		if (!ugen_before(old, ugen))
+			continue;
+
+		/*
+		 * Optimization: on cmpxchg failure, just skip rather than retry.
+		 */
+		atomic_long_cmpxchg(done, old, ugen);
+	}
+}
+
+void arch_mm_mark_ugen(struct mm_struct *mm, unsigned long ugen)
+{
+	int cpu;
+
+	if (!ugen)
+		return;
+
+	for_each_cpu(cpu, mm_cpumask(mm)) {
+		atomic_long_t *done = per_cpu_ptr(&ugen_done, cpu);
+		unsigned long old = atomic_long_read(done);
+
+		/*
+		 * This check is racy.  The race can only leave ugen_done
+		 * smaller than it should be, which causes an unnecessary
+		 * tlb flush but keeps the behavior correct.
+		 */
+		if (!ugen_before(old, ugen))
+			continue;
+
+		/*
+		 * Optimization: on cmpxchg failure, just skip rather than retry.
+		 */
+		atomic_long_cmpxchg(done, old, ugen);
+	}
+}
+
 void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 {
 	struct flush_tlb_info *info;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 47a0a3ccb7b1a..31efc88ce911a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1403,6 +1403,7 @@ struct task_struct {
 #if defined(CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH)
 	int luf_no_shootdown;
 	int luf_takeoff_started;
+	unsigned long luf_ugen;
 #endif
 
 	struct tlbflush_unmap_batch	tlb_ubc;
diff --git a/mm/internal.h b/mm/internal.h
index 2429db598e265..9fccfd38e03f0 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1276,6 +1276,7 @@ void try_to_unmap_flush(void);
 void try_to_unmap_flush_dirty(void);
 void try_to_unmap_flush_takeoff(void);
 void flush_tlb_batched_pending(struct mm_struct *mm);
+void reset_batch(struct tlbflush_unmap_batch *batch);
 void fold_batch(struct tlbflush_unmap_batch *dst, struct tlbflush_unmap_batch *src, bool reset);
 void fold_luf_batch(struct luf_batch *dst, struct luf_batch *src);
 #else
@@ -1291,6 +1292,9 @@ static inline void try_to_unmap_flush_takeoff(void)
 static inline void flush_tlb_batched_pending(struct mm_struct *mm)
 {
 }
+static inline void reset_batch(struct tlbflush_unmap_batch *batch)
+{
+}
 static inline void fold_batch(struct tlbflush_unmap_batch *dst, struct tlbflush_unmap_batch *src, bool reset)
 {
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 530c5c16ab323..7b023b34d53da 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -668,9 +668,11 @@ bool luf_takeoff_start(void)
  */
 void luf_takeoff_end(void)
 {
+	struct tlbflush_unmap_batch *tlb_ubc_takeoff = &current->tlb_ubc_takeoff;
 	unsigned long flags;
 	bool no_shootdown;
 	bool outmost = false;
+	unsigned long cur_luf_ugen;
 
 	local_irq_save(flags);
 	VM_WARN_ON(!current->luf_takeoff_started);
@@ -697,10 +699,19 @@ void luf_takeoff_end(void)
 	if (no_shootdown)
 		goto out;
 
+	cur_luf_ugen = current->luf_ugen;
+
+	current->luf_ugen = 0;
+
+	if (cur_luf_ugen && arch_tlbbatch_diet(&tlb_ubc_takeoff->arch, cur_luf_ugen))
+		reset_batch(tlb_ubc_takeoff);
+
 	try_to_unmap_flush_takeoff();
 out:
-	if (outmost)
+	if (outmost) {
 		VM_WARN_ON(current->luf_no_shootdown);
+		VM_WARN_ON(current->luf_ugen);
+	}
 }
 
 /*
@@ -757,6 +768,7 @@ bool luf_takeoff_check_and_fold(struct page *page)
 	struct tlbflush_unmap_batch *tlb_ubc_takeoff = &current->tlb_ubc_takeoff;
 	unsigned short luf_key = page_luf_key(page);
 	struct luf_batch *lb;
+	unsigned long lb_ugen;
 	unsigned long flags;
 
 	/*
@@ -770,13 +782,25 @@ bool luf_takeoff_check_and_fold(struct page *page)
 	if (!luf_key)
 		return true;
 
-	if (current->luf_no_shootdown)
-		return false;
-
 	lb = &luf_batch[luf_key];
 	read_lock_irqsave(&lb->lock, flags);
+	lb_ugen = lb->ugen;
+
+	if (arch_tlbbatch_check_done(&lb->batch.arch, lb_ugen)) {
+		read_unlock_irqrestore(&lb->lock, flags);
+		return true;
+	}
+
+	if (current->luf_no_shootdown) {
+		read_unlock_irqrestore(&lb->lock, flags);
+		return false;
+	}
+
 	fold_batch(tlb_ubc_takeoff, &lb->batch, false);
 	read_unlock_irqrestore(&lb->lock, flags);
+
+	if (!current->luf_ugen || ugen_before(current->luf_ugen, lb_ugen))
+		current->luf_ugen = lb_ugen;
 	return true;
 }
 #endif
diff --git a/mm/rmap.c b/mm/rmap.c
index 2191cf1d38270..579c75f46c170 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -656,7 +656,7 @@ static unsigned long new_luf_ugen(void)
 	return ugen;
 }
 
-static void reset_batch(struct tlbflush_unmap_batch *batch)
+void reset_batch(struct tlbflush_unmap_batch *batch)
 {
 	arch_tlbbatch_clear(&batch->arch);
 	batch->flush_required = false;
@@ -743,8 +743,14 @@ static void __fold_luf_batch(struct luf_batch *dst_lb,
 	 * more tlb shootdown might be needed to fulfill the newer
 	 * request.  Conservertively keep the newer one.
 	 */
-	if (!dst_lb->ugen || ugen_before(dst_lb->ugen, src_ugen))
+	if (!dst_lb->ugen || ugen_before(dst_lb->ugen, src_ugen)) {
+		/*
+		 * Good chance to shrink the batch using the old ugen.
+		 */
+		if (dst_lb->ugen && arch_tlbbatch_diet(&dst_lb->batch.arch, dst_lb->ugen))
+			reset_batch(&dst_lb->batch);
 		dst_lb->ugen = src_ugen;
+	}
 	fold_batch(&dst_lb->batch, src_batch, false);
 }
 
@@ -772,17 +778,45 @@ void fold_luf_batch(struct luf_batch *dst, struct luf_batch *src)
 	read_unlock_irqrestore(&src->lock, flags);
 }
 
+static unsigned long tlb_flush_start(void)
+{
+	/*
+	 * The memory barrier implied by the atomic operation
+	 * ensures that luf_ugen is read before the tlb flush
+	 * that follows.
+	 */
+	return new_luf_ugen();
+}
+
+static void tlb_flush_end(struct arch_tlbflush_unmap_batch *arch,
+		struct mm_struct *mm, unsigned long ugen)
+{
+	/*
+	 * Prevent the following marking from being reordered before
+	 * the actual tlb flush.
+	 */
+	smp_mb();
+
+	if (arch)
+		arch_tlbbatch_mark_ugen(arch, ugen);
+	if (mm)
+		arch_mm_mark_ugen(mm, ugen);
+}
+
 void try_to_unmap_flush_takeoff(void)
 {
 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
 	struct tlbflush_unmap_batch *tlb_ubc_ro = &current->tlb_ubc_ro;
 	struct tlbflush_unmap_batch *tlb_ubc_luf = &current->tlb_ubc_luf;
 	struct tlbflush_unmap_batch *tlb_ubc_takeoff = &current->tlb_ubc_takeoff;
+	unsigned long ugen;
 
 	if (!tlb_ubc_takeoff->flush_required)
 		return;
 
+	ugen = tlb_flush_start();
 	arch_tlbbatch_flush(&tlb_ubc_takeoff->arch);
+	tlb_flush_end(&tlb_ubc_takeoff->arch, NULL, ugen);
 
 	/*
 	 * Now that tlb shootdown of tlb_ubc_takeoff has been performed,
@@ -871,13 +905,17 @@ void try_to_unmap_flush(void)
 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
 	struct tlbflush_unmap_batch *tlb_ubc_ro = &current->tlb_ubc_ro;
 	struct tlbflush_unmap_batch *tlb_ubc_luf = &current->tlb_ubc_luf;
+	unsigned long ugen;
 
 	fold_batch(tlb_ubc, tlb_ubc_ro, true);
 	fold_batch(tlb_ubc, tlb_ubc_luf, true);
 	if (!tlb_ubc->flush_required)
 		return;
 
+	ugen = tlb_flush_start();
 	arch_tlbbatch_flush(&tlb_ubc->arch);
+	tlb_flush_end(&tlb_ubc->arch, NULL, ugen);
+
 	reset_batch(tlb_ubc);
 }
 
@@ -1009,7 +1047,11 @@ void flush_tlb_batched_pending(struct mm_struct *mm)
 	int flushed = batch >> TLB_FLUSH_BATCH_FLUSHED_SHIFT;
 
 	if (pending != flushed) {
+		unsigned long ugen;
+
+		ugen = tlb_flush_start();
 		arch_flush_tlb_batched_pending(mm);
+		tlb_flush_end(NULL, mm, ugen);
 
 		/*
 		 * If the new TLB flushing is pending during flushing, leave
-- 
2.17.1




Thread overview: 102+ messages
2025-02-20  5:20 [RFC PATCH v12 00/26] LUF(Lazy Unmap Flush) reducing tlb numbers over 90% Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 01/26] x86/tlb: add APIs manipulating tlb batch's arch data Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 02/26] arm64/tlbflush: " Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 03/26] riscv/tlb: " Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 04/26] x86/tlb, riscv/tlb, mm/rmap: separate arch_tlbbatch_clear() out of arch_tlbbatch_flush() Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 05/26] mm/buddy: make room for a new variable, luf_key, in struct page Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 06/26] mm: move should_skip_kasan_poison() to mm/internal.h Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 07/26] mm: introduce luf_ugen to be used as a global timestamp Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 08/26] mm: introduce luf_batch to be used as hash table to store luf meta data Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 09/26] mm: introduce API to perform tlb shootdown on exit from page allocator Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 10/26] mm: introduce APIs to check if the page allocation is tlb shootdownable Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 11/26] mm: deliver luf_key to pcp or buddy on free after unmapping Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 12/26] mm: delimit critical sections to take off pages from pcp or buddy alloctor Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 13/26] mm: introduce pend_list in struct free_area to track luf'd pages Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 14/26] mm/rmap: recognize read-only tlb entries during batched tlb flush Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 15/26] fs, filemap: refactor to gather the scattered ->write_{begin,end}() calls Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 16/26] mm: implement LUF(Lazy Unmap Flush) defering tlb flush when folios get unmapped Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 17/26] x86/tlb, riscv/tlb, arm64/tlbflush, mm: remove cpus from tlb shootdown that already have been done Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 18/26] mm/page_alloc: retry 3 times to take pcp pages on luf check failure Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 19/26] mm: skip luf tlb flush for luf'd mm that already has been done Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 20/26] mm, fs: skip tlb flushes for luf'd filemap " Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 21/26] mm: perform luf tlb shootdown per zone in batched manner Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 22/26] mm/page_alloc: not allow to tlb shootdown if !preemptable() && non_luf_pages_ok() Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 23/26] mm: separate move/undo parts from migrate_pages_batch() Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 24/26] mm/migrate: apply luf mechanism to unmapping during migration Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 25/26] mm/vmscan: apply luf mechanism to unmapping during folio reclaim Byungchul Park
2025-02-20  5:20 ` [RFC PATCH v12 26/26] mm/luf: implement luf debug feature Byungchul Park
2025-02-20 10:32 ` [RFC PATCH v12 00/26] LUF(Lazy Unmap Flush) reducing tlb numbers over 90% Hillf Danton
2025-02-20 10:51   ` Byungchul Park
2025-02-20 11:09   ` Byungchul Park
2025-02-20 11:49     ` Hillf Danton
2025-02-20 12:20       ` Byungchul Park
2025-02-20 12:40       ` Byungchul Park
2025-02-20 13:54       ` Matthew Wilcox
2025-02-20 15:09         ` Steven Rostedt
2025-02-20 22:53           ` Kent Overstreet
2025-02-20 23:05             ` Steven Rostedt
2025-02-20 23:21               ` Kent Overstreet
2025-02-20 23:25           ` Hillf Danton
2025-02-20 23:44             ` Steven Rostedt
     [not found]             ` <20250221230556.2479-1-hdanton@sina.com>
2025-02-22  7:16               ` Greg KH
     [not found]               ` <20250222101100.2531-1-hdanton@sina.com>
2025-02-22 13:57                 ` Greg KH
2025-03-10 23:24       ` Dan Williams
2025-03-10 23:53         ` Barry Song
     [not found]       ` <20250619134922.1219-1-hdanton@sina.com>
2025-06-20 17:00         ` Dan Williams
2025-02-20 15:15 ` Dave Hansen
2025-02-20 15:29   ` Vlastimil Babka
2025-02-20 23:37     ` Byungchul Park
2025-02-26 11:30       ` RFC v12 rebased on v6.14-rc4 Byungchul Park
2025-02-26 12:03         ` [RFC PATCH v12 based on v6.14-rc4 01/25] x86/tlb: add APIs manipulating tlb batch's arch data Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 02/25] arm64/tlbflush: " Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 03/25] riscv/tlb: " Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 04/25] x86/tlb, riscv/tlb, mm/rmap: separate arch_tlbbatch_clear() out of arch_tlbbatch_flush() Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 05/25] mm/buddy: make room for a new variable, luf_key, in struct page Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 06/25] mm: move should_skip_kasan_poison() to mm/internal.h Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 07/25] mm: introduce luf_ugen to be used as a global timestamp Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 08/25] mm: introduce luf_batch to be used as hash table to store luf meta data Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 09/25] mm: introduce API to perform tlb shootdown on exit from page allocator Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 10/25] mm: introduce APIs to check if the page allocation is tlb shootdownable Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 11/25] mm: deliver luf_key to pcp or buddy on free after unmapping Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 12/25] mm: delimit critical sections to take off pages from pcp or buddy alloctor Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 13/25] mm: introduce pend_list in struct free_area to track luf'd pages Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 14/25] mm/rmap: recognize read-only tlb entries during batched tlb flush Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 15/25] fs, filemap: refactor to gather the scattered ->write_{begin,end}() calls Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 16/25] mm: implement LUF(Lazy Unmap Flush) defering tlb flush when folios get unmapped Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 17/25] x86/tlb, riscv/tlb, arm64/tlbflush, mm: remove cpus from tlb shootdown that already have been done Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 18/25] mm/page_alloc: retry 3 times to take pcp pages on luf check failure Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 19/25] mm: skip luf tlb flush for luf'd mm that already has been done Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 20/25] mm, fs: skip tlb flushes for luf'd filemap " Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 21/25] mm: perform luf tlb shootdown per zone in batched manner Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 22/25] mm/page_alloc: not allow to tlb shootdown if !preemptable() && non_luf_pages_ok() Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 23/25] mm/migrate: apply luf mechanism to unmapping during migration Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 24/25] mm/vmscan: apply luf mechanism to unmapping during folio reclaim Byungchul Park
2025-02-26 12:03           ` [RFC PATCH v12 based on v6.14-rc4 25/25] mm/luf: implement luf debug feature Byungchul Park
2025-02-26 11:33       ` RFC v12 rebased on mm-unstable as of Feb 21, 2025 Byungchul Park
2025-02-26 12:01         ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 01/25] x86/tlb: add APIs manipulating tlb batch's arch data Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 02/25] arm64/tlbflush: " Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 03/25] riscv/tlb: " Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 04/25] x86/tlb, riscv/tlb, mm/rmap: separate arch_tlbbatch_clear() out of arch_tlbbatch_flush() Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 05/25] mm/buddy: make room for a new variable, luf_key, in struct page Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 06/25] mm: move should_skip_kasan_poison() to mm/internal.h Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 07/25] mm: introduce luf_ugen to be used as a global timestamp Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 08/25] mm: introduce luf_batch to be used as hash table to store luf meta data Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 09/25] mm: introduce API to perform tlb shootdown on exit from page allocator Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 10/25] mm: introduce APIs to check if the page allocation is tlb shootdownable Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 11/25] mm: deliver luf_key to pcp or buddy on free after unmapping Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 12/25] mm: delimit critical sections to take off pages from pcp or buddy alloctor Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 13/25] mm: introduce pend_list in struct free_area to track luf'd pages Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 14/25] mm/rmap: recognize read-only tlb entries during batched tlb flush Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 15/25] fs, filemap: refactor to gather the scattered ->write_{begin,end}() calls Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 16/25] mm: implement LUF(Lazy Unmap Flush) defering tlb flush when folios get unmapped Byungchul Park
2025-02-26 12:01           ` Byungchul Park [this message]
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 18/25] mm/page_alloc: retry 3 times to take pcp pages on luf check failure Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 19/25] mm: skip luf tlb flush for luf'd mm that already has been done Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 20/25] mm, fs: skip tlb flushes for luf'd filemap " Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 21/25] mm: perform luf tlb shootdown per zone in batched manner Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 22/25] mm/page_alloc: not allow to tlb shootdown if !preemptable() && non_luf_pages_ok() Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 23/25] mm/migrate: apply luf mechanism to unmapping during migration Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 24/25] mm/vmscan: apply luf mechanism to unmapping during folio reclaim Byungchul Park
2025-02-26 12:01           ` [RFC PATCH v12 based on mm-unstable as of Feb 21, 2025 25/25] mm/luf: implement luf debug feature Byungchul Park
2025-02-22  1:14     ` [RFC PATCH v12 00/26] LUF(Lazy Unmap Flush) reducing tlb numbers over 90% Shakeel Butt
2025-02-20 23:23   ` Byungchul Park
