linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Andrea Arcangeli <aarcange@redhat.com>,
	Avi Kivity <avi@redhat.com>, Thomas Gleixner <tglx@linutronix.de>,
	Rik van Riel <riel@redhat.com>, Ingo Molnar <mingo@elte.hu>,
	akpm@linux-foundation.org,
	Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-mm@kvack.org,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	David Miller <davem@davemloft.net>,
	Hugh Dickins <hugh.dickins@tiscali.co.uk>,
	Mel Gorman <mel@csn.ul.ie>, Nick Piggin <npiggin@kernel.dk>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Paul McKenney <paulmck@linux.vnet.ibm.com>,
	Yanmin Zhang <yanmin_zhang@linux.intel.com>
Subject: [PATCH 25/25] mm, arch: Ensure we never tlb_flush_mmu() from atomic context
Date: Tue, 25 Jan 2011 18:31:36 +0100	[thread overview]
Message-ID: <20110125174908.531253165@chello.nl> (raw)
In-Reply-To: <20110125173111.720927511@chello.nl>

[-- Attachment #1: mm-flush-vs-pte_lock.patch --]
[-- Type: text/plain, Size: 10661 bytes --]

Hugh noted that we could still end up flushing the batch from atomic
context because we do tlb_remove_page() while holding the pte_lock.

This will still generate immense latencies, more so now than ever
before due to the larger batches. Break tlb_remove_page() into two
functions, one that queues the page and one that flushes the queue.

Leave the tlb_remove_page() interface for now with the old semantics
but add a might_sleep() in there to detect callers from atomic
contexts.

XXX should probably fold back into the mmu_gather preempt patches for
the various architectures.

Reported-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 arch/arm/include/asm/tlb.h  |   17 ++++++++++++++++-
 arch/ia64/include/asm/tlb.h |   22 ++++++++++++++++++----
 arch/s390/include/asm/tlb.h |   18 ++++++++++++------
 arch/sh/include/asm/tlb.h   |   17 ++++++++++++++++-
 arch/um/include/asm/tlb.h   |   15 +++++++++++----
 include/asm-generic/tlb.h   |   22 +++++++++++++++-------
 mm/memory.c                 |   14 +++++++++++---
 7 files changed, 99 insertions(+), 26 deletions(-)

Index: linux-2.6/include/asm-generic/tlb.h
===================================================================
--- linux-2.6.orig/include/asm-generic/tlb.h
+++ linux-2.6/include/asm-generic/tlb.h
@@ -146,7 +146,7 @@ tlb_gather_mmu(struct mmu_gather *tlb, s
 }
 
 static inline void
-tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+tlb_flush_mmu(struct mmu_gather *tlb)
 {
 	struct mmu_gather_batch *batch;
 
@@ -176,7 +176,7 @@ tlb_finish_mmu(struct mmu_gather *tlb, u
 {
 	struct mmu_gather_batch *batch, *next;
 
-	tlb_flush_mmu(tlb, start, end);
+	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
@@ -193,7 +193,7 @@ tlb_finish_mmu(struct mmu_gather *tlb, u
  *	handling the additional races in SMP caused by other CPUs caching valid
  *	mappings in their TLBs.
  */
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	struct mmu_gather_batch *batch;
 
@@ -201,17 +201,25 @@ static inline void tlb_remove_page(struc
 
 	if (tlb_fast_mode(tlb)) {
 		free_page_and_swap_cache(page);
-		return;
+		return 0;
 	}
 
 	batch = tlb->active;
+	batch->pages[batch->nr++] = page;
 	if (batch->nr == batch->max) {
 		if (!tlb_next_batch(tlb))
-			tlb_flush_mmu(tlb, 0, 0);
-		batch = tlb->active;
+			return 1;
 	}
 
-	batch->pages[batch->nr++] = page;
+	return 0;
+}
+
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	might_sleep();
+
+	if (__tlb_remove_page(tlb, page))
+		tlb_flush_mmu(tlb);
 }
 
 /**
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -990,11 +990,12 @@ static unsigned long zap_pte_range(struc
 {
 	struct mm_struct *mm = tlb->mm;
 	int rss[NR_MM_COUNTERS];
+	int need_flush = 0;
 	spinlock_t *ptl;
 	pte_t *pte;
 
 	init_rss_vec(rss);
-
+again:
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
 	do {
@@ -1048,7 +1049,7 @@ static unsigned long zap_pte_range(struc
 			page_remove_rmap(page);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
-			tlb_remove_page(tlb, page);
+			need_flush = __tlb_remove_page(tlb, page);
 			continue;
 		}
 		/*
@@ -1069,12 +1070,19 @@ static unsigned long zap_pte_range(struc
 				print_bad_pte(vma, addr, ptent, NULL);
 		}
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (pte++, addr += PAGE_SIZE, (addr != end && !need_flush));
 
 	add_mm_rss_vec(mm, rss);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
+	if (need_flush) {
+		need_flush = 0;
+		tlb_flush_mmu(tlb);
+		if (addr != end)
+			goto again;
+	}
+
 	return addr;
 }
 
Index: linux-2.6/arch/arm/include/asm/tlb.h
===================================================================
--- linux-2.6.orig/arch/arm/include/asm/tlb.h
+++ linux-2.6/arch/arm/include/asm/tlb.h
@@ -93,7 +93,22 @@ tlb_end_vma(struct mmu_gather *tlb, stru
 		flush_tlb_range(vma, tlb->range_start, tlb->range_end);
 }
 
-#define tlb_remove_page(tlb,page)	free_page_and_swap_cache(page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	free_page_and_swap_cache(page);
+	return 0;
+}
+
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	might_sleep();
+	__tlb_remove_page(tlb, page);
+}
+
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+}
+
 #define pte_free_tlb(tlb, ptep, addr)	pte_free((tlb)->mm, ptep)
 #define pmd_free_tlb(tlb, pmdp, addr)	pmd_free((tlb)->mm, pmdp)
 
Index: linux-2.6/arch/ia64/include/asm/tlb.h
===================================================================
--- linux-2.6.orig/arch/ia64/include/asm/tlb.h
+++ linux-2.6/arch/ia64/include/asm/tlb.h
@@ -204,14 +204,13 @@ tlb_finish_mmu(struct mmu_gather *tlb, u
  * must be delayed until after the TLB has been flushed (see comments at the beginning of
  * this file).
  */
-static inline void
-tlb_remove_page (struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	tlb->need_flush = 1;
 
 	if (tlb_fast_mode(tlb)) {
 		free_page_and_swap_cache(page);
-		return;
+		return 0;
 	}
 
 	if (!tlb->nr && tlb->pages == tlb->local)
@@ -219,7 +218,22 @@ tlb_remove_page (struct mmu_gather *tlb,
 
 	tlb->pages[tlb->nr++] = page;
 	if (tlb->nr >= tlb->max)
-		ia64_tlb_flush_mmu(tlb, tlb->start_addr, tlb->end_addr);
+		return 1;
+
+	return 0;
+}
+
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+	ia64_tlb_flush_mmu(tlb, tlb->start_addr, tlb->end_addr);
+}
+
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	might_sleep();
+
+	if (__tlb_remove_page(tlb, page))
+		tlb_flush_mmu(tlb);
 }
 
 /*
Index: linux-2.6/arch/s390/include/asm/tlb.h
===================================================================
--- linux-2.6.orig/arch/s390/include/asm/tlb.h
+++ linux-2.6/arch/s390/include/asm/tlb.h
@@ -64,8 +64,7 @@ static inline void tlb_gather_mmu(struct
 	tlb->nr_pxds = tlb->max;
 }
 
-static inline void tlb_flush_mmu(struct mmu_gather *tlb,
-				 unsigned long start, unsigned long end)
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 {
 	if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < tlb->max))
 		__tlb_flush_mm(tlb->mm);
@@ -78,7 +77,7 @@ static inline void tlb_flush_mmu(struct 
 static inline void tlb_finish_mmu(struct mmu_gather *tlb,
 				  unsigned long start, unsigned long end)
 {
-	tlb_flush_mmu(tlb, start, end);
+	tlb_flush_mmu(tlb);
 
 	rcu_table_freelist_finish();
 
@@ -94,8 +93,15 @@ static inline void tlb_finish_mmu(struct
  * tlb_ptep_clear_flush. In both flush modes the tlb fo a page cache page
  * has already been freed, so just do free_page_and_swap_cache.
  */
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	free_page_and_swap_cache(page);
+	return 0;
+}
+
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
+	might_sleep();
 	free_page_and_swap_cache(page);
 }
 
@@ -109,7 +115,7 @@ static inline void pte_free_tlb(struct m
 	if (!tlb->fullmm) {
 		tlb->array[tlb->nr_ptes++] = pte;
 		if (tlb->nr_ptes >= tlb->nr_pxds)
-			tlb_flush_mmu(tlb, 0, 0);
+			tlb_flush_mmu(tlb);
 	} else
 		page_table_free(tlb->mm, (unsigned long *) pte);
 }
@@ -130,7 +136,7 @@ static inline void pmd_free_tlb(struct m
 	if (!tlb->fullmm) {
 		tlb->array[--tlb->nr_pxds] = pmd;
 		if (tlb->nr_ptes >= tlb->nr_pxds)
-			tlb_flush_mmu(tlb, 0, 0);
+			tlb_flush_mmu(tlb);
 	} else
 		crst_table_free(tlb->mm, (unsigned long *) pmd);
 #endif
@@ -152,7 +158,7 @@ static inline void pud_free_tlb(struct m
 	if (!tlb->fullmm) {
 		tlb->array[--tlb->nr_pxds] = pud;
 		if (tlb->nr_ptes >= tlb->nr_pxds)
-			tlb_flush_mmu(tlb, 0, 0);
+			tlb_flush_mmu(tlb);
 	} else
 		crst_table_free(tlb->mm, (unsigned long *) pud);
 #endif
Index: linux-2.6/arch/sh/include/asm/tlb.h
===================================================================
--- linux-2.6.orig/arch/sh/include/asm/tlb.h
+++ linux-2.6/arch/sh/include/asm/tlb.h
@@ -83,7 +83,22 @@ tlb_end_vma(struct mmu_gather *tlb, stru
 	}
 }
 
-#define tlb_remove_page(tlb,page)	free_page_and_swap_cache(page)
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+}
+
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	free_page_and_swap_cache(page);
+	return 0;
+}
+
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	might_sleep();
+	__tlb_remove_page(tlb, page);
+}
+
 #define pte_free_tlb(tlb, ptep, addr)	pte_free((tlb)->mm, ptep)
 #define pmd_free_tlb(tlb, pmdp, addr)	pmd_free((tlb)->mm, pmdp)
 #define pud_free_tlb(tlb, pudp, addr)	pud_free((tlb)->mm, pudp)
Index: linux-2.6/arch/um/include/asm/tlb.h
===================================================================
--- linux-2.6.orig/arch/um/include/asm/tlb.h
+++ linux-2.6/arch/um/include/asm/tlb.h
@@ -57,7 +57,7 @@ extern void flush_tlb_mm_range(struct mm
 			       unsigned long end);
 
 static inline void
-tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+tlb_flush_mmu(struct mmu_gather *tlb)
 {
 	if (!tlb->need_flush)
 		return;
@@ -73,7 +73,7 @@ tlb_flush_mmu(struct mmu_gather *tlb, un
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-	tlb_flush_mmu(tlb, start, end);
+	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
@@ -84,11 +84,18 @@ tlb_finish_mmu(struct mmu_gather *tlb, u
  *	while handling the additional races in SMP caused by other CPUs
  *	caching valid mappings in their TLBs.
  */
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	tlb->need_flush = 1;
 	free_page_and_swap_cache(page);
-	return;
+	return 0;
+}
+
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	might_sleep();
+
+	__tlb_remove_page(tlb, page);
 }
 
 /**


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2011-01-25 17:59 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-01-25 17:31 [PATCH 00/25] mm: Preemptibility -v7 Peter Zijlstra
2011-01-25 17:31 ` [PATCH 01/25] tile: Fix __pte_free_tlb Peter Zijlstra
2011-02-04 20:39   ` Chris Metcalf
2011-02-07 13:55     ` Peter Zijlstra
2011-02-23 20:59       ` Chris Metcalf
2011-01-25 17:31 ` [PATCH 02/25] mm: Preemptible mmu_gather Peter Zijlstra
2011-01-25 17:31 ` [PATCH 03/25] powerpc: " Peter Zijlstra
2011-01-25 17:31 ` [PATCH 04/25] sparc: " Peter Zijlstra
2011-01-25 20:30   ` David Miller
2011-01-25 17:31 ` [PATCH 05/25] s390: preemptible mmu_gather Peter Zijlstra
2011-01-25 17:31 ` [PATCH 06/25] arm: Preemptible mmu_gather Peter Zijlstra
2011-01-25 17:31 ` [PATCH 07/25] sh: " Peter Zijlstra
2011-01-25 17:31 ` [PATCH 08/25] um: " Peter Zijlstra
2011-01-25 17:31 ` [PATCH 09/25] ia64: " Peter Zijlstra
2011-01-25 20:12   ` Tony Luck
2011-01-25 20:22     ` Peter Zijlstra
2011-01-25 21:23       ` Tony Luck
2011-01-26 11:01         ` Peter Zijlstra
2011-01-25 17:31 ` [PATCH 10/25] mm: Now that all old mmu_gather code is gone, remove the storage Peter Zijlstra
2011-01-25 17:31 ` [PATCH 11/25] mm, powerpc: Move the RCU page-table freeing into generic code Peter Zijlstra
2011-01-25 17:31 ` [PATCH 12/25] lockdep, mutex: Provide mutex_lock_nest_lock Peter Zijlstra
2011-01-25 17:31 ` [PATCH 13/25] mutex: Provide mutex_is_contended Peter Zijlstra
2011-01-25 17:31 ` [PATCH 14/25] mm: Convert i_mmap_lock to a mutex Peter Zijlstra
2011-01-25 17:31 ` [PATCH 15/25] mm: Extended batches for generic mmu_gather Peter Zijlstra
2011-01-25 17:31 ` [PATCH 16/25] mm: Revert page_lock_anon_vma() lock annotation Peter Zijlstra
2011-01-25 17:31 ` [PATCH 17/25] mm: Improve page_lock_anon_vma() comment Peter Zijlstra
2011-01-25 17:31 ` [PATCH 18/25] mm: Rename drop_anon_vma to put_anon_vma Peter Zijlstra
2011-01-25 17:31 ` [PATCH 19/25] mm: Move anon_vma ref out from under CONFIG_KSM Peter Zijlstra
2011-01-25 17:31 ` [PATCH 20/25] mm: Simplify anon_vma refcounts Peter Zijlstra
2011-01-25 20:16   ` Linus Torvalds
2011-01-25 20:31     ` Peter Zijlstra
2011-01-25 20:37       ` Linus Torvalds
2011-01-25 17:31 ` [PATCH 21/25] mm: Use refcounts for page_lock_anon_vma() Peter Zijlstra
2011-01-25 17:31 ` [PATCH 22/25] mm: Convert anon_vma->lock to a mutex Peter Zijlstra
2011-02-03  5:27   ` KOSAKI Motohiro
2011-02-03 15:04     ` Peter Zijlstra
2011-02-04  4:35       ` KOSAKI Motohiro
2011-01-25 17:31 ` [PATCH 23/25] mm: Optimize page_lock_anon_vma() fast-path Peter Zijlstra
2011-01-25 17:31 ` [PATCH 24/25] mm: Remove i_mmap_mutex lockbreak Peter Zijlstra
2011-01-25 17:31 ` Peter Zijlstra [this message]
2011-01-25 18:32 ` [PATCH 00/25] mm: Preemptibility -v7 Sam Ravnborg
2011-01-25 19:28   ` Peter Zijlstra
2011-01-25 19:41     ` Sam Ravnborg
2011-01-25 19:45 ` Andi Kleen
     [not found]   ` <1295987985.28776.1118.camel@laptop>
2011-01-25 20:47     ` Andi Kleen
2011-01-25 21:09       ` Peter Zijlstra
2011-01-26 13:13 ` [RFC][PATCH 26/25] mm, arch: Convert ia64, arm, sh to generic tlb Peter Zijlstra
2011-02-17 12:06 ` [PATCH 00/25] mm: Preemptibility -v7 Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110125174908.531253165@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=avi@redhat.com \
    --cc=benh@kernel.crashing.org \
    --cc=davem@davemloft.net \
    --cc=hugh.dickins@tiscali.co.uk \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mel@csn.ul.ie \
    --cc=mingo@elte.hu \
    --cc=npiggin@kernel.dk \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=riel@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=yanmin_zhang@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox