Re: [Bug] Reproducible data corruption on i5-3340M: Please continue your great work! :-)

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Linus Torvalds <torvalds@linux-foundation.org>
To: Ben Tebulin <tebulin@googlemail.com>
Cc: Michal Hocko <mhocko@suse.cz>, Mel Gorman <mgorman@suse.de>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Balbir Singh <bsingharora@gmail.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	linux-mm <linux-mm@kvack.org>, Rik van Riel <riel@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	LKML <linux-kernel@vger.kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	"linux-arch@vger.kernel.org" <linux-arch@vger.kernel.org>
Subject: Re: [Bug] Reproducible data corruption on i5-3340M: Please continue your great work! :-)
Date: Thu, 15 Aug 2013 17:33:28 -0700	[thread overview]
Message-ID: <CA+55aFwFx7uhtDTX5vfiYRo+keLmuvxvSFupU4nB8g1KCN-WVg@mail.gmail.com> (raw)
In-Reply-To: <520D5ED2.9040403@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1595 bytes --]

On Thu, Aug 15, 2013 at 4:05 PM, Ben Tebulin <tebulin@googlemail.com> wrote:
>
>> Ben, please test. I'm worried that the problem you see is something
>> even more fundamentally wrong with the whole "oops, must flush in the
>> middle" logic, but I'm _hoping_ this fixes it.
>
> It's gone.
>
> Really!
>
> I git-fsck'ed successfully around 30 times in a row.
> And even all the other things still seem to work ;-)

Goodie. I think I'm just going to commit it (with the speling fixes
for other architectures) asap. It's bigger than I'd like, but it's a
lot simpler than the alternatives of trying to figure out exactly
which call chain got things wrong with the previous confusing model.

Thanks for bisecting and testing.

> Honestly I have to confess that I'm deeply impressed how this finally
> worked out: I just threw a particular, innocent-looking commit hash and
> nothing more into the round.

Being able to bisect the exact commit that introduced the bad behavior
is *very* powerful debugging aid, and in fact the smaller and more
innocent-looking the bisected commit is, the easier it generally is to
then say "ok, it must be related to this one particular issue". So the
bisection really pinpointed the area. After that it was just a matter
of reading the source code and seeing what looked suspicious.

I'll probably delay committing it until tomorrow, in the hope that
somebody using one of the other architectures will at least ack that
it compiles. I'm re-attaching the patch (with the two "logn" -> "long"
fixes) just to encourage that. Hint hint, everybody..

               Linus

[-- Attachment #2: patch.diff --]
[-- Type: application/octet-stream, Size: 10899 bytes --]

 arch/arm/include/asm/tlb.h   |  7 +++++--
 arch/arm64/include/asm/tlb.h |  7 +++++--
 arch/ia64/include/asm/tlb.h  |  9 ++++++---
 arch/s390/include/asm/tlb.h  |  8 ++++++--
 arch/sh/include/asm/tlb.h    |  6 ++++--
 arch/um/include/asm/tlb.h    |  6 ++++--
 fs/exec.c                    |  4 ++--
 include/asm-generic/tlb.h    |  2 +-
 mm/hugetlb.c                 |  2 +-
 mm/memory.c                  | 36 +++++++++++++++++++++---------------
 mm/mmap.c                    |  4 ++--
 11 files changed, 57 insertions(+), 34 deletions(-)

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 46e7cfb3e721..0baf7f0d9394 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -43,6 +43,7 @@ struct mmu_gather {
 	struct mm_struct	*mm;
 	unsigned int		fullmm;
 	struct vm_area_struct	*vma;
+	unsigned long		start, end;
 	unsigned long		range_start;
 	unsigned long		range_end;
 	unsigned int		nr;
@@ -107,10 +108,12 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = fullmm;
+	tlb->fullmm = !(start | (end+1));
+	tlb->start = start;
+	tlb->end = end;
 	tlb->vma = NULL;
 	tlb->max = ARRAY_SIZE(tlb->local);
 	tlb->pages = tlb->local;
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 46b3beb4b773..717031a762c2 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -35,6 +35,7 @@ struct mmu_gather {
 	struct mm_struct	*mm;
 	unsigned int		fullmm;
 	struct vm_area_struct	*vma;
+	unsigned long		start, end;
 	unsigned long		range_start;
 	unsigned long		range_end;
 	unsigned int		nr;
@@ -97,10 +98,12 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = fullmm;
+	tlb->fullmm = !(start | (end+1));
+	tlb->start = start;
+	tlb->end = end;
 	tlb->vma = NULL;
 	tlb->max = ARRAY_SIZE(tlb->local);
 	tlb->pages = tlb->local;
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index ef3a9de01954..bc5efc7c3f3f 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -22,7 +22,7 @@
  * unmapping a portion of the virtual address space, these hooks are called according to
  * the following template:
  *
- *	tlb <- tlb_gather_mmu(mm, full_mm_flush);	// start unmap for address space MM
+ *	tlb <- tlb_gather_mmu(mm, start, end);		// start unmap for address space MM
  *	{
  *	  for each vma that needs a shootdown do {
  *	    tlb_start_vma(tlb, vma);
@@ -58,6 +58,7 @@ struct mmu_gather {
 	unsigned int		max;
 	unsigned char		fullmm;		/* non-zero means full mm flush */
 	unsigned char		need_flush;	/* really unmapped some PTEs? */
+	unsigned long		start, end;
 	unsigned long		start_addr;
 	unsigned long		end_addr;
 	struct page		**pages;
@@ -155,13 +156,15 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb)
 
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->max = ARRAY_SIZE(tlb->local);
 	tlb->pages = tlb->local;
 	tlb->nr = 0;
-	tlb->fullmm = full_mm_flush;
+	tlb->fullmm = !(start | (end+1));
+	tlb->start = start;
+	tlb->end = end;
 	tlb->start_addr = ~0UL;
 }
 
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index b75d7d686684..23a64d25f2b1 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -32,6 +32,7 @@ struct mmu_gather {
 	struct mm_struct *mm;
 	struct mmu_table_batch *batch;
 	unsigned int fullmm;
+	unsigned long start, unsigned long end;
 };
 
 struct mmu_table_batch {
@@ -48,10 +49,13 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
 
 static inline void tlb_gather_mmu(struct mmu_gather *tlb,
 				  struct mm_struct *mm,
-				  unsigned int full_mm_flush)
+				  unsigned long start,
+				  unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = full_mm_flush;
+	tlb->start = start;
+	tlb->end = end;
+	tlb->fullmm = !(start | (end+1));
 	tlb->batch = NULL;
 	if (tlb->fullmm)
 		__tlb_flush_mm(mm);
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index e61d43d9f689..362192ed12fe 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -36,10 +36,12 @@ static inline void init_tlb_gather(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = full_mm_flush;
+	tlb->start = start;
+	tlb->end = end;
+	tlb->fullmm = !(start | (end+1));
 
 	init_tlb_gather(tlb);
 }
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 4febacd1a8a1..29b0301c18aa 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -45,10 +45,12 @@ static inline void init_tlb_gather(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = full_mm_flush;
+	tlb->start = start;
+	tlb->end = end;
+	tlb->fullmm = !(start | (end+1));
 
 	init_tlb_gather(tlb);
 }
diff --git a/fs/exec.c b/fs/exec.c
index 9c73def87642..fd774c7cb483 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -608,7 +608,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		return -ENOMEM;
 
 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, 0);
+	tlb_gather_mmu(&tlb, mm, old_start, old_end);
 	if (new_end > old_start) {
 		/*
 		 * when the old and new regions overlap clear from new_end.
@@ -625,7 +625,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		free_pgd_range(&tlb, old_start, old_end, new_end,
 			vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
 	}
-	tlb_finish_mmu(&tlb, new_end, old_end);
+	tlb_finish_mmu(&tlb, old_start, old_end);
 
 	/*
 	 * Shrink the vma to just the new range.  Always succeeds.
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 13821c339a41..5672d7ea1fa0 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -112,7 +112,7 @@ struct mmu_gather {
 
 #define HAVE_GENERIC_MMU_GATHER
 
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm);
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end);
 void tlb_flush_mmu(struct mmu_gather *tlb);
 void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start,
 							unsigned long end);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 83aff0a4d093..b60f33080a28 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2490,7 +2490,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 
 	mm = vma->vm_mm;
 
-	tlb_gather_mmu(&tlb, mm, 0);
+	tlb_gather_mmu(&tlb, mm, start, end);
 	__unmap_hugepage_range(&tlb, vma, start, end, ref_page);
 	tlb_finish_mmu(&tlb, start, end);
 }
diff --git a/mm/memory.c b/mm/memory.c
index 40268410732a..af84bc0ec17c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -209,14 +209,15 @@ static int tlb_next_batch(struct mmu_gather *tlb)
  *	tear-down from @mm. The @fullmm argument is used when @mm is without
  *	users and we're going to destroy the full address space (exit/execve).
  */
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 
-	tlb->fullmm     = fullmm;
+	/* Is it from 0 to ~0? */
+	tlb->fullmm     = !(start | (end+1));
 	tlb->need_flush_all = 0;
-	tlb->start	= -1UL;
-	tlb->end	= 0;
+	tlb->start	= start;
+	tlb->end	= end;
 	tlb->need_flush = 0;
 	tlb->local.next = NULL;
 	tlb->local.nr   = 0;
@@ -256,8 +257,6 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e
 {
 	struct mmu_gather_batch *batch, *next;
 
-	tlb->start = start;
-	tlb->end   = end;
 	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
@@ -1099,7 +1098,6 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	spinlock_t *ptl;
 	pte_t *start_pte;
 	pte_t *pte;
-	unsigned long range_start = addr;
 
 again:
 	init_rss_vec(rss);
@@ -1205,17 +1203,25 @@ again:
 	 * and page-free while holding it.
 	 */
 	if (force_flush) {
+		unsigned long old_end;
+
 		force_flush = 0;
 
-#ifdef HAVE_GENERIC_MMU_GATHER
-		tlb->start = range_start;
+		/*
+		 * Flush the TLB just for the previous segment,
+		 * then update the range to be the remaining
+		 * TLB range.
+		 */
+		old_end = tlb->end;
 		tlb->end = addr;
-#endif
+
 		tlb_flush_mmu(tlb);
-		if (addr != end) {
-			range_start = addr;
+
+		tlb->start = addr;
+		tlb->end = old_end;
+
+		if (addr != end)
 			goto again;
-		}
 	}
 
 	return addr;
@@ -1400,7 +1406,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end = start + size;
 
 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, 0);
+	tlb_gather_mmu(&tlb, mm, start, end);
 	update_hiwater_rss(mm);
 	mmu_notifier_invalidate_range_start(mm, start, end);
 	for ( ; vma && vma->vm_start < end; vma = vma->vm_next)
@@ -1426,7 +1432,7 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr
 	unsigned long end = address + size;
 
 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, 0);
+	tlb_gather_mmu(&tlb, mm, address, end);
 	update_hiwater_rss(mm);
 	mmu_notifier_invalidate_range_start(mm, address, end);
 	unmap_single_vma(&tlb, vma, address, end, details);
diff --git a/mm/mmap.c b/mm/mmap.c
index 1edbaa3136c3..f9c97d10b873 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2336,7 +2336,7 @@ static void unmap_region(struct mm_struct *mm,
 	struct mmu_gather tlb;
 
 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, 0);
+	tlb_gather_mmu(&tlb, mm, start, end);
 	update_hiwater_rss(mm);
 	unmap_vmas(&tlb, vma, start, end);
 	free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
@@ -2709,7 +2709,7 @@ void exit_mmap(struct mm_struct *mm)
 
 	lru_add_drain();
 	flush_cache_mm(mm);
-	tlb_gather_mmu(&tlb, mm, 1);
+	tlb_gather_mmu(&tlb, mm, 0, -1);
 	/* update_hiwater_rss(mm) here? but nobody should be looking */
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	unmap_vmas(&tlb, vma, 0, -1);

next prev parent reply	other threads:[~2013-08-16  0:33 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <52050382.9060802@gmail.com>
2013-08-14 16:36 ` [Bug] Reproducible data corruption on i5-3340M: Please revert 53a59fc67! Ben Tebulin
2013-08-14 17:40   ` Michal Hocko
2013-08-14 17:58     ` Michal Hocko
2013-08-14 18:03     ` Linus Torvalds
2013-08-14 18:28       ` Michal Hocko
2013-08-14 18:35         ` Linus Torvalds
2013-08-15  9:25           ` Ben Tebulin
2013-08-15 12:02             ` Linus Torvalds
2013-08-15 12:37               ` Ben Tebulin
2013-08-15 13:40               ` Michal Hocko
2013-08-15 14:46                 ` Michal Hocko
2013-08-15 14:53                   ` Michal Hocko
2013-08-15 15:14                     ` Michal Hocko
2013-08-15 18:00               ` Linus Torvalds
2013-08-15 18:29                 ` Bjørn Mork
2013-08-15 18:42                   ` Linus Torvalds
2013-08-15 23:05                 ` [Bug] Reproducible data corruption on i5-3340M: Please continue your great work! :-) Ben Tebulin
2013-08-16  0:33                   ` Linus Torvalds [this message]
2013-08-16  6:22                     ` Stephen Rothwell
2013-08-16  7:55                     ` richard -rw- weinberger
2013-08-16 11:00                     ` Michal Hocko
2013-08-16 11:28                       ` Peter Zijlstra
2013-08-16 23:40                     ` Tony Luck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CA+55aFwFx7uhtDTX5vfiYRo+keLmuvxvSFupU4nB8g1KCN-WVg@mail.gmail.com \
    --to=torvalds@linux-foundation.org \
    --cc=akpm@linux-foundation.org \
    --cc=bsingharora@gmail.com \
    --cc=hannes@cmpxchg.org \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.cz \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=tebulin@googlemail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox