From: Chen Yucong <slaoub@gmail.com>
To: ak@linux.intel.com
Cc: fengguang.wu@intel.com, n-horiguchi@ah.jp.nec.com,
linux-mm@kvack.org, Chen Yucong <slaoub@gmail.com>
Subject: [PATCH] HWPOISON: avoid repeatedly raising some MCEs for a shared page
Date: Thu, 15 May 2014 19:16:16 +0800 [thread overview]
Message-ID: <1400152576-32004-1-git-send-email-slaoub@gmail.com> (raw)
We assume that there have three processes P1, P2, and P3 which share a
page frame PF0. PF0 have a multi-bit error that has not yet been detected.
At some point, P1 has access to PF0 for loading data or fetching instruction.
So a MCE related to PF0 has been raised and all logical processors have to
entry do_machine_check() due to MCE-Broadcast mechanism. But only P1 need to
execute memory_failure() due to TIF_MCE_NOTIFY signal. During the process of
executing memory_failure(), both P2 and P3 have a probability of accessing
the PF0. So they also can entry into memory_failure(), but they will return
very soon due to the PG_hwpoison has been set by P1. However, P1 may not
really isolate PF0, for example try_to_unmap() has not yet been executed. As
a result, P1/P2 may raise the same MCE again.
Signed-off-by: Chen Yucong <slaoub@gmail.com>
---
include/linux/page-flags.h | 6 ++++++
include/linux/pagemap.h | 27 +++++++++++++++++++++++++++
mm/filemap.c | 30 ++++++++++++++++++++++++++++++
mm/memory-failure.c | 11 +++++++++++
4 files changed, 74 insertions(+)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 8304959..c5c99a5 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -105,6 +105,7 @@ enum pageflags {
#endif
#ifdef CONFIG_MEMORY_FAILURE
PG_hwpoison, /* hardware poisoned page. Don't touch */
+ PG_hwpoison_lock,
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
PG_compound_lock,
@@ -271,10 +272,15 @@ PAGEFLAG_FALSE(Uncached)
#ifdef CONFIG_MEMORY_FAILURE
PAGEFLAG(HWPoison, hwpoison)
TESTSCFLAG(HWPoison, hwpoison)
+PAGEFLAG(HWPoisonLock, hwpoison_lock)
+TESTSCFLAG(HWPoisonLock, hwpoison_lock)
#define __PG_HWPOISON (1UL << PG_hwpoison)
+#define __PG_HWPOISON_LOCK (1UL << PG_hwpoison_lock)
#else
PAGEFLAG_FALSE(HWPoison)
#define __PG_HWPOISON 0
+PAGEFLAG_FALSE(HWPoisonLock)
+#define __PG_HWPOISON_LOCK 0
#endif
u64 stable_page_flags(struct page *page);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c74f8bb..9c3bdcb 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -508,6 +508,33 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
return trylock_page(page) || __lock_page_or_retry(page, mm, flags);
}
+#ifdef CONFIG_MEMORY_FAILURE
+extern void __lock_hwpoison_page(struct page *page);
+extern void unlock_hwpoison_page(struct page *page);
+
+static inline void __set_page_hwpoison_lock(struct page *page)
+{
+ __set_bit(PG_hwpoison_lock, &page->flags);
+}
+
+static inline void __clear_page_hwpoison_lock(struct page *page)
+{
+ __clear_bit(PG_hwpoison_lock, &page->flags);
+}
+
+static inline int trylock_hwpoison_page(struct page *page)
+{
+ return likely(!test_and_set_bit_lock(PG_hwpoison_lock, &page->flags));
+}
+
+static inline void lock_hwpoison_page(struct page *page)
+{
+ might_sleep();
+ if (!trylock_hwpoison_page(page))
+ __lock_hwpoison_page(page);
+}
+#endif
+
/*
* This is exported only for wait_on_page_locked/wait_on_page_writeback.
* Never use this directly!
diff --git a/mm/filemap.c b/mm/filemap.c
index bec4b9b..9162527 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -843,6 +843,36 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
}
}
+#ifdef CONFIG_MEMORY_FAILURE
+/**
+ * unlock_hwpoison_page -unlock a poisonous page
+ * @page: the page
+ * Unlocks the poisonous page and wakes up sleeper in __wait_on_bit_lock().
+ */
+void unlock_hwpoison_page(struct page *page)
+{
+ VM_BUG_ON(!PageHWPoisonLock(page));
+ clear_bit_unlock(PG_hwpoison_lock, &page->flags);
+ smp_mb__after_clear_bit();
+ wake_up_page(page, PG_hwpoison_lock);
+}
+EXPORT_SYMBOL(unlock_hwpoison_page);
+
+/**
+ * __lock_hwpoison_page - get a lock one the poisonous page, assuming
+ * we need to sleep to get it
+ * @page: the posionous page to lock
+ */
+void __lock_hwpoison_page(struct page *page)
+{
+ DEFINE_WAIT_BIT(wait, &page->flags, PG_hwpoison_lock);
+
+ __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page,
+ TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(__lock_hwpoison_page);
+#endif
+
/**
* page_cache_next_hole - find the next hole (not-present entry)
* @mapping: mapping
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 9872af1..f839a54 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1041,9 +1041,11 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
}
p = pfn_to_page(pfn);
+ lock_hwpoison_page(p);
hpage = compound_head(p);
if (TestSetPageHWPoison(p)) {
printk(KERN_ERR "MCE %#lx: already hardware poisoned\n", pfn);
+ unlock_hwpoison_page(p);
return 0;
}
@@ -1078,6 +1080,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
!get_page_unless_zero(hpage)) {
if (is_free_buddy_page(p)) {
action_result(pfn, "free buddy", DELAYED);
+ unlock_hwpoison_page(p);
return 0;
} else if (PageHuge(hpage)) {
/*
@@ -1089,6 +1092,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
|| (p != hpage && TestSetPageHWPoison(hpage))) {
atomic_long_sub(nr_pages, &num_poisoned_pages);
unlock_page(hpage);
+ unlock_hwpoison_page(p);
return 0;
}
}
@@ -1097,9 +1101,11 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
action_result(pfn, "free huge",
res ? IGNORED : DELAYED);
unlock_page(hpage);
+ unlock_hwpoison_page(p);
return res;
} else {
action_result(pfn, "high order kernel", IGNORED);
+ unlock_hwpoison_page(p);
return -EBUSY;
}
}
@@ -1124,10 +1130,12 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
action_result(pfn, "free buddy", DELAYED);
else
action_result(pfn, "free buddy, 2nd try", DELAYED);
+ unlock_hwpoison_page(p);
return 0;
}
action_result(pfn, "non LRU", IGNORED);
put_page(p);
+ unlock_hwpoison_page(p);
return -EBUSY;
}
}
@@ -1161,6 +1169,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
atomic_long_sub(nr_pages, &num_poisoned_pages);
unlock_page(hpage);
put_page(hpage);
+ unlock_hwpoison_page(p);
return 0;
}
@@ -1173,6 +1182,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
IGNORED);
unlock_page(hpage);
put_page(hpage);
+ unlock_hwpoison_page(p);
return 0;
}
/*
@@ -1228,6 +1238,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
res = page_action(ps, p, pfn);
out:
unlock_page(hpage);
+ unlock_hwpoison_page(p);
return res;
}
EXPORT_SYMBOL_GPL(memory_failure);
--
1.7.10.4
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next reply other threads:[~2014-05-15 11:18 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-05-15 11:16 Chen Yucong [this message]
2014-05-15 14:38 ` Andi Kleen
2014-05-15 14:52 ` Naoya Horiguchi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1400152576-32004-1-git-send-email-slaoub@gmail.com \
--to=slaoub@gmail.com \
--cc=ak@linux.intel.com \
--cc=fengguang.wu@intel.com \
--cc=linux-mm@kvack.org \
--cc=n-horiguchi@ah.jp.nec.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox