* [PATCH] HWPOISON: avoid repeatedly raising some MCEs for a shared page
@ 2014-05-15 11:16 Chen Yucong
2014-05-15 14:38 ` Andi Kleen
2014-05-15 14:52 ` Naoya Horiguchi
0 siblings, 2 replies; 3+ messages in thread
From: Chen Yucong @ 2014-05-15 11:16 UTC (permalink / raw)
To: ak; +Cc: fengguang.wu, n-horiguchi, linux-mm, Chen Yucong
We assume that there have three processes P1, P2, and P3 which share a
page frame PF0. PF0 have a multi-bit error that has not yet been detected.
At some point, P1 has access to PF0 for loading data or fetching instruction.
So a MCE related to PF0 has been raised and all logical processors have to
entry do_machine_check() due to MCE-Broadcast mechanism. But only P1 need to
execute memory_failure() due to TIF_MCE_NOTIFY signal. During the process of
executing memory_failure(), both P2 and P3 have a probability of accessing
the PF0. So they also can entry into memory_failure(), but they will return
very soon due to the PG_hwpoison has been set by P1. However, P1 may not
really isolate PF0, for example try_to_unmap() has not yet been executed. As
a result, P1/P2 may raise the same MCE again.
Signed-off-by: Chen Yucong <slaoub@gmail.com>
---
include/linux/page-flags.h | 6 ++++++
include/linux/pagemap.h | 27 +++++++++++++++++++++++++++
mm/filemap.c | 30 ++++++++++++++++++++++++++++++
mm/memory-failure.c | 11 +++++++++++
4 files changed, 74 insertions(+)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 8304959..c5c99a5 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -105,6 +105,7 @@ enum pageflags {
#endif
#ifdef CONFIG_MEMORY_FAILURE
PG_hwpoison, /* hardware poisoned page. Don't touch */
+ PG_hwpoison_lock,
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
PG_compound_lock,
@@ -271,10 +272,15 @@ PAGEFLAG_FALSE(Uncached)
#ifdef CONFIG_MEMORY_FAILURE
PAGEFLAG(HWPoison, hwpoison)
TESTSCFLAG(HWPoison, hwpoison)
+PAGEFLAG(HWPoisonLock, hwpoison_lock)
+TESTSCFLAG(HWPoisonLock, hwpoison_lock)
#define __PG_HWPOISON (1UL << PG_hwpoison)
+#define __PG_HWPOISON_LOCK (1UL << PG_hwpoison_lock)
#else
PAGEFLAG_FALSE(HWPoison)
#define __PG_HWPOISON 0
+PAGEFLAG_FALSE(HWPoisonLock)
+#define __PG_HWPOISON_LOCK 0
#endif
u64 stable_page_flags(struct page *page);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c74f8bb..9c3bdcb 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -508,6 +508,33 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
return trylock_page(page) || __lock_page_or_retry(page, mm, flags);
}
+#ifdef CONFIG_MEMORY_FAILURE
+extern void __lock_hwpoison_page(struct page *page);
+extern void unlock_hwpoison_page(struct page *page);
+
+static inline void __set_page_hwpoison_lock(struct page *page)
+{
+ __set_bit(PG_hwpoison_lock, &page->flags);
+}
+
+static inline void __clear_page_hwpoison_lock(struct page *page)
+{
+ __clear_bit(PG_hwpoison_lock, &page->flags);
+}
+
+static inline int trylock_hwpoison_page(struct page *page)
+{
+ return likely(!test_and_set_bit_lock(PG_hwpoison_lock, &page->flags));
+}
+
+static inline void lock_hwpoison_page(struct page *page)
+{
+ might_sleep();
+ if (!trylock_hwpoison_page(page))
+ __lock_hwpoison_page(page);
+}
+#endif
+
/*
* This is exported only for wait_on_page_locked/wait_on_page_writeback.
* Never use this directly!
diff --git a/mm/filemap.c b/mm/filemap.c
index bec4b9b..9162527 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -843,6 +843,36 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
}
}
+#ifdef CONFIG_MEMORY_FAILURE
+/**
+ * unlock_hwpoison_page -unlock a poisonous page
+ * @page: the page
+ * Unlocks the poisonous page and wakes up sleeper in __wait_on_bit_lock().
+ */
+void unlock_hwpoison_page(struct page *page)
+{
+ VM_BUG_ON(!PageHWPoisonLock(page));
+ clear_bit_unlock(PG_hwpoison_lock, &page->flags);
+ smp_mb__after_clear_bit();
+ wake_up_page(page, PG_hwpoison_lock);
+}
+EXPORT_SYMBOL(unlock_hwpoison_page);
+
+/**
+ * __lock_hwpoison_page - get a lock one the poisonous page, assuming
+ * we need to sleep to get it
+ * @page: the posionous page to lock
+ */
+void __lock_hwpoison_page(struct page *page)
+{
+ DEFINE_WAIT_BIT(wait, &page->flags, PG_hwpoison_lock);
+
+ __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page,
+ TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(__lock_hwpoison_page);
+#endif
+
/**
* page_cache_next_hole - find the next hole (not-present entry)
* @mapping: mapping
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 9872af1..f839a54 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1041,9 +1041,11 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
}
p = pfn_to_page(pfn);
+ lock_hwpoison_page(p);
hpage = compound_head(p);
if (TestSetPageHWPoison(p)) {
printk(KERN_ERR "MCE %#lx: already hardware poisoned\n", pfn);
+ unlock_hwpoison_page(p);
return 0;
}
@@ -1078,6 +1080,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
!get_page_unless_zero(hpage)) {
if (is_free_buddy_page(p)) {
action_result(pfn, "free buddy", DELAYED);
+ unlock_hwpoison_page(p);
return 0;
} else if (PageHuge(hpage)) {
/*
@@ -1089,6 +1092,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
|| (p != hpage && TestSetPageHWPoison(hpage))) {
atomic_long_sub(nr_pages, &num_poisoned_pages);
unlock_page(hpage);
+ unlock_hwpoison_page(p);
return 0;
}
}
@@ -1097,9 +1101,11 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
action_result(pfn, "free huge",
res ? IGNORED : DELAYED);
unlock_page(hpage);
+ unlock_hwpoison_page(p);
return res;
} else {
action_result(pfn, "high order kernel", IGNORED);
+ unlock_hwpoison_page(p);
return -EBUSY;
}
}
@@ -1124,10 +1130,12 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
action_result(pfn, "free buddy", DELAYED);
else
action_result(pfn, "free buddy, 2nd try", DELAYED);
+ unlock_hwpoison_page(p);
return 0;
}
action_result(pfn, "non LRU", IGNORED);
put_page(p);
+ unlock_hwpoison_page(p);
return -EBUSY;
}
}
@@ -1161,6 +1169,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
atomic_long_sub(nr_pages, &num_poisoned_pages);
unlock_page(hpage);
put_page(hpage);
+ unlock_hwpoison_page(p);
return 0;
}
@@ -1173,6 +1182,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
IGNORED);
unlock_page(hpage);
put_page(hpage);
+ unlock_hwpoison_page(p);
return 0;
}
/*
@@ -1228,6 +1238,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
res = page_action(ps, p, pfn);
out:
unlock_page(hpage);
+ unlock_hwpoison_page(p);
return res;
}
EXPORT_SYMBOL_GPL(memory_failure);
--
1.7.10.4
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] HWPOISON: avoid repeatedly raising some MCEs for a shared page
2014-05-15 11:16 [PATCH] HWPOISON: avoid repeatedly raising some MCEs for a shared page Chen Yucong
@ 2014-05-15 14:38 ` Andi Kleen
2014-05-15 14:52 ` Naoya Horiguchi
1 sibling, 0 replies; 3+ messages in thread
From: Andi Kleen @ 2014-05-15 14:38 UTC (permalink / raw)
To: Chen Yucong; +Cc: fengguang.wu, n-horiguchi, linux-mm
On Thu, May 15, 2014 at 07:16:16PM +0800, Chen Yucong wrote:
> We assume that there have three processes P1, P2, and P3 which share a
> page frame PF0. PF0 have a multi-bit error that has not yet been detected.
How likely is that? Did you see it in some real case?
> As
> a result, P1/P2 may raise the same MCE again.
And how is that a problem?
The memory error handling is always somewhat probabilistic. There are a
lot of corner cases that could be be handled, but it would
be even more complex than it already is, and most of them are unlikely
to happen. The more complexity the more risk of unintended bugs.
So the question is always how likely that case is, and is it worth
handling. It's far better to focus on the common case.
Another concern is always how to test this. Usually all explicit paths
should have test cases in mce-test.
But it's not clear to me the additional complexity here is justified.
-andi
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] HWPOISON: avoid repeatedly raising some MCEs for a shared page
2014-05-15 11:16 [PATCH] HWPOISON: avoid repeatedly raising some MCEs for a shared page Chen Yucong
2014-05-15 14:38 ` Andi Kleen
@ 2014-05-15 14:52 ` Naoya Horiguchi
1 sibling, 0 replies; 3+ messages in thread
From: Naoya Horiguchi @ 2014-05-15 14:52 UTC (permalink / raw)
To: slaoub; +Cc: ak, Wu Fengguang, linux-mm
On Thu, May 15, 2014 at 07:16:16PM +0800, Chen Yucong wrote:
> We assume that there have three processes P1, P2, and P3 which share a
> page frame PF0. PF0 have a multi-bit error that has not yet been detected.
>
> At some point, P1 has access to PF0 for loading data or fetching instruction.
> So a MCE related to PF0 has been raised and all logical processors have to
> entry do_machine_check() due to MCE-Broadcast mechanism. But only P1 need to
> execute memory_failure() due to TIF_MCE_NOTIFY signal. During the process of
> executing memory_failure(), both P2 and P3 have a probability of accessing
> the PF0. So they also can entry into memory_failure(), but they will return
> very soon due to the PG_hwpoison has been set by P1. However, P1 may not
> really isolate PF0, for example try_to_unmap() has not yet been executed. As
> a result, P1/P2 may raise the same MCE again.
How serious is this problem? Concurrent accesses for multi-bit memory errors
don't happen so often, and even if that happens we just waste a few CPU cycles.
So it seems to me hard to justify adding a new page flag for this specific purpose.
Thanks,
Naoya Horiguchi
>
> Signed-off-by: Chen Yucong <slaoub@gmail.com>
> ---
> include/linux/page-flags.h | 6 ++++++
> include/linux/pagemap.h | 27 +++++++++++++++++++++++++++
> mm/filemap.c | 30 ++++++++++++++++++++++++++++++
> mm/memory-failure.c | 11 +++++++++++
> 4 files changed, 74 insertions(+)
>
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index 8304959..c5c99a5 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -105,6 +105,7 @@ enum pageflags {
> #endif
> #ifdef CONFIG_MEMORY_FAILURE
> PG_hwpoison, /* hardware poisoned page. Don't touch */
> + PG_hwpoison_lock,
> #endif
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> PG_compound_lock,
> @@ -271,10 +272,15 @@ PAGEFLAG_FALSE(Uncached)
> #ifdef CONFIG_MEMORY_FAILURE
> PAGEFLAG(HWPoison, hwpoison)
> TESTSCFLAG(HWPoison, hwpoison)
> +PAGEFLAG(HWPoisonLock, hwpoison_lock)
> +TESTSCFLAG(HWPoisonLock, hwpoison_lock)
> #define __PG_HWPOISON (1UL << PG_hwpoison)
> +#define __PG_HWPOISON_LOCK (1UL << PG_hwpoison_lock)
> #else
> PAGEFLAG_FALSE(HWPoison)
> #define __PG_HWPOISON 0
> +PAGEFLAG_FALSE(HWPoisonLock)
> +#define __PG_HWPOISON_LOCK 0
> #endif
>
> u64 stable_page_flags(struct page *page);
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index c74f8bb..9c3bdcb 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -508,6 +508,33 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
> return trylock_page(page) || __lock_page_or_retry(page, mm, flags);
> }
>
> +#ifdef CONFIG_MEMORY_FAILURE
> +extern void __lock_hwpoison_page(struct page *page);
> +extern void unlock_hwpoison_page(struct page *page);
> +
> +static inline void __set_page_hwpoison_lock(struct page *page)
> +{
> + __set_bit(PG_hwpoison_lock, &page->flags);
> +}
> +
> +static inline void __clear_page_hwpoison_lock(struct page *page)
> +{
> + __clear_bit(PG_hwpoison_lock, &page->flags);
> +}
> +
> +static inline int trylock_hwpoison_page(struct page *page)
> +{
> + return likely(!test_and_set_bit_lock(PG_hwpoison_lock, &page->flags));
> +}
> +
> +static inline void lock_hwpoison_page(struct page *page)
> +{
> + might_sleep();
> + if (!trylock_hwpoison_page(page))
> + __lock_hwpoison_page(page);
> +}
> +#endif
> +
> /*
> * This is exported only for wait_on_page_locked/wait_on_page_writeback.
> * Never use this directly!
> diff --git a/mm/filemap.c b/mm/filemap.c
> index bec4b9b..9162527 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -843,6 +843,36 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
> }
> }
>
> +#ifdef CONFIG_MEMORY_FAILURE
> +/**
> + * unlock_hwpoison_page -unlock a poisonous page
> + * @page: the page
> + * Unlocks the poisonous page and wakes up sleeper in __wait_on_bit_lock().
> + */
> +void unlock_hwpoison_page(struct page *page)
> +{
> + VM_BUG_ON(!PageHWPoisonLock(page));
> + clear_bit_unlock(PG_hwpoison_lock, &page->flags);
> + smp_mb__after_clear_bit();
> + wake_up_page(page, PG_hwpoison_lock);
> +}
> +EXPORT_SYMBOL(unlock_hwpoison_page);
> +
> +/**
> + * __lock_hwpoison_page - get a lock one the poisonous page, assuming
> + * we need to sleep to get it
> + * @page: the posionous page to lock
> + */
> +void __lock_hwpoison_page(struct page *page)
> +{
> + DEFINE_WAIT_BIT(wait, &page->flags, PG_hwpoison_lock);
> +
> + __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page,
> + TASK_UNINTERRUPTIBLE);
> +}
> +EXPORT_SYMBOL(__lock_hwpoison_page);
> +#endif
> +
> /**
> * page_cache_next_hole - find the next hole (not-present entry)
> * @mapping: mapping
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 9872af1..f839a54 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1041,9 +1041,11 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
> }
>
> p = pfn_to_page(pfn);
> + lock_hwpoison_page(p);
> hpage = compound_head(p);
> if (TestSetPageHWPoison(p)) {
> printk(KERN_ERR "MCE %#lx: already hardware poisoned\n", pfn);
> + unlock_hwpoison_page(p);
> return 0;
> }
>
> @@ -1078,6 +1080,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
> !get_page_unless_zero(hpage)) {
> if (is_free_buddy_page(p)) {
> action_result(pfn, "free buddy", DELAYED);
> + unlock_hwpoison_page(p);
> return 0;
> } else if (PageHuge(hpage)) {
> /*
> @@ -1089,6 +1092,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
> || (p != hpage && TestSetPageHWPoison(hpage))) {
> atomic_long_sub(nr_pages, &num_poisoned_pages);
> unlock_page(hpage);
> + unlock_hwpoison_page(p);
> return 0;
> }
> }
> @@ -1097,9 +1101,11 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
> action_result(pfn, "free huge",
> res ? IGNORED : DELAYED);
> unlock_page(hpage);
> + unlock_hwpoison_page(p);
> return res;
> } else {
> action_result(pfn, "high order kernel", IGNORED);
> + unlock_hwpoison_page(p);
> return -EBUSY;
> }
> }
> @@ -1124,10 +1130,12 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
> action_result(pfn, "free buddy", DELAYED);
> else
> action_result(pfn, "free buddy, 2nd try", DELAYED);
> + unlock_hwpoison_page(p);
> return 0;
> }
> action_result(pfn, "non LRU", IGNORED);
> put_page(p);
> + unlock_hwpoison_page(p);
> return -EBUSY;
> }
> }
> @@ -1161,6 +1169,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
> atomic_long_sub(nr_pages, &num_poisoned_pages);
> unlock_page(hpage);
> put_page(hpage);
> + unlock_hwpoison_page(p);
> return 0;
> }
>
> @@ -1173,6 +1182,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
> IGNORED);
> unlock_page(hpage);
> put_page(hpage);
> + unlock_hwpoison_page(p);
> return 0;
> }
> /*
> @@ -1228,6 +1238,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
> res = page_action(ps, p, pfn);
> out:
> unlock_page(hpage);
> + unlock_hwpoison_page(p);
> return res;
> }
> EXPORT_SYMBOL_GPL(memory_failure);
> --
> 1.7.10.4
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2014-05-15 14:52 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-15 11:16 [PATCH] HWPOISON: avoid repeatedly raising some MCEs for a shared page Chen Yucong
2014-05-15 14:38 ` Andi Kleen
2014-05-15 14:52 ` Naoya Horiguchi
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox