linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 1/2] hw/poison: Avoid the impact of hwpoison_filter() return value on mce handler
@ 2022-02-18  9:40 luofei
  2022-02-21  1:32 ` HORIGUCHI NAOYA(堀口 直也)
  0 siblings, 1 reply; 3+ messages in thread
From: luofei @ 2022-02-18  9:40 UTC (permalink / raw)
  To: tony.luck, bp, tglx, mingo, dave.hansen, x86, akpm, naoya.horiguchi
  Cc: hpa, linux-edac, linux-kernel, linux-mm, luofei

When the hwpoison page meets the filter conditions, it should
not be regarded as successful memory_failure() processing for
mce handler, but should return a value(-EHWPOISON), otherwise
mce handler regards the error page has been identified and
isolated, which may lead to calling set_mce_nospec() to change
page attribute, etc.

Here a new MF_MCE__HANDLE flag is introdulced to identify the
call from the mce handler and instruct hwpoison_filter() to
return -EHWPOISON, other return 0 for compatibility with the
hwpoison injector.

Signed-off-by: luofei <luofei@unicloud.com>
---
 arch/x86/kernel/cpu/mce/core.c | 15 +++++++++------
 include/linux/mm.h             |  1 +
 mm/memory-failure.c            | 14 ++++++++++++--
 3 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5818b837fd4d..de29165c65b6 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -612,7 +612,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
 		return NOTIFY_DONE;
 
 	pfn = mce->addr >> PAGE_SHIFT;
-	if (!memory_failure(pfn, 0)) {
+	if (!memory_failure(pfn, MF_MCE_HANDLE)) {
 		set_mce_nospec(pfn, whole_page(mce));
 		mce->kflags |= MCE_HANDLED_UC;
 	}
@@ -1286,7 +1286,7 @@ static void kill_me_now(struct callback_head *ch)
 static void kill_me_maybe(struct callback_head *cb)
 {
 	struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
-	int flags = MF_ACTION_REQUIRED;
+	int flags = MF_ACTION_REQUIRED | MF_MCE_HANDLE;
 	int ret;
 
 	p->mce_count = 0;
@@ -1303,9 +1303,12 @@ static void kill_me_maybe(struct callback_head *cb)
 	}
 
 	/*
-	 * -EHWPOISON from memory_failure() means that it already sent SIGBUS
-	 * to the current process with the proper error info, so no need to
-	 * send SIGBUS here again.
+	 * -EHWPOISON from memory_failure() means that memory_failure() did
+	 * not handle the error event for the following reason:
+	 *  - SIGBUS has already been sent to the current process with the
+	 *    proper error info, or
+	 *  - hwpoison_filter() filtered the event,
+	 * so no need to deal with it more.
 	 */
 	if (ret == -EHWPOISON)
 		return;
@@ -1320,7 +1323,7 @@ static void kill_me_never(struct callback_head *cb)
 
 	p->mce_count = 0;
 	pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
-	if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
+	if (!memory_failure(p->mce_addr >> PAGE_SHIFT, MF_MCE_HANDLE))
 		set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
 }
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 213cc569b192..f4703f948e9a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3188,6 +3188,7 @@ enum mf_flags {
 	MF_MUST_KILL = 1 << 2,
 	MF_SOFT_OFFLINE = 1 << 3,
 	MF_UNPOISON = 1 << 4,
+	MF_MCE_HANDLE = 1 << 5,
 };
 extern int memory_failure(unsigned long pfn, int flags);
 extern void memory_failure_queue(unsigned long pfn, int flags);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 97a9ed8f87a9..59d6d939a752 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1526,7 +1526,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 				if (TestClearPageHWPoison(head))
 					num_poisoned_pages_dec();
 				unlock_page(head);
-				return 0;
+				if (flags & MF_MCE_HANDLE)
+					return -EHWPOISON;
+				else
+					return 0;
 			}
 			unlock_page(head);
 			res = MF_FAILED;
@@ -1613,7 +1616,10 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 		goto out;
 
 	if (hwpoison_filter(page)) {
-		rc = 0;
+		if (flags & MF_MCE_HANDLE)
+			rc = -EHWPOISON;
+		else
+			rc = 0;
 		goto unlock;
 	}
 
@@ -1837,6 +1843,10 @@ int memory_failure(unsigned long pfn, int flags)
 			num_poisoned_pages_dec();
 		unlock_page(p);
 		put_page(p);
+		if (flags & MF_MCE_HANDLE)
+			res = -EHWPOISON;
+		else
+			res = 0;
 		goto unlock_mutex;
 	}
 
-- 
2.27.0



^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v3 1/2] hw/poison: Avoid the impact of hwpoison_filter() return value on mce handler
  2022-02-18  9:40 [PATCH v3 1/2] hw/poison: Avoid the impact of hwpoison_filter() return value on mce handler luofei
@ 2022-02-21  1:32 ` HORIGUCHI NAOYA(堀口 直也)
  2022-02-21  1:48   ` 答复: " 罗飞
  0 siblings, 1 reply; 3+ messages in thread
From: HORIGUCHI NAOYA(堀口 直也) @ 2022-02-21  1:32 UTC (permalink / raw)
  To: luofei
  Cc: tony.luck, bp, tglx, mingo, dave.hansen, x86, akpm, hpa,
	linux-edac, linux-kernel, linux-mm

Conventionally, "hwpoison" or "HWPOISON" (instead of "hw/poison") is used in
the subsystem tag in $SUBJECT, so could you align it (for grep friendliness) ?

# I should've mentioned this in previous review round, sorry.

On Fri, Feb 18, 2022 at 04:40:56AM -0500, luofei wrote:
> When the hwpoison page meets the filter conditions, it should
> not be regarded as successful memory_failure() processing for
> mce handler, but should return a value(-EHWPOISON), otherwise
> mce handler regards the error page has been identified and
> isolated, which may lead to calling set_mce_nospec() to change
> page attribute, etc.
> 
> Here a new MF_MCE__HANDLE flag is introdulced to identify the
                   ^^               ^^^^^^^^^^^
                 single '_' ?       introduced

> call from the mce handler and instruct hwpoison_filter() to
> return -EHWPOISON, other return 0 for compatibility with the
                     ^^^^^
                     otherwise ?

> hwpoison injector.
> 
> Signed-off-by: luofei <luofei@unicloud.com>

The diff looks ok to me, so with updates on the subject/description ...

Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>

Thanks,
Naoya Horiguchi

> ---
>  arch/x86/kernel/cpu/mce/core.c | 15 +++++++++------
>  include/linux/mm.h             |  1 +
>  mm/memory-failure.c            | 14 ++++++++++++--
>  3 files changed, 22 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 5818b837fd4d..de29165c65b6 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -612,7 +612,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
>  		return NOTIFY_DONE;
>  
>  	pfn = mce->addr >> PAGE_SHIFT;
> -	if (!memory_failure(pfn, 0)) {
> +	if (!memory_failure(pfn, MF_MCE_HANDLE)) {
>  		set_mce_nospec(pfn, whole_page(mce));
>  		mce->kflags |= MCE_HANDLED_UC;
>  	}
> @@ -1286,7 +1286,7 @@ static void kill_me_now(struct callback_head *ch)
>  static void kill_me_maybe(struct callback_head *cb)
>  {
>  	struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
> -	int flags = MF_ACTION_REQUIRED;
> +	int flags = MF_ACTION_REQUIRED | MF_MCE_HANDLE;
>  	int ret;
>  
>  	p->mce_count = 0;
> @@ -1303,9 +1303,12 @@ static void kill_me_maybe(struct callback_head *cb)
>  	}
>  
>  	/*
> -	 * -EHWPOISON from memory_failure() means that it already sent SIGBUS
> -	 * to the current process with the proper error info, so no need to
> -	 * send SIGBUS here again.
> +	 * -EHWPOISON from memory_failure() means that memory_failure() did
> +	 * not handle the error event for the following reason:
> +	 *  - SIGBUS has already been sent to the current process with the
> +	 *    proper error info, or
> +	 *  - hwpoison_filter() filtered the event,
> +	 * so no need to deal with it more.
>  	 */
>  	if (ret == -EHWPOISON)
>  		return;
> @@ -1320,7 +1323,7 @@ static void kill_me_never(struct callback_head *cb)
>  
>  	p->mce_count = 0;
>  	pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
> -	if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
> +	if (!memory_failure(p->mce_addr >> PAGE_SHIFT, MF_MCE_HANDLE))
>  		set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
>  }
>  
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 213cc569b192..f4703f948e9a 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3188,6 +3188,7 @@ enum mf_flags {
>  	MF_MUST_KILL = 1 << 2,
>  	MF_SOFT_OFFLINE = 1 << 3,
>  	MF_UNPOISON = 1 << 4,
> +	MF_MCE_HANDLE = 1 << 5,
>  };
>  extern int memory_failure(unsigned long pfn, int flags);
>  extern void memory_failure_queue(unsigned long pfn, int flags);
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 97a9ed8f87a9..59d6d939a752 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1526,7 +1526,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
>  				if (TestClearPageHWPoison(head))
>  					num_poisoned_pages_dec();
>  				unlock_page(head);
> -				return 0;
> +				if (flags & MF_MCE_HANDLE)
> +					return -EHWPOISON;
> +				else
> +					return 0;
>  			}
>  			unlock_page(head);
>  			res = MF_FAILED;
> @@ -1613,7 +1616,10 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
>  		goto out;
>  
>  	if (hwpoison_filter(page)) {
> -		rc = 0;
> +		if (flags & MF_MCE_HANDLE)
> +			rc = -EHWPOISON;
> +		else
> +			rc = 0;
>  		goto unlock;
>  	}
>  
> @@ -1837,6 +1843,10 @@ int memory_failure(unsigned long pfn, int flags)
>  			num_poisoned_pages_dec();
>  		unlock_page(p);
>  		put_page(p);
> +		if (flags & MF_MCE_HANDLE)
> +			res = -EHWPOISON;
> +		else
> +			res = 0;
>  		goto unlock_mutex;
>  	}
>  
> -- 
> 2.27.0

^ permalink raw reply	[flat|nested] 3+ messages in thread

* 答复: [PATCH v3 1/2] hw/poison: Avoid the impact of hwpoison_filter() return value on mce handler
  2022-02-21  1:32 ` HORIGUCHI NAOYA(堀口 直也)
@ 2022-02-21  1:48   ` 罗飞
  0 siblings, 0 replies; 3+ messages in thread
From: 罗飞 @ 2022-02-21  1:48 UTC (permalink / raw)
  To: HORIGUCHI NAOYA(堀口 直也)
  Cc: tony.luck, bp, tglx, mingo, dave.hansen, x86, akpm, hpa,
	linux-edac, linux-kernel, linux-mm

[-- Attachment #1: Type: text/plain, Size: 7234 bytes --]

>Conventionally, "hwpoison" or "HWPOISON" (instead of "hw/poison") is used in the subsystem tag in $SUBJECT, so could you align it (for grep friendliness) ?

 >

># I should've mentioned this in previous review round, sorry.

>

>On Fri, Feb 18, 2022 at 04:40:56AM -0500, luofei wrote:

>> When the hwpoison page meets the filter conditions, it should not be

>> regarded as successful memory_failure() processing for mce handler,

>> but should return a value(-EHWPOISON), otherwise mce handler regards

>> the error page has been identified and isolated, which may lead to

>> calling set_mce_nospec() to change page attribute, etc.

>>

>> Here a new MF_MCE__HANDLE flag is introdulced to identify the

>                   ^^               ^^^^^^^^^^^

>                 single '_' ?       introduced

>

>> call from the mce handler and instruct hwpoison_filter() to return

>> -EHWPOISON, other return 0 for compatibility with the

>                     ^^^^^

>                     otherwise ?

>

>> hwpoison injector.


I am so sorry for my carelessness, I will resubmit the updated patches.


Thanks

________________________________
发件人: HORIGUCHI NAOYA(堀口 直也) <naoya.horiguchi@nec.com>
发送时间: 2022年2月21日 9:32:16
收件人: 罗飞
抄送: tony.luck@intel.com; bp@alien8.de; tglx@linutronix.de; mingo@redhat.com; dave.hansen@linux.intel.com; x86@kernel.org; akpm@linux-foundation.org; hpa@zytor.com; linux-edac@vger.kernel.org; linux-kernel@vger.kernel.org; linux-mm@kvack.org
主题: Re: [PATCH v3 1/2] hw/poison: Avoid the impact of hwpoison_filter() return value on mce handler

Conventionally, "hwpoison" or "HWPOISON" (instead of "hw/poison") is used in
the subsystem tag in $SUBJECT, so could you align it (for grep friendliness) ?

# I should've mentioned this in previous review round, sorry.

On Fri, Feb 18, 2022 at 04:40:56AM -0500, luofei wrote:
> When the hwpoison page meets the filter conditions, it should
> not be regarded as successful memory_failure() processing for
> mce handler, but should return a value(-EHWPOISON), otherwise
> mce handler regards the error page has been identified and
> isolated, which may lead to calling set_mce_nospec() to change
> page attribute, etc.
>
> Here a new MF_MCE__HANDLE flag is introdulced to identify the
                   ^^               ^^^^^^^^^^^
                 single '_' ?       introduced

> call from the mce handler and instruct hwpoison_filter() to
> return -EHWPOISON, other return 0 for compatibility with the
                     ^^^^^
                     otherwise ?

> hwpoison injector.
>
> Signed-off-by: luofei <luofei@unicloud.com>

The diff looks ok to me, so with updates on the subject/description ...

Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>

Thanks,
Naoya Horiguchi

> ---
>  arch/x86/kernel/cpu/mce/core.c | 15 +++++++++------
>  include/linux/mm.h             |  1 +
>  mm/memory-failure.c            | 14 ++++++++++++--
>  3 files changed, 22 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 5818b837fd4d..de29165c65b6 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -612,7 +612,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
>                return NOTIFY_DONE;
>
>        pfn = mce->addr >> PAGE_SHIFT;
> -     if (!memory_failure(pfn, 0)) {
> +     if (!memory_failure(pfn, MF_MCE_HANDLE)) {
>                set_mce_nospec(pfn, whole_page(mce));
>                mce->kflags |= MCE_HANDLED_UC;
>        }
> @@ -1286,7 +1286,7 @@ static void kill_me_now(struct callback_head *ch)
>  static void kill_me_maybe(struct callback_head *cb)
>  {
>        struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
> -     int flags = MF_ACTION_REQUIRED;
> +     int flags = MF_ACTION_REQUIRED | MF_MCE_HANDLE;
>        int ret;
>
>        p->mce_count = 0;
> @@ -1303,9 +1303,12 @@ static void kill_me_maybe(struct callback_head *cb)
>        }
>
>        /*
> -      * -EHWPOISON from memory_failure() means that it already sent SIGBUS
> -      * to the current process with the proper error info, so no need to
> -      * send SIGBUS here again.
> +      * -EHWPOISON from memory_failure() means that memory_failure() did
> +      * not handle the error event for the following reason:
> +      *  - SIGBUS has already been sent to the current process with the
> +      *    proper error info, or
> +      *  - hwpoison_filter() filtered the event,
> +      * so no need to deal with it more.
>         */
>        if (ret == -EHWPOISON)
>                return;
> @@ -1320,7 +1323,7 @@ static void kill_me_never(struct callback_head *cb)
>
>        p->mce_count = 0;
>        pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
> -     if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
> +     if (!memory_failure(p->mce_addr >> PAGE_SHIFT, MF_MCE_HANDLE))
>                set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
>  }
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 213cc569b192..f4703f948e9a 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3188,6 +3188,7 @@ enum mf_flags {
>        MF_MUST_KILL = 1 << 2,
>        MF_SOFT_OFFLINE = 1 << 3,
>        MF_UNPOISON = 1 << 4,
> +     MF_MCE_HANDLE = 1 << 5,
>  };
>  extern int memory_failure(unsigned long pfn, int flags);
>  extern void memory_failure_queue(unsigned long pfn, int flags);
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 97a9ed8f87a9..59d6d939a752 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1526,7 +1526,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
>                                if (TestClearPageHWPoison(head))
>                                        num_poisoned_pages_dec();
>                                unlock_page(head);
> -                             return 0;
> +                             if (flags & MF_MCE_HANDLE)
> +                                     return -EHWPOISON;
> +                             else
> +                                     return 0;
>                        }
>                        unlock_page(head);
>                        res = MF_FAILED;
> @@ -1613,7 +1616,10 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
>                goto out;
>
>        if (hwpoison_filter(page)) {
> -             rc = 0;
> +             if (flags & MF_MCE_HANDLE)
> +                     rc = -EHWPOISON;
> +             else
> +                     rc = 0;
>                goto unlock;
>        }
>
> @@ -1837,6 +1843,10 @@ int memory_failure(unsigned long pfn, int flags)
>                        num_poisoned_pages_dec();
>                unlock_page(p);
>                put_page(p);
> +             if (flags & MF_MCE_HANDLE)
> +                     res = -EHWPOISON;
> +             else
> +                     res = 0;
>                goto unlock_mutex;
>        }
>
> --
> 2.27.0

[-- Attachment #2: Type: text/html, Size: 16146 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-02-21  1:48 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-18  9:40 [PATCH v3 1/2] hw/poison: Avoid the impact of hwpoison_filter() return value on mce handler luofei
2022-02-21  1:32 ` HORIGUCHI NAOYA(堀口 直也)
2022-02-21  1:48   ` 答复: " 罗飞

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox