linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] mm/memory-failure: teach kill_accessing_process to accept hugetlb tail page pfn
@ 2025-12-19 17:55 Jane Chu
  2025-12-19 18:18 ` Matthew Wilcox
  0 siblings, 1 reply; 3+ messages in thread
From: Jane Chu @ 2025-12-19 17:55 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-mm, stable, muchun.song, osalvador, david, linmiaohe,
	jiaqiyan, william.roche, rientjes, akpm, lorenzo.stoakes,
	Liam.Howlett, rppt, surenb, mhocko

When a hugetlb folio is being poisoned again, try_memory_failure_hugetlb()
passes the head pfn to kill_accessing_process(), which is not right.
The precise pfn of the poisoned page should be used in order to
determine the precise vaddr as the SIGBUS payload.

This issue has already been taken care of in the normal path, that is,
hwpoison_user_mappings(), see [1][2].  Furthermore, for [3] to work
correctly in the hugetlb repoisoning case, it's essential to inform
the VM of the precise poisoned page, not the head page.

[1] https://lkml.kernel.org/r/20231218135837.3310403-1-willy@infradead.org
[2] https://lkml.kernel.org/r/20250224211445.2663312-1-jane.chu@oracle.com
[3] https://lore.kernel.org/lkml/20251116013223.1557158-1-jiaqiyan@google.com/

Cc: <stable@vger.kernel.org>
Signed-off-by: Jane Chu <jane.chu@oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
---
v1 -> v2:
  pick up R-B, add stable to cc list.
---
 mm/memory-failure.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 3edebb0cda30..c9d87811b1ea 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -681,9 +681,11 @@ static void set_to_kill(struct to_kill *tk, unsigned long addr, short shift)
 }
 
 static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
-				unsigned long poisoned_pfn, struct to_kill *tk)
+				unsigned long poisoned_pfn, struct to_kill *tk,
+				int pte_nr)
 {
 	unsigned long pfn = 0;
+	unsigned long hwpoison_vaddr;
 
 	if (pte_present(pte)) {
 		pfn = pte_pfn(pte);
@@ -694,10 +696,11 @@ static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
 			pfn = swp_offset_pfn(swp);
 	}
 
-	if (!pfn || pfn != poisoned_pfn)
+	if (!pfn || (pfn > poisoned_pfn || (pfn + pte_nr - 1) < poisoned_pfn))
 		return 0;
 
-	set_to_kill(tk, addr, shift);
+	hwpoison_vaddr = addr + ((poisoned_pfn - pfn) << PAGE_SHIFT);
+	set_to_kill(tk, hwpoison_vaddr, shift);
 	return 1;
 }
 
@@ -749,7 +752,7 @@ static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
 
 	for (; addr != end; ptep++, addr += PAGE_SIZE) {
 		ret = check_hwpoisoned_entry(ptep_get(ptep), addr, PAGE_SHIFT,
-					     hwp->pfn, &hwp->tk);
+					     hwp->pfn, &hwp->tk, 1);
 		if (ret == 1)
 			break;
 	}
@@ -772,8 +775,8 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
 
 	ptl = huge_pte_lock(h, walk->mm, ptep);
 	pte = huge_ptep_get(walk->mm, addr, ptep);
-	ret = check_hwpoisoned_entry(pte, addr, huge_page_shift(h),
-					hwp->pfn, &hwp->tk);
+	ret = check_hwpoisoned_entry(pte, addr, huge_page_shift(h), hwp->pfn,
+				&hwp->tk, pages_per_huge_page(h));
 	spin_unlock(ptl);
 	return ret;
 }
@@ -2023,10 +2026,8 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
 		*hugetlb = 0;
 		return 0;
 	} else if (res == -EHWPOISON) {
-		if (flags & MF_ACTION_REQUIRED) {
-			folio = page_folio(p);
-			res = kill_accessing_process(current, folio_pfn(folio), flags);
-		}
+		if (flags & MF_ACTION_REQUIRED)
+			res = kill_accessing_process(current, pfn, flags);
 		action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
 		return res;
 	} else if (res == -EBUSY) {
@@ -2037,6 +2038,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
 		return action_result(pfn, MF_MSG_GET_HWPOISON, MF_IGNORED);
 	}
 
+
 	folio = page_folio(p);
 	folio_lock(folio);
 
-- 
2.43.5



^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] mm/memory-failure: teach kill_accessing_process to accept hugetlb tail page pfn
  2025-12-19 17:55 [PATCH v2] mm/memory-failure: teach kill_accessing_process to accept hugetlb tail page pfn Jane Chu
@ 2025-12-19 18:18 ` Matthew Wilcox
  2025-12-22 20:30   ` jane.chu
  0 siblings, 1 reply; 3+ messages in thread
From: Matthew Wilcox @ 2025-12-19 18:18 UTC (permalink / raw)
  To: Jane Chu
  Cc: linux-kernel, linux-mm, stable, muchun.song, osalvador, david,
	linmiaohe, jiaqiyan, william.roche, rientjes, akpm,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mhocko

On Fri, Dec 19, 2025 at 10:55:16AM -0700, Jane Chu wrote:
>  static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
> -				unsigned long poisoned_pfn, struct to_kill *tk)
> +				unsigned long poisoned_pfn, struct to_kill *tk,
> +				int pte_nr)

if we pass in huge_page_mask() instead ...

>  {
>  	unsigned long pfn = 0;
> +	unsigned long hwpoison_vaddr;
>  
>  	if (pte_present(pte)) {
>  		pfn = pte_pfn(pte);
> @@ -694,10 +696,11 @@ static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
>  			pfn = swp_offset_pfn(swp);
>  	}
>  
> -	if (!pfn || pfn != poisoned_pfn)
> +	if (!pfn || (pfn > poisoned_pfn || (pfn + pte_nr - 1) < poisoned_pfn))

... then we can simplify this to:

	if (!pfn || ((pfn | mask) != (poisoned_pfn | mask)))

>  		return 0;
> @@ -2037,6 +2038,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
>  		return action_result(pfn, MF_MSG_GET_HWPOISON, MF_IGNORED);
>  	}
>  
> +
>  	folio = page_folio(p);
>  	folio_lock(folio);

unnecessary whitespace change


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] mm/memory-failure: teach kill_accessing_process to accept hugetlb tail page pfn
  2025-12-19 18:18 ` Matthew Wilcox
@ 2025-12-22 20:30   ` jane.chu
  0 siblings, 0 replies; 3+ messages in thread
From: jane.chu @ 2025-12-22 20:30 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: linux-kernel, linux-mm, stable, muchun.song, osalvador, david,
	linmiaohe, jiaqiyan, william.roche, rientjes, akpm,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mhocko



On 12/19/2025 10:18 AM, Matthew Wilcox wrote:
> On Fri, Dec 19, 2025 at 10:55:16AM -0700, Jane Chu wrote:
>>   static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
>> -				unsigned long poisoned_pfn, struct to_kill *tk)
>> +				unsigned long poisoned_pfn, struct to_kill *tk,
>> +				int pte_nr)
> 
> if we pass in huge_page_mask() instead ...
> 
>>   {
>>   	unsigned long pfn = 0;
>> +	unsigned long hwpoison_vaddr;
>>   
>>   	if (pte_present(pte)) {
>>   		pfn = pte_pfn(pte);
>> @@ -694,10 +696,11 @@ static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
>>   			pfn = swp_offset_pfn(swp);
>>   	}
>>   
>> -	if (!pfn || pfn != poisoned_pfn)
>> +	if (!pfn || (pfn > poisoned_pfn || (pfn + pte_nr - 1) < poisoned_pfn))
> 
> ... then we can simplify this to:
> 
> 	if (!pfn || ((pfn | mask) != (poisoned_pfn | mask))
> 
>>   		return 0;
>> @@ -2037,6 +2038,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
>>   		return action_result(pfn, MF_MSG_GET_HWPOISON, MF_IGNORED);
>>   	}
>>   
>> +
>>   	folio = page_folio(p);
>>   	folio_lock(folio);
> 
> unnecessary whitespace change

Thanks! I'll incorporate the mask idea in v3.

-jane



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-12-22 20:31 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-12-19 17:55 [PATCH v2] mm/memory-failure: teach kill_accessing_process to accept hugetlb tail page pfn Jane Chu
2025-12-19 18:18 ` Matthew Wilcox
2025-12-22 20:30   ` jane.chu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox