From: Em Sharnoff <sharnoff@neon.tech>
To: linux-kernel@vger.kernel.org, x86@kernel.org, linux-mm@kvack.org
Cc: Ingo Molnar <mingo@kernel.org>, "H. Peter Anvin" <hpa@zytor.com>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Andy Lutomirski <luto@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Borislav Petkov <bp@alien8.de>,
	"Edgecombe, Rick P" <rick.p.edgecombe@intel.com>,
	Oleg Vasilev <oleg@neon.tech>,
	Arthur Petukhovsky <arthur@neon.tech>,
	Stefan Radig <stefan@neon.tech>, Misha Sakhnov <misha@neon.tech>
Subject: [PATCH v2 1/2] x86/mm: Handle alloc failure in phys_*_init()
Date: Mon, 9 Jun 2025 11:33:08 +0100
Message-ID: <25c5e747-107f-4450-8eb0-11b2f0dab14d@neon.tech>
In-Reply-To: <0ce5e150-19e0-457f-bec3-ee031c0be7e7@neon.tech>

During memory hotplug, allocation failures in phys_*_init() aren't
handled, which results in a null pointer dereference if one occurs.

To handle that, change phys_pud_init() and similar functions to return
allocation errors via ERR_PTR() and check for them in arch_add_memory().

Signed-off-by: Em Sharnoff <sharnoff@neon.tech>
---
Changelog:
- v2: switch from special-casing zero value to using ERR_PTR()
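
For reference, a minimal sketch (not part of the patch; helper names are
hypothetical) of the return convention this relies on: an allocation failure
is encoded into the returned "last physical address" with ERR_PTR(), and the
caller tells it apart from a real address with IS_ERR_VALUE() from
<linux/err.h>. Since errno values occupy only the top 4095 values of the
unsigned long range, an encoded error cannot collide with a plausible
physical address.

	#include <linux/err.h>
	#include <linux/types.h>

	/* hypothetical helper mirroring the phys_*_init() return convention */
	static unsigned long map_range_or_fail(bool fail)
	{
		if (fail)
			/* encode -ENOMEM into the unsigned long return value */
			return (unsigned long)ERR_PTR(-ENOMEM);

		return 0xa0000000UL;	/* last physical address mapped */
	}

	/* hypothetical caller: propagate the encoded error as an int */
	static int add_range(bool fail)
	{
		unsigned long last = map_range_or_fail(fail);

		if (IS_ERR_VALUE(last))
			return (int)last;	/* -ENOMEM */

		return 0;
	}
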
---
 arch/x86/mm/init.c    |  6 ++++-
 arch/x86/mm/init_64.c | 54 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index bfa444a7dbb0..82dd5ce03dd6 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -533,6 +533,7 @@ bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
  * Setup the direct mapping of the physical memory at PAGE_OFFSET.
  * This runs before bootmem is initialized and gets pages directly from
  * the physical memory. To access them they are temporarily mapped.
+ * Allocation errors are returned with ERR_PTR.
  */
 unsigned long __ref init_memory_mapping(unsigned long start,
 					unsigned long end, pgprot_t prot)
@@ -547,10 +548,13 @@ unsigned long __ref init_memory_mapping(unsigned long start,
 	memset(mr, 0, sizeof(mr));
 	nr_range = split_mem_range(mr, 0, start, end);
 
-	for (i = 0; i < nr_range; i++)
+	for (i = 0; i < nr_range; i++) {
 		ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
 						   mr[i].page_size_mask,
 						   prot);
+		if (IS_ERR_VALUE(ret))
+			return ret;
+	}
 
 	add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT);
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 7c4f6f591f2b..3ab261aa8eff 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -502,7 +502,8 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
 /*
  * Create PMD level page table mapping for physical addresses. The virtual
  * and physical address have to be aligned at this level.
- * It returns the last physical address mapped.
+ * It returns the last physical address mapped. Allocation errors are
+ * returned with ERR_PTR.
  */
 static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
@@ -572,7 +573,14 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
 		}
 
 		pte = alloc_low_page();
+		if (!pte)
+			return (unsigned long)ERR_PTR(-ENOMEM);
 		paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init);
+		/*
+		 * phys_{pmd,pud,p4d}_init return allocation errors via ERR_PTR.
+		 * phys_pte_init makes no allocations, so should not error.
+		 */
+		BUG_ON(IS_ERR_VALUE(paddr_last));
 
 		spin_lock(&init_mm.page_table_lock);
 		pmd_populate_kernel_init(&init_mm, pmd, pte, init);
@@ -586,7 +594,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
  * Create PUD level page table mapping for physical addresses. The virtual
  * and physical address do not have to be aligned at this level. KASLR can
  * randomize virtual addresses up to this level.
- * It returns the last physical address mapped.
+ * It returns the last physical address mapped. Allocation errors are
+ * returned with ERR_PTR.
  */
 static unsigned long __meminit
 phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
@@ -623,6 +632,8 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 							   paddr_end,
 							   page_size_mask,
 							   prot, init);
+				if (IS_ERR_VALUE(paddr_last))
+					return paddr_last;
 				continue;
 			}
 			/*
@@ -658,12 +669,22 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 		}
 
 		pmd = alloc_low_page();
+		if (!pmd)
+			return (unsigned long)ERR_PTR(-ENOMEM);
 		paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
 					   page_size_mask, prot, init);
 
+		/*
+		 * paddr_last may hold an error if allocation failed, but we should
+		 * still update the pud before bailing, so that subsequent retries can
+		 * pick up on progress (here and in phys_pmd_init) without leaking pmd.
+		 */
 		spin_lock(&init_mm.page_table_lock);
 		pud_populate_init(&init_mm, pud, pmd, init);
 		spin_unlock(&init_mm.page_table_lock);
+
+		if (IS_ERR_VALUE(paddr_last))
+			return paddr_last;
 	}
 
 	update_page_count(PG_LEVEL_1G, pages);
@@ -707,16 +728,26 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
 			pud = pud_offset(p4d, 0);
 			paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
 					page_size_mask, prot, init);
+			if (IS_ERR_VALUE(paddr_last))
+				return paddr_last;
 			continue;
 		}
 
 		pud = alloc_low_page();
+		if (!pud)
+			return (unsigned long)ERR_PTR(-ENOMEM);
 		paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
 					   page_size_mask, prot, init);
 
 		spin_lock(&init_mm.page_table_lock);
 		p4d_populate_init(&init_mm, p4d, pud, init);
 		spin_unlock(&init_mm.page_table_lock);
+
+		/*
+		 * Bail only after updating p4d to keep progress from pud across retries.
+		 */
+		if (IS_ERR_VALUE(paddr_last))
+			return paddr_last;
 	}
 
 	return paddr_last;
@@ -748,10 +779,14 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
 						   __pa(vaddr_end),
 						   page_size_mask,
 						   prot, init);
+			if (IS_ERR_VALUE(paddr_last))
+				return paddr_last;
 			continue;
 		}
 
 		p4d = alloc_low_page();
+		if (!p4d)
+			return (unsigned long)ERR_PTR(-ENOMEM);
 		paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
 					   page_size_mask, prot, init);
 
@@ -763,6 +798,13 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
 					  (pud_t *) p4d, init);
 
 		spin_unlock(&init_mm.page_table_lock);
+
+		/*
+		 * Bail only after updating pgd/p4d to keep progress from p4d across retries.
+		 */
+		if (IS_ERR_VALUE(paddr_last))
+			return paddr_last;
+
 		pgd_changed = true;
 	}
 
@@ -777,7 +819,8 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
  * Create page table mapping for the physical memory for specific physical
  * addresses. Note that it can only be used to populate non-present entries.
  * The virtual and physical addresses have to be aligned on PMD level
- * down. It returns the last physical address mapped.
+ * down. It returns the last physical address mapped. Allocation errors are
+ * returned with ERR_PTR.
  */
 unsigned long __meminit
 kernel_physical_mapping_init(unsigned long paddr_start,
@@ -980,8 +1023,11 @@ int arch_add_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
+	unsigned long ret = 0;
 
-	init_memory_mapping(start, start + size, params->pgprot);
+	ret = init_memory_mapping(start, start + size, params->pgprot);
+	if (IS_ERR_VALUE(ret))
+		return (int)ret;
 
 	return add_pages(nid, start_pfn, nr_pages, params);
 }
-- 
2.39.5



Thread overview: 6+ messages
2025-06-09 10:32 [PATCH v2 0/2] x86/mm: Improve alloc handling of phys_*_init() Em Sharnoff
2025-06-09 10:33 ` Em Sharnoff [this message]
2025-06-09 17:15   ` [PATCH v2 1/2] x86/mm: Handle alloc failure in phys_*_init() kernel test robot
2025-06-10 10:19     ` Em Sharnoff
2025-06-09 17:56   ` kernel test robot
2025-06-09 10:34 ` [PATCH v2 2/2] x86/mm: Use GFP_KERNEL for alloc_low_pages() after boot Em Sharnoff
