From: <artem.kuzin@huawei.com>
To: <x86@kernel.org>, <tglx@linutronix.de>, <mingo@redhat.com>,
<bp@alien8.de>, <dave.hansen@linux.intel.com>, <hpa@zytor.com>,
<luto@kernel.org>, <peterz@infradead.org>,
<akpm@linux-foundation.org>, <urezki@gmail.com>,
<hch@infradead.org>, <lstoakes@gmail.com>, <mcgrof@kernel.org>,
<rmk+kernel@armlinux.org.uk>
Cc: <nikita.panov@huawei-partners.com>,
<alexander.grubnikov@huawei.com>, <stepanov.anatoly@huawei.com>,
<guohanjun@huawei.com>, <weiyongjun1@huawei.com>,
<wangkefeng.wang@huawei.com>, <judy.chenhui@huawei.com>,
<yusongping@huawei.com>, <kang.sun@huawei.com>,
<linux-mm@kvack.org>, <linux-modules@vger.kernel.org>
Subject: [PATCH RFC 08/12] x86: make kernel text patching aware of replicas
Date: Thu, 28 Dec 2023 21:10:52 +0800
Message-ID: <20231228131056.602411-9-artem.kuzin@huawei.com>
In-Reply-To: <20231228131056.602411-1-artem.kuzin@huawei.com>
From: Artem Kuzin <artem.kuzin@huawei.com>
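
With per-NUMA-node replication of kernel text, a single logical text
poke has to land in every node's copy. Make both x86 patching paths
replica-aware:

- text_poke_early(): when the target lies in replicated kernel text,
  write the new opcode into each node's replica via
  numa_addr_in_replica() instead of only the canonical mapping.

- __text_poke(): wrap the temporary-mm poking sequence in a
  for_each_replica() loop, resolving the backing pages through the
  per-node page tables with walk_to_page_node(). Addresses without
  replicas break out of the loop after the first iteration, so the
  non-replicated case is poked exactly once, as before.
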
Co-developed-by: Nikita Panov <nikita.panov@huawei-partners.com>
Signed-off-by: Nikita Panov <nikita.panov@huawei-partners.com>
Co-developed-by: Alexander Grubnikov <alexander.grubnikov@huawei.com>
Signed-off-by: Alexander Grubnikov <alexander.grubnikov@huawei.com>
Signed-off-by: Artem Kuzin <artem.kuzin@huawei.com>
---
arch/x86/kernel/alternative.c | 116 ++++++++++++++++++----------------
1 file changed, 62 insertions(+), 54 deletions(-)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 44843a492e69..b0abd60bcafe 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -18,6 +18,7 @@
#include <linux/mmu_context.h>
#include <linux/bsearch.h>
#include <linux/sync_core.h>
+#include <linux/numa_replication.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
@@ -1659,6 +1660,7 @@ void __init_or_module text_poke_early(void *addr, const void *opcode,
size_t len)
{
unsigned long flags;
+ int nid;
if (boot_cpu_has(X86_FEATURE_NX) &&
is_module_text_address((unsigned long)addr)) {
@@ -1669,8 +1671,18 @@ void __init_or_module text_poke_early(void *addr, const void *opcode,
*/
memcpy(addr, opcode, len);
} else {
+ unsigned long iaddr = (unsigned long)addr;
+
local_irq_save(flags);
- memcpy(addr, opcode, len);
+ if (is_text_replicated() && is_kernel_text(iaddr)) {
+ for_each_replica(nid) {
+ void *vaddr = numa_addr_in_replica(addr, nid);
+
+ memcpy(vaddr, opcode, len);
+ }
+ } else {
+ memcpy(addr, opcode, len);
+ }
local_irq_restore(flags);
sync_core();
@@ -1764,36 +1776,21 @@ typedef void text_poke_f(void *dst, const void *src, size_t len);
static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t len)
{
+ int nid;
bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
struct page *pages[2] = {NULL};
temp_mm_state_t prev;
unsigned long flags;
+ int size_in_poking_mm = PAGE_SIZE;
pte_t pte, *ptep;
spinlock_t *ptl;
pgprot_t pgprot;
-
+ bool has_replica = numa_addr_has_replica(addr);
/*
* While boot memory allocator is running we cannot use struct pages as
* they are not yet initialized. There is no way to recover.
*/
BUG_ON(!after_bootmem);
-
- if (!core_kernel_text((unsigned long)addr)) {
- pages[0] = vmalloc_to_page(addr);
- if (cross_page_boundary)
- pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
- } else {
- pages[0] = virt_to_page(addr);
- WARN_ON(!PageReserved(pages[0]));
- if (cross_page_boundary)
- pages[1] = virt_to_page(addr + PAGE_SIZE);
- }
- /*
- * If something went wrong, crash and burn since recovery paths are not
- * implemented.
- */
- BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
-
/*
* Map the page without the global bit, as TLB flushing is done with
* flush_tlb_mm_range(), which is intended for non-global PTEs.
@@ -1812,48 +1809,59 @@ static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t l
local_irq_save(flags);
- pte = mk_pte(pages[0], pgprot);
- set_pte_at(poking_mm, poking_addr, ptep, pte);
+ for_each_replica(nid) {
+ prev = use_temporary_mm(poking_mm);
- if (cross_page_boundary) {
- pte = mk_pte(pages[1], pgprot);
- set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
- }
+ pages[0] = walk_to_page_node(nid, addr);
+ if (cross_page_boundary)
+ pages[1] = walk_to_page_node(nid, addr + PAGE_SIZE);
- /*
- * Loading the temporary mm behaves as a compiler barrier, which
- * guarantees that the PTE will be set at the time memcpy() is done.
- */
- prev = use_temporary_mm(poking_mm);
+ BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
- kasan_disable_current();
- func((u8 *)poking_addr + offset_in_page(addr), src, len);
- kasan_enable_current();
+ pte = mk_pte(pages[0], pgprot);
+ set_pte_at(poking_mm, poking_addr, ptep, pte);
- /*
- * Ensure that the PTE is only cleared after the instructions of memcpy
- * were issued by using a compiler barrier.
- */
- barrier();
+ if (cross_page_boundary) {
+ pte = mk_pte(pages[1], pgprot);
+ set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
+ }
+ /*
+	 * Compiler barrier to ensure that the PTEs are set before func() runs.
+ */
+ barrier();
- pte_clear(poking_mm, poking_addr, ptep);
- if (cross_page_boundary)
- pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);
+ kasan_disable_current();
+ func((u8 *)poking_addr + offset_in_page(addr), src, len);
+ kasan_enable_current();
- /*
- * Loading the previous page-table hierarchy requires a serializing
- * instruction that already allows the core to see the updated version.
- * Xen-PV is assumed to serialize execution in a similar manner.
- */
- unuse_temporary_mm(prev);
+ /*
+ * Ensure that the PTE is only cleared after the instructions of memcpy
+ * were issued by using a compiler barrier.
+ */
+ barrier();
- /*
- * Flushing the TLB might involve IPIs, which would require enabled
- * IRQs, but not if the mm is not used, as it is in this point.
- */
- flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
- (cross_page_boundary ? 2 : 1) * PAGE_SIZE,
- PAGE_SHIFT, false);
+ pte_clear(poking_mm, poking_addr, ptep);
+ if (cross_page_boundary)
+ pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);
+
+ /*
+ * Loading the previous page-table hierarchy requires a serializing
+ * instruction that already allows the core to see the updated version.
+ * Xen-PV is assumed to serialize execution in a similar manner.
+ */
+ unuse_temporary_mm(prev);
+
+ /*
+ * Flushing the TLB might involve IPIs, which would require enabled
+ * IRQs, but not if the mm is not used, as it is in this point.
+ */
+
+ flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
+ (cross_page_boundary ? 2 : 1) * size_in_poking_mm,
+ PAGE_SHIFT, false);
+ if (!has_replica)
+ break;
+ }
if (func == text_poke_memcpy) {
/*
--
2.34.1
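
For readers without the rest of the series at hand: this patch relies on
the replication interface that earlier patches introduce via
<linux/numa_replication.h>. Below is a minimal sketch of that interface
as inferred from the call sites in this diff; the function names appear
in the patch itself, but the exact signatures and the for_each_replica()
definition shown here are assumptions, not the series' actual
implementation.

	/*
	 * Hypothetical reconstruction of <linux/numa_replication.h>,
	 * inferred from the call sites above; signatures are assumed.
	 */
	#include <linux/mm_types.h>	/* struct page */
	#include <linux/nodemask.h>	/* for_each_node_state(), N_MEMORY */
	#include <linux/types.h>	/* bool */

	/* True once per-node copies of kernel text/rodata exist. */
	bool is_text_replicated(void);

	/* True if @addr falls in a range that has per-node replicas. */
	bool numa_addr_has_replica(const void *addr);

	/* Canonical address translated into node @nid's replica. */
	void *numa_addr_in_replica(void *addr, int nid);

	/*
	 * Walk node @nid's page tables and return the page that backs
	 * @addr there, so __text_poke() can map and patch that copy.
	 */
	struct page *walk_to_page_node(int nid, void *addr);

	/*
	 * One plausible definition (assumption): visit every node with
	 * memory; the series may track an explicit replica node mask.
	 */
	#define for_each_replica(nid) for_each_node_state((nid), N_MEMORY)

The design consequence is visible in __text_poke(): the whole
map/poke/unmap/flush_tlb_mm_range() sequence now runs once per replica
node, and the "if (!has_replica) break;" at the bottom of the loop keeps
the common, non-replicated case at a single iteration.
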
Thread overview: 24+ messages
2023-12-28 13:10 [PATCH RFC 00/12] x86 NUMA-aware kernel replication artem.kuzin
2023-12-28 13:10 ` [PATCH RFC 01/12] mm: allow per-NUMA node local PUD/PMD allocation artem.kuzin
2023-12-28 13:10 ` [PATCH RFC 02/12] mm: add config option and per-NUMA node VMS support artem.kuzin
2024-01-03 19:43 ` Christoph Lameter (Ampere)
2024-01-09 16:57 ` Artem Kuzin
2024-01-25 15:07 ` Dave Hansen
2024-01-29 6:22 ` Artem Kuzin
2024-01-30 23:36 ` Dave Hansen
2023-12-28 13:10 ` [PATCH RFC 03/12] mm: per-NUMA node replication core infrastructure artem.kuzin
2023-12-28 13:10 ` [PATCH RFC 04/12] x86: add support of memory protection for NUMA replicas artem.kuzin
2024-01-09 6:46 ` Garg, Shivank
2024-01-09 15:53 ` a00561249@china.huawei.com
2024-01-10 6:19 ` Garg, Shivank
2023-12-28 13:10 ` [PATCH RFC 05/12] x86: enable memory protection for replicated memory artem.kuzin
2023-12-28 13:10 ` [PATCH RFC 06/12] x86: align kernel text and rodata using HUGE_PAGE boundary artem.kuzin
2023-12-28 13:10 ` [PATCH RFC 07/12] x86: enable per-NUMA node kernel text and rodata replication artem.kuzin
2023-12-28 13:10 ` artem.kuzin [this message]
2023-12-28 13:10 ` [PATCH RFC 09/12] x86: add support of NUMA replication for efi page tables artem.kuzin
2023-12-28 13:10 ` [PATCH RFC 10/12] mm: add replicas allocation support for vmalloc artem.kuzin
2023-12-28 13:10 ` [PATCH RFC 11/12] x86: add kernel modules text and rodata replication support artem.kuzin
2023-12-28 13:10 ` [PATCH RFC 12/12] mm: set memory permissions for BPF handlers replicas artem.kuzin
2024-01-10 10:03 ` [PATCH RFC 00/12] x86 NUMA-aware kernel replication Russell King (Oracle)
2024-01-25 4:30 ` Garg, Shivank
2024-01-29 7:51 ` Artem Kuzin