From: <artem.kuzin@huawei.com>
To: <x86@kernel.org>, <tglx@linutronix.de>, <mingo@redhat.com>,
<bp@alien8.de>, <dave.hansen@linux.intel.com>, <hpa@zytor.com>,
<luto@kernel.org>, <peterz@infradead.org>,
<akpm@linux-foundation.org>, <urezki@gmail.com>,
<hch@infradead.org>, <lstoakes@gmail.com>, <mcgrof@kernel.org>,
<rmk+kernel@armlinux.org.uk>
Cc: <nikita.panov@huawei-partners.com>,
<alexander.grubnikov@huawei.com>, <stepanov.anatoly@huawei.com>,
<guohanjun@huawei.com>, <weiyongjun1@huawei.com>,
<wangkefeng.wang@huawei.com>, <judy.chenhui@huawei.com>,
<yusongping@huawei.com>, <kang.sun@huawei.com>,
<linux-mm@kvack.org>, <linux-modules@vger.kernel.org>
Subject: [PATCH RFC 08/12] x86: make kernel text patching aware of replicas
Date: Thu, 28 Dec 2023 21:10:52 +0800
Message-ID: <20231228131056.602411-9-artem.kuzin@huawei.com>
In-Reply-To: <20231228131056.602411-1-artem.kuzin@huawei.com>
From: Artem Kuzin <artem.kuzin@huawei.com>
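
With per-NUMA-node replication of kernel text, a single logical text
poke has to land in every node's copy. Make both x86 patching paths
replica-aware:

- text_poke_early(): when the target lies in replicated kernel text,
  write the new opcode into each node's replica via
  numa_addr_in_replica() instead of only the canonical mapping.

- __text_poke(): wrap the temporary-mm poking sequence in a
  for_each_replica() loop, resolving the backing pages through the
  per-node page tables with walk_to_page_node(). Addresses without
  replicas break out of the loop after the first iteration, so the
  non-replicated case is poked exactly once, as before.
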
Co-developed-by: Nikita Panov <nikita.panov@huawei-partners.com>
Signed-off-by: Nikita Panov <nikita.panov@huawei-partners.com>
Co-developed-by: Alexander Grubnikov <alexander.grubnikov@huawei.com>
Signed-off-by: Alexander Grubnikov <alexander.grubnikov@huawei.com>
Signed-off-by: Artem Kuzin <artem.kuzin@huawei.com>
---
arch/x86/kernel/alternative.c | 116 ++++++++++++++++++----------------
1 file changed, 62 insertions(+), 54 deletions(-)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 44843a492e69..b0abd60bcafe 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -18,6 +18,7 @@
#include <linux/mmu_context.h>
#include <linux/bsearch.h>
#include <linux/sync_core.h>
+#include <linux/numa_replication.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
@@ -1659,6 +1660,7 @@ void __init_or_module text_poke_early(void *addr, const void *opcode,
size_t len)
{
unsigned long flags;
+ int nid;
if (boot_cpu_has(X86_FEATURE_NX) &&
is_module_text_address((unsigned long)addr)) {
@@ -1669,8 +1671,18 @@ void __init_or_module text_poke_early(void *addr, const void *opcode,
*/
memcpy(addr, opcode, len);
} else {
+ unsigned long iaddr = (unsigned long)addr;
+
local_irq_save(flags);
- memcpy(addr, opcode, len);
+ if (is_text_replicated() && is_kernel_text(iaddr)) {
+ for_each_replica(nid) {
+ void *vaddr = numa_addr_in_replica(addr, nid);
+
+ memcpy(vaddr, opcode, len);
+ }
+ } else {
+ memcpy(addr, opcode, len);
+ }
local_irq_restore(flags);
sync_core();
@@ -1764,36 +1776,21 @@ typedef void text_poke_f(void *dst, const void *src, size_t len);
static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t len)
{
+ int nid;
bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
struct page *pages[2] = {NULL};
temp_mm_state_t prev;
unsigned long flags;
+ int size_in_poking_mm = PAGE_SIZE;
pte_t pte, *ptep;
spinlock_t *ptl;
pgprot_t pgprot;
-
+ bool has_replica = numa_addr_has_replica(addr);
/*
* While boot memory allocator is running we cannot use struct pages as
* they are not yet initialized. There is no way to recover.
*/
BUG_ON(!after_bootmem);
-
- if (!core_kernel_text((unsigned long)addr)) {
- pages[0] = vmalloc_to_page(addr);
- if (cross_page_boundary)
- pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
- } else {
- pages[0] = virt_to_page(addr);
- WARN_ON(!PageReserved(pages[0]));
- if (cross_page_boundary)
- pages[1] = virt_to_page(addr + PAGE_SIZE);
- }
- /*
- * If something went wrong, crash and burn since recovery paths are not
- * implemented.
- */
- BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
-
/*
* Map the page without the global bit, as TLB flushing is done with
* flush_tlb_mm_range(), which is intended for non-global PTEs.
@@ -1812,48 +1809,59 @@ static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t l
local_irq_save(flags);
- pte = mk_pte(pages[0], pgprot);
- set_pte_at(poking_mm, poking_addr, ptep, pte);
+ for_each_replica(nid) {
+ prev = use_temporary_mm(poking_mm);
- if (cross_page_boundary) {
- pte = mk_pte(pages[1], pgprot);
- set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
- }
+ pages[0] = walk_to_page_node(nid, addr);
+ if (cross_page_boundary)
+ pages[1] = walk_to_page_node(nid, addr + PAGE_SIZE);
- /*
- * Loading the temporary mm behaves as a compiler barrier, which
- * guarantees that the PTE will be set at the time memcpy() is done.
- */
- prev = use_temporary_mm(poking_mm);
+ BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
- kasan_disable_current();
- func((u8 *)poking_addr + offset_in_page(addr), src, len);
- kasan_enable_current();
+ pte = mk_pte(pages[0], pgprot);
+ set_pte_at(poking_mm, poking_addr, ptep, pte);
- /*
- * Ensure that the PTE is only cleared after the instructions of memcpy
- * were issued by using a compiler barrier.
- */
- barrier();
+ if (cross_page_boundary) {
+ pte = mk_pte(pages[1], pgprot);
+ set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
+ }
+ /*
+	 * Compiler barrier to ensure that the PTEs are set before func() runs.
+ */
+ barrier();
- pte_clear(poking_mm, poking_addr, ptep);
- if (cross_page_boundary)
- pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);
+ kasan_disable_current();
+ func((u8 *)poking_addr + offset_in_page(addr), src, len);
+ kasan_enable_current();
- /*
- * Loading the previous page-table hierarchy requires a serializing
- * instruction that already allows the core to see the updated version.
- * Xen-PV is assumed to serialize execution in a similar manner.
- */
- unuse_temporary_mm(prev);
+ /*
+ * Ensure that the PTE is only cleared after the instructions of memcpy
+ * were issued by using a compiler barrier.
+ */
+ barrier();
- /*
- * Flushing the TLB might involve IPIs, which would require enabled
- * IRQs, but not if the mm is not used, as it is in this point.
- */
- flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
- (cross_page_boundary ? 2 : 1) * PAGE_SIZE,
- PAGE_SHIFT, false);
+ pte_clear(poking_mm, poking_addr, ptep);
+ if (cross_page_boundary)
+ pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);
+
+ /*
+ * Loading the previous page-table hierarchy requires a serializing
+ * instruction that already allows the core to see the updated version.
+ * Xen-PV is assumed to serialize execution in a similar manner.
+ */
+ unuse_temporary_mm(prev);
+
+ /*
+ * Flushing the TLB might involve IPIs, which would require enabled
+ * IRQs, but not if the mm is not used, as it is in this point.
+ */
+
+ flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
+ (cross_page_boundary ? 2 : 1) * size_in_poking_mm,
+ PAGE_SHIFT, false);
+ if (!has_replica)
+ break;
+ }
if (func == text_poke_memcpy) {
/*
--
2.34.1
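
For readers without the rest of the series at hand: this patch relies on
the replication interface that earlier patches introduce via
<linux/numa_replication.h>. Below is a minimal sketch of that interface
as inferred from the call sites in this diff; the function names appear
in the patch itself, but the exact signatures and the for_each_replica()
definition shown here are assumptions, not the series' actual
implementation.

	/*
	 * Hypothetical reconstruction of <linux/numa_replication.h>,
	 * inferred from the call sites above; signatures are assumed.
	 */
	#include <linux/mm_types.h>	/* struct page */
	#include <linux/nodemask.h>	/* for_each_node_state(), N_MEMORY */
	#include <linux/types.h>	/* bool */

	/* True once per-node copies of kernel text/rodata exist. */
	bool is_text_replicated(void);

	/* True if @addr falls in a range that has per-node replicas. */
	bool numa_addr_has_replica(const void *addr);

	/* Canonical address translated into node @nid's replica. */
	void *numa_addr_in_replica(void *addr, int nid);

	/*
	 * Walk node @nid's page tables and return the page that backs
	 * @addr there, so __text_poke() can map and patch that copy.
	 */
	struct page *walk_to_page_node(int nid, void *addr);

	/*
	 * One plausible definition (assumption): visit every node with
	 * memory; the series may track an explicit replica node mask.
	 */
	#define for_each_replica(nid) for_each_node_state((nid), N_MEMORY)

The design consequence is visible in __text_poke(): the whole
map/poke/unmap/flush_tlb_mm_range() sequence now runs once per replica
node, and the "if (!has_replica) break;" at the bottom of the loop keeps
the common, non-replicated case at a single iteration.
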
Thread overview: 24+ messages
2023-12-28 13:10 [PATCH RFC 00/12] x86 NUMA-aware kernel replication artem.kuzin
2023-12-28 13:10 ` [PATCH RFC 01/12] mm: allow per-NUMA node local PUD/PMD allocation artem.kuzin
2023-12-28 13:10 ` [PATCH RFC 02/12] mm: add config option and per-NUMA node VMS support artem.kuzin
2024-01-03 19:43 ` Christoph Lameter (Ampere)
2024-01-09 16:57 ` Artem Kuzin
2024-01-25 15:07 ` Dave Hansen
2024-01-29 6:22 ` Artem Kuzin
2024-01-30 23:36 ` Dave Hansen
2023-12-28 13:10 ` [PATCH RFC 03/12] mm: per-NUMA node replication core infrastructure artem.kuzin
2023-12-28 13:10 ` [PATCH RFC 04/12] x86: add support of memory protection for NUMA replicas artem.kuzin
2024-01-09 6:46 ` Garg, Shivank
2024-01-09 15:53 ` a00561249@china.huawei.com
2024-01-10 6:19 ` Garg, Shivank
2023-12-28 13:10 ` [PATCH RFC 05/12] x86: enable memory protection for replicated memory artem.kuzin
2023-12-28 13:10 ` [PATCH RFC 06/12] x86: align kernel text and rodata using HUGE_PAGE boundary artem.kuzin
2023-12-28 13:10 ` [PATCH RFC 07/12] x86: enable per-NUMA node kernel text and rodata replication artem.kuzin
2023-12-28 13:10 ` artem.kuzin [this message]
2023-12-28 13:10 ` [PATCH RFC 09/12] x86: add support of NUMA replication for efi page tables artem.kuzin
2023-12-28 13:10 ` [PATCH RFC 10/12] mm: add replicas allocation support for vmalloc artem.kuzin
2023-12-28 13:10 ` [PATCH RFC 11/12] x86: add kernel modules text and rodata replication support artem.kuzin
2023-12-28 13:10 ` [PATCH RFC 12/12] mm: set memory permissions for BPF handlers replicas artem.kuzin
2024-01-10 10:03 ` [PATCH RFC 00/12] x86 NUMA-aware kernel replication Russell King (Oracle)
2024-01-25 4:30 ` Garg, Shivank
2024-01-29 7:51 ` Artem Kuzin