linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Brendan Jackman <jackmanb@google.com>
To: Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	 Dave Hansen <dave.hansen@linux.intel.com>,
	"H. Peter Anvin" <hpa@zytor.com>,
	 Andy Lutomirski <luto@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	 Sean Christopherson <seanjc@google.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	 Alexandre Chartre <alexandre.chartre@oracle.com>,
	Liran Alon <liran.alon@oracle.com>,
	 Jan Setje-Eilers <jan.setjeeilers@oracle.com>,
	Catalin Marinas <catalin.marinas@arm.com>,
	 Will Deacon <will@kernel.org>,
	Mark Rutland <mark.rutland@arm.com>,
	 Andrew Morton <akpm@linux-foundation.org>,
	Mel Gorman <mgorman@suse.de>,
	 Lorenzo Stoakes <lstoakes@gmail.com>,
	David Hildenbrand <david@redhat.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	 Michal Hocko <mhocko@kernel.org>,
	Khalid Aziz <khalid.aziz@oracle.com>,
	 Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	 Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	 Valentin Schneider <vschneid@redhat.com>,
	Paul Turner <pjt@google.com>, Reiji Watanabe <reijiw@google.com>,
	 Junaid Shahid <junaids@google.com>,
	Ofir Weisse <oweisse@google.com>,
	 Yosry Ahmed <yosryahmed@google.com>,
	Patrick Bellasi <derkling@google.com>,
	 KP Singh <kpsingh@google.com>,
	Alexandra Sandulescu <aesa@google.com>,
	 Matteo Rizzo <matteorizzo@google.com>,
	Jann Horn <jannh@google.com>
Cc: x86@kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	 kvm@vger.kernel.org, Brendan Jackman <jackmanb@google.com>
Subject: [PATCH 21/26] KVM: x86: asi: Restricted address space for VM execution
Date: Fri, 12 Jul 2024 17:00:39 +0000	[thread overview]
Message-ID: <20240712-asi-rfc-24-v1-21-144b319a40d8@google.com> (raw)
In-Reply-To: <20240712-asi-rfc-24-v1-0-144b319a40d8@google.com>

An ASI restricted address space is added for KVM. It is currently only
enabled for Intel CPUs.

This change incorporates an extra asi_exit at the end of vcpu_run. We
expect later iterations of ASI to drop that call as we gain the
ablity to context switch within the ASI domain.

Signed-off-by: Brendan Jackman <jackmanb@google.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/svm/svm.c          |  2 ++
 arch/x86/kvm/vmx/vmx.c          | 36 ++++++++++++++++++++++--------------
 arch/x86/kvm/x86.c              | 29 +++++++++++++++++++++++++++--
 4 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6efd1497b0263..6c3326cb8273c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -36,6 +36,7 @@
 #include <asm/kvm_page_track.h>
 #include <asm/kvm_vcpu_regs.h>
 #include <asm/hyperv-tlfs.h>
+#include <asm/asi.h>
 
 #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
 
@@ -1514,6 +1515,8 @@ struct kvm_arch {
 	 */
 #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
 	struct kvm_mmu_memory_cache split_desc_cache;
+
+	struct asi *asi;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9aaf83c8d57df..6f9a279c12dc7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4108,6 +4108,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
 	guest_state_enter_irqoff();
 
 	amd_clear_divider();
+	asi_enter(vcpu->kvm->arch.asi);
 
 	if (sev_es_guest(vcpu->kvm))
 		__svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted,
@@ -4115,6 +4116,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
 	else
 		__svm_vcpu_run(svm, spec_ctrl_intercepted);
 
+	asi_relax();
 	guest_state_exit_irqoff();
 }
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 22411f4aff530..1105d666a8ade 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -49,6 +49,7 @@
 #include <asm/mwait.h>
 #include <asm/spec-ctrl.h>
 #include <asm/vmx.h>
+#include <asm/asi.h>
 
 #include <trace/events/ipi.h>
 
@@ -7255,14 +7256,32 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 					unsigned int flags)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	unsigned long cr3;
 
 	guest_state_enter_irqoff();
+	asi_enter(vcpu->kvm->arch.asi);
+
+	/*
+	 * Refresh vmcs.HOST_CR3 if necessary.  This must be done immediately
+	 * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
+	 * it switches back to the current->mm, which can occur in KVM context
+	 * when switching to a temporary mm to patch kernel code, e.g. if KVM
+	 * toggles a static key while handling a VM-Exit.
+	 * Also, this must be done after asi_enter(), as it changes CR3
+	 * when switching address spaces.
+	 */
+	cr3 = __get_current_cr3_fast();
+	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
+		vmcs_writel(HOST_CR3, cr3);
+		vmx->loaded_vmcs->host_state.cr3 = cr3;
+	}
 
 	/*
 	 * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
 	 * mitigation for MDS is done late in VMentry and is still
 	 * executed in spite of L1D Flush. This is because an extra VERW
 	 * should not matter much after the big hammer L1D Flush.
+	 * This is only after asi_enter() for performance reasons.
 	 */
 	if (static_branch_unlikely(&vmx_l1d_should_flush))
 		vmx_l1d_flush(vcpu);
@@ -7283,6 +7302,8 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 
 	vmx->idt_vectoring_info = 0;
 
+	asi_relax();
+
 	vmx_enable_fb_clear(vmx);
 
 	if (unlikely(vmx->fail)) {
@@ -7311,7 +7332,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long cr3, cr4;
+	unsigned long cr4;
 
 	/* Record the guest's net vcpu time for enforced NMI injections. */
 	if (unlikely(!enable_vnmi &&
@@ -7354,19 +7375,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 	vcpu->arch.regs_dirty = 0;
 
-	/*
-	 * Refresh vmcs.HOST_CR3 if necessary.  This must be done immediately
-	 * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
-	 * it switches back to the current->mm, which can occur in KVM context
-	 * when switching to a temporary mm to patch kernel code, e.g. if KVM
-	 * toggles a static key while handling a VM-Exit.
-	 */
-	cr3 = __get_current_cr3_fast();
-	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
-		vmcs_writel(HOST_CR3, cr3);
-		vmx->loaded_vmcs->host_state.cr3 = cr3;
-	}
-
 	cr4 = cr4_read_shadow();
 	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
 		vmcs_writel(HOST_CR4, cr4);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 91478b769af08..b9947e88d4ac6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -85,6 +85,7 @@
 #include <asm/emulate_prefix.h>
 #include <asm/sgx.h>
 #include <clocksource/hyperv_timer.h>
+#include <asm/asi.h>
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -318,6 +319,8 @@ u64 __read_mostly host_xcr0;
 
 static struct kmem_cache *x86_emulator_cache;
 
+static int __read_mostly kvm_asi_index = -1;
+
 /*
  * When called, it means the previous get/set msr reached an invalid msr.
  * Return true if we want to ignore/silent this failed msr access.
@@ -9750,6 +9753,11 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 	if (r)
 		goto out_free_percpu;
 
+	r = asi_register_class("KVM", NULL);
+	if (r < 0)
+		goto out_mmu_exit;
+	kvm_asi_index = r;
+
 	if (boot_cpu_has(X86_FEATURE_XSAVE)) {
 		host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 		kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
@@ -9767,7 +9775,7 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 
 	r = ops->hardware_setup();
 	if (r != 0)
-		goto out_mmu_exit;
+		goto out_asi_unregister;
 
 	kvm_ops_update(ops);
 
@@ -9820,6 +9828,8 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 out_unwind_ops:
 	kvm_x86_ops.hardware_enable = NULL;
 	static_call(kvm_x86_hardware_unsetup)();
+out_asi_unregister:
+	asi_unregister_class(kvm_asi_index);
 out_mmu_exit:
 	kvm_mmu_vendor_module_exit();
 out_free_percpu:
@@ -9851,6 +9861,7 @@ void kvm_x86_vendor_exit(void)
 	cancel_work_sync(&pvclock_gtod_work);
 #endif
 	static_call(kvm_x86_hardware_unsetup)();
+	asi_unregister_class(kvm_asi_index);
 	kvm_mmu_vendor_module_exit();
 	free_percpu(user_return_msrs);
 	kmem_cache_destroy(x86_emulator_cache);
@@ -11436,6 +11447,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
 	r = vcpu_run(vcpu);
 
+	/*
+	 * At present ASI doesn't have the capability to transition directly
+	 * from the restricted address space to the user address space. So we
+	 * just return to the unrestricted address space in between.
+	 */
+	asi_exit();
+
 out:
 	kvm_put_guest_fpu(vcpu);
 	if (kvm_run->kvm_valid_regs)
@@ -12539,10 +12557,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	kvm_mmu_init_vm(kvm);
 
-	ret = static_call(kvm_x86_vm_init)(kvm);
+	ret = asi_init(kvm->mm, kvm_asi_index, &kvm->arch.asi);
 	if (ret)
 		goto out_uninit_mmu;
 
+	ret = static_call(kvm_x86_vm_init)(kvm);
+	if (ret)
+		goto out_asi_destroy;
+
 	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
 	atomic_set(&kvm->arch.noncoherent_dma_count, 0);
 
@@ -12579,6 +12601,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	return 0;
 
+out_asi_destroy:
+	asi_destroy(kvm->arch.asi);
 out_uninit_mmu:
 	kvm_mmu_uninit_vm(kvm);
 	kvm_page_track_cleanup(kvm);
@@ -12720,6 +12744,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_destroy_vcpus(kvm);
 	kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 	kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
+	asi_destroy(kvm->arch.asi);
 	kvm_mmu_uninit_vm(kvm);
 	kvm_page_track_cleanup(kvm);
 	kvm_xen_destroy_vm(kvm);

-- 
2.45.2.993.g49e7a77208-goog



  parent reply	other threads:[~2024-07-12 17:02 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-12 17:00 [PATCH 00/26] Address Space Isolation (ASI) 2024 Brendan Jackman
2024-07-12 17:00 ` [PATCH 01/26] mm: asi: Make some utility functions noinstr compatible Brendan Jackman
2024-10-25 11:41   ` Borislav Petkov
2024-10-25 13:21     ` Brendan Jackman
2024-10-29 17:38       ` Junaid Shahid
2024-10-29 19:12         ` Thomas Gleixner
2024-11-01  1:44           ` Junaid Shahid
2024-11-01 10:06             ` Brendan Jackman
2024-11-01 20:27             ` Thomas Gleixner
2024-11-05 21:40               ` Junaid Shahid
2024-12-13 14:45               ` Brendan Jackman
2024-07-12 17:00 ` [PATCH 02/26] x86: Create CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION Brendan Jackman
2024-07-22  7:55   ` Geert Uytterhoeven
2024-07-12 17:00 ` [PATCH 03/26] mm: asi: Introduce ASI core API Brendan Jackman
2024-07-12 17:00 ` [PATCH 04/26] objtool: let some noinstr functions make indirect calls Brendan Jackman
2024-07-12 17:00 ` [PATCH 05/26] mm: asi: Add infrastructure for boot-time enablement Brendan Jackman
2024-07-12 17:00 ` [PATCH 06/26] mm: asi: ASI support in interrupts/exceptions Brendan Jackman
2024-07-12 17:00 ` [PATCH 07/26] mm: asi: Switch to unrestricted address space before a context switch Brendan Jackman
2024-07-12 17:00 ` [PATCH 08/26] mm: asi: Use separate PCIDs for restricted address spaces Brendan Jackman
2024-07-12 17:00 ` [PATCH 09/26] mm: asi: Make __get_current_cr3_fast() ASI-aware Brendan Jackman
2024-07-12 17:00 ` [PATCH 10/26] mm: asi: Avoid warning from NMI userspace accesses in ASI context Brendan Jackman
2024-07-14  3:59   ` kernel test robot
2024-07-12 17:00 ` [PATCH 11/26] mm: asi: ASI page table allocation functions Brendan Jackman
2024-07-12 17:00 ` [PATCH 12/26] mm: asi: asi_exit() on PF, skip handling if address is accessible Brendan Jackman
2024-07-12 17:00 ` [PATCH 13/26] mm: asi: Functions to map/unmap a memory range into ASI page tables Brendan Jackman
2024-07-12 17:00 ` [PATCH 14/26] mm: asi: Add basic infrastructure for global non-sensitive mappings Brendan Jackman
2024-07-12 17:00 ` [PATCH 15/26] mm: Add __PAGEFLAG_FALSE Brendan Jackman
2024-07-12 17:00 ` [PATCH 16/26] mm: asi: Map non-user buddy allocations as nonsensitive Brendan Jackman
2024-08-21 13:59   ` Brendan Jackman
2024-07-12 17:00 ` [PATCH 17/26] mm: asi: Map kernel text and static data " Brendan Jackman
2024-07-12 17:00 ` [PATCH 18/26] mm: asi: Map vmalloc/vmap data as nonsesnitive Brendan Jackman
2024-07-13 15:53   ` kernel test robot
2024-07-12 17:00 ` [PATCH 19/26] percpu: clean up all mappings when pcpu_map_pages() fails Brendan Jackman
2024-07-16  1:33   ` Yosry Ahmed
2024-07-12 17:00 ` [PATCH 20/26] mm: asi: Map dynamic percpu memory as nonsensitive Brendan Jackman
2024-07-12 17:00 ` Brendan Jackman [this message]
2024-07-12 17:00 ` [PATCH 22/26] KVM: x86: asi: Stabilize CR3 when potentially accessing with ASI Brendan Jackman
2024-07-12 17:00 ` [PATCH 23/26] mm: asi: Stabilize CR3 in switch_mm_irqs_off() Brendan Jackman
2024-07-12 17:00 ` [PATCH 24/26] mm: asi: Make TLB flushing correct under ASI Brendan Jackman
2024-07-12 17:00 ` [PATCH 25/26] mm: asi: Stop ignoring asi=on cmdline flag Brendan Jackman
2024-07-12 17:00 ` [PATCH 26/26] KVM: x86: asi: Add some mitigations on address space transitions Brendan Jackman
2024-07-14  5:02   ` kernel test robot
2024-08-20 10:52   ` Shivank Garg
2024-08-21  9:38     ` Brendan Jackman
2024-08-21 16:00       ` Shivank Garg
2024-07-12 17:09 ` [PATCH 00/26] Address Space Isolation (ASI) 2024 Brendan Jackman
2024-09-11 16:37 ` Brendan Jackman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240712-asi-rfc-24-v1-21-144b319a40d8@google.com \
    --to=jackmanb@google.com \
    --cc=aesa@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexandre.chartre@oracle.com \
    --cc=bp@alien8.de \
    --cc=catalin.marinas@arm.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@redhat.com \
    --cc=derkling@google.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=hpa@zytor.com \
    --cc=jan.setjeeilers@oracle.com \
    --cc=jannh@google.com \
    --cc=junaids@google.com \
    --cc=juri.lelli@redhat.com \
    --cc=khalid.aziz@oracle.com \
    --cc=kpsingh@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=liran.alon@oracle.com \
    --cc=lstoakes@gmail.com \
    --cc=luto@kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=matteorizzo@google.com \
    --cc=mgorman@suse.de \
    --cc=mhocko@kernel.org \
    --cc=mingo@redhat.com \
    --cc=oweisse@google.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=reijiw@google.com \
    --cc=rostedt@goodmis.org \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=vbabka@suse.cz \
    --cc=vincent.guittot@linaro.org \
    --cc=vschneid@redhat.com \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    --cc=yosryahmed@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox