linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Junaid Shahid <junaids@google.com>
To: linux-kernel@vger.kernel.org
Cc: Ofir Weisse <oweisse@google.com>,
	kvm@vger.kernel.org, pbonzini@redhat.com,  jmattson@google.com,
	pjt@google.com, alexandre.chartre@oracle.com,
	 rppt@linux.ibm.com, dave.hansen@linux.intel.com,
	peterz@infradead.org,  tglx@linutronix.de, luto@kernel.org,
	linux-mm@kvack.org
Subject: [RFC PATCH 47/47] mm: asi: Properly un/mapping task stack from ASI + tlb flush
Date: Tue, 22 Feb 2022 21:22:23 -0800	[thread overview]
Message-ID: <20220223052223.1202152-48-junaids@google.com> (raw)
In-Reply-To: <20220223052223.1202152-1-junaids@google.com>

From: Ofir Weisse <oweisse@google.com>

There are several locations where this is important. Especially since a
task_struct might be reused, potentially with a different mm.

1. Map in vcpu_run() @ arch/x86/kvm/x86.c
1. Unmap in release_task_stack() @ kernel/fork.c
2. Unmap in do_exit() @ kernel/exit.c
3. Unmap in begin_new_exec() @ fs/exec.c

Signed-off-by: Ofir Weisse <oweisse@google.com>


---
 arch/x86/include/asm/asi.h |  6 ++++
 arch/x86/kvm/x86.c         |  6 ++++
 arch/x86/mm/asi.c          | 59 ++++++++++++++++++++++++++++++++++++++
 fs/exec.c                  |  7 ++++-
 include/asm-generic/asi.h  | 16 +++++++++--
 include/linux/sched.h      |  5 ++++
 kernel/exit.c              |  2 +-
 kernel/fork.c              | 22 +++++++++++++-
 8 files changed, 118 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/asi.h b/arch/x86/include/asm/asi.h
index 6148e65fb0c2..9d8f43981678 100644
--- a/arch/x86/include/asm/asi.h
+++ b/arch/x86/include/asm/asi.h
@@ -87,6 +87,12 @@ void asi_unmap_user(struct asi *asi, void *va, size_t len);
 int  asi_fill_pgtbl_pool(struct asi_pgtbl_pool *pool, uint count, gfp_t flags);
 void asi_clear_pgtbl_pool(struct asi_pgtbl_pool *pool);
 
+int asi_map_task_stack(struct task_struct *tsk, struct asi *asi);
+void asi_unmap_task_stack(struct task_struct *tsk);
+void asi_mark_pages_local_nonsensitive(struct page *pages, uint order,
+                                       struct mm_struct *mm);
+void asi_clear_pages_local_nonsensitive(struct page *pages, uint order);
+
 static inline void asi_init_pgtbl_pool(struct asi_pgtbl_pool *pool)
 {
 	pool->pgtbl_list = NULL;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 294f73e9e71e..718104eefaed 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10122,6 +10122,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 	vcpu->arch.l1tf_flush_l1d = true;
 
+	/* We must have current->stack mapped into asi. This function can be
+	 * safely called many times, as it will only do the actual mapping once. */
+	r = asi_map_task_stack(current, vcpu->kvm->asi);
+	if (r != 0)
+		return r;
+
 	for (;;) {
 		if (kvm_vcpu_running(vcpu)) {
 			r = vcpu_enter_guest(vcpu);
diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c
index 7f2aa1823736..a86ac6644a57 100644
--- a/arch/x86/mm/asi.c
+++ b/arch/x86/mm/asi.c
@@ -1029,6 +1029,45 @@ void asi_unmap(struct asi *asi, void *addr, size_t len, bool flush_tlb)
 		asi_flush_tlb_range(asi, addr, len);
 }
 
+int asi_map_task_stack(struct task_struct *tsk, struct asi *asi)
+{
+        int ret;
+
+        /* If the stack is already mapped to asi - don't need to map it again. */
+        if (tsk->asi_stack_mapped)
+                return 0;
+
+        if (!tsk->mm)
+                return -EINVAL;
+
+        /* If the stack was allocated via the page allocator, we assume the
+         * stack pages were marked with PageNonSensitive, therefore tsk->stack
+         * address is properly aliased. */
+        ret = asi_map(ASI_LOCAL_NONSENSITIVE, tsk->stack, THREAD_SIZE);
+        if (!ret) {
+		tsk->asi_stack_mapped = asi;
+		asi_sync_mapping(asi, tsk->stack, THREAD_SIZE);
+	}
+
+        return ret;
+}
+
+void asi_unmap_task_stack(struct task_struct *tsk)
+{
+        /* No need to unmap if the stack was not mapped to begin with. */
+        if (!tsk->asi_stack_mapped)
+                return;
+
+        if (!tsk->mm)
+                return;
+
+        asi_unmap(ASI_LOCAL_NONSENSITIVE, tsk->stack, THREAD_SIZE,
+                  /* flush_tlb = */ true);
+
+        tsk->asi_stack_mapped = NULL;
+}
+
+
 void *asi_va(unsigned long pa)
 {
 	struct page *page = pfn_to_page(PHYS_PFN(pa));
@@ -1336,3 +1375,23 @@ void asi_unmap_user(struct asi *asi, void *addr, size_t len)
 	}
 }
 EXPORT_SYMBOL_GPL(asi_unmap_user);
+
+void asi_mark_pages_local_nonsensitive(struct page *pages, uint order,
+                                       struct mm_struct *mm)
+{
+        uint i;
+        for (i = 0; i < (1 << order); i++) {
+                __SetPageLocalNonSensitive(pages + i);
+                pages[i].asi_mm = mm;
+	}
+}
+
+void asi_clear_pages_local_nonsensitive(struct page *pages, uint order)
+{
+        uint i;
+        for (i = 0; i < (1 << order); i++) {
+                __ClearPageLocalNonSensitive(pages + i);
+                pages[i].asi_mm = NULL;
+	}
+}
+
diff --git a/fs/exec.c b/fs/exec.c
index 76f3b433e80d..fb9182cf3f33 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -69,6 +69,7 @@
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/tlb.h>
+#include <asm/asi.h>
 
 #include <trace/events/task.h>
 #include "internal.h"
@@ -1238,7 +1239,11 @@ int begin_new_exec(struct linux_binprm * bprm)
 	struct task_struct *me = current;
 	int retval;
 
-        /* TODO: (oweisse) unmap the stack from ASI */
+        /* The old mm is about to be released later on in exec_mmap. We are
+         * reusing the task, including its stack which was mapped to
+         * mm->asi_pgd[0]. We need to asi_unmap the stack, so the destructor of
+         * the mm won't complain on "lingering" asi mappings. */
+        asi_unmap_task_stack(current);
 
 	/* Once we are committed compute the creds */
 	retval = bprm_creds_from_file(bprm);
diff --git a/include/asm-generic/asi.h b/include/asm-generic/asi.h
index 2763cb1a974c..6e9a261a2b9d 100644
--- a/include/asm-generic/asi.h
+++ b/include/asm-generic/asi.h
@@ -66,8 +66,13 @@ static inline struct asi *asi_get_target(void) { return NULL; }
 
 static inline struct asi *asi_get_current(void) { return NULL; }
 
-static inline
-int asi_map_gfp(struct asi *asi, void *addr, size_t len, gfp_t gfp_flags)
+static inline int asi_map_task_stack(struct task_struct *tsk, struct asi *asi)
+{ return 0; }
+
+static inline void asi_unmap_task_stack(struct task_struct *tsk) { }
+
+static inline int asi_map_gfp(struct asi *asi, void *addr, size_t len,
+			      gfp_t gfp_flags)
 {
 	return 0;
 }
@@ -130,6 +135,13 @@ static inline int asi_load_module(struct module* module) {return 0;}
 
 static inline void asi_unload_module(struct module* module) { }
 
+static inline
+void asi_mark_pages_local_nonsensitive(struct page *pages, uint order,
+                                       struct mm_struct *mm) { }
+
+static inline
+void asi_clear_pages_local_nonsensitive(struct page *pages, uint order) { }
+
 #endif  /* !_ASSEMBLY_ */
 
 #endif /* !CONFIG_ADDRESS_SPACE_ISOLATION */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78c351e35fec..87ad45e52b19 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -67,6 +67,7 @@ struct sighand_struct;
 struct signal_struct;
 struct task_delay_info;
 struct task_group;
+struct asi;
 
 /*
  * Task state bitmask. NOTE! These bits are also
@@ -1470,6 +1471,10 @@ struct task_struct {
 	int				mce_count;
 #endif
 
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+        struct asi *asi_stack_mapped;
+#endif
+
 #ifdef CONFIG_KRETPROBES
 	struct llist_head               kretprobe_instances;
 #endif
diff --git a/kernel/exit.c b/kernel/exit.c
index ab2749cf6887..f21cc21814d1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -768,7 +768,7 @@ void __noreturn do_exit(long code)
 	profile_task_exit(tsk);
 	kcov_task_exit(tsk);
 
-        /* TODO: (oweisse) unmap the stack from ASI */
+	asi_unmap_task_stack(tsk);
 
 	coredump_task_exit(tsk);
 	ptrace_event(PTRACE_EVENT_EXIT, code);
diff --git a/kernel/fork.c b/kernel/fork.c
index cb147a72372d..876fefc477cb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -216,7 +216,6 @@ static int free_vm_stack_cache(unsigned int cpu)
 
 static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
-  /* TODO: (oweisse) Add annotation to map the stack into ASI */
 #ifdef CONFIG_VMAP_STACK
 	void *stack;
 	int i;
@@ -269,7 +268,16 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
 					     THREAD_SIZE_ORDER);
 
+        /* When marking pages as PageLocalNonSesitive we set the page->mm to be
+         * NULL. We must make sure the flag is cleared from the stack pages
+         * before free_pages is called. Otherwise, page->mm will be accessed
+         * which will reuslt in NULL reference. page_address() below will yield
+         * an aliased address after ASI_LOCAL_MAP, thanks to
+         * PageLocalNonSesitive flag. */
 	if (likely(page)) {
+                asi_mark_pages_local_nonsensitive(page,
+                                                  THREAD_SIZE_ORDER,
+                                                  NULL);
 		tsk->stack = kasan_reset_tag(page_address(page));
 		return tsk->stack;
 	}
@@ -301,6 +309,14 @@ static inline void free_thread_stack(struct task_struct *tsk)
 	}
 #endif
 
+        /* We must clear the PageNonSensitive flag before calling free_pages().
+         * Otherwise page->mm (which is NULL) will be accessed, in order to
+         * unmap the pages from ASI. Specifically for the stack, we assume the
+         * pages were already unmapped from ASI before we got here, via
+         * asi_unmap_task_stack(). */
+        asi_clear_pages_local_nonsensitive(virt_to_page(tsk->stack),
+                                                        THREAD_SIZE_ORDER);
+
 	__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
 }
 # else
@@ -436,6 +452,7 @@ static void release_task_stack(struct task_struct *tsk)
 	if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
 		return;  /* Better to leak the stack than to free prematurely */
 
+        asi_unmap_task_stack(tsk);
 	account_kernel_stack(tsk, -1);
 	free_thread_stack(tsk);
 	tsk->stack = NULL;
@@ -916,6 +933,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	 * functions again.
 	 */
 	tsk->stack = stack;
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+	tsk->asi_stack_mapped = NULL;
+#endif
 #ifdef CONFIG_VMAP_STACK
 	tsk->stack_vm_area = stack_vm_area;
 #endif
-- 
2.35.1.473.g83b2b277ed-goog



  parent reply	other threads:[~2022-02-23  5:25 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-23  5:21 [RFC PATCH 00/47] Address Space Isolation for KVM Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 01/47] mm: asi: Introduce ASI core API Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 02/47] mm: asi: Add command-line parameter to enable/disable ASI Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 03/47] mm: asi: Switch to unrestricted address space when entering scheduler Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 04/47] mm: asi: ASI support in interrupts/exceptions Junaid Shahid
2022-03-14 15:50   ` Thomas Gleixner
2022-03-15  2:01     ` Junaid Shahid
2022-03-15 12:55       ` Thomas Gleixner
2022-03-15 22:41         ` Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 05/47] mm: asi: Make __get_current_cr3_fast() ASI-aware Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 06/47] mm: asi: ASI page table allocation and free functions Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 07/47] mm: asi: Functions to map/unmap a memory range into ASI page tables Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 08/47] mm: asi: Add basic infrastructure for global non-sensitive mappings Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 09/47] mm: Add __PAGEFLAG_FALSE Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 10/47] mm: asi: Support for global non-sensitive direct map allocations Junaid Shahid
2022-03-23 21:06   ` Matthew Wilcox
2022-03-23 23:48     ` Junaid Shahid
2022-03-24  1:54       ` Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 11/47] mm: asi: Global non-sensitive vmalloc/vmap support Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 12/47] mm: asi: Support for global non-sensitive slab caches Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 13/47] asi: Added ASI memory cgroup flag Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 14/47] mm: asi: Disable ASI API when ASI is not enabled for a process Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 15/47] kvm: asi: Restricted address space for VM execution Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 16/47] mm: asi: Support for mapping non-sensitive pcpu chunks Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 17/47] mm: asi: Aliased direct map for local non-sensitive allocations Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 18/47] mm: asi: Support for pre-ASI-init " Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 19/47] mm: asi: Support for locally nonsensitive page allocations Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 20/47] mm: asi: Support for locally non-sensitive vmalloc allocations Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 21/47] mm: asi: Add support for locally non-sensitive VM_USERMAP pages Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 22/47] mm: asi: Added refcounting when initilizing an asi Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 23/47] mm: asi: Add support for mapping all userspace memory into ASI Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 24/47] mm: asi: Support for local non-sensitive slab caches Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 25/47] mm: asi: Avoid warning from NMI userspace accesses in ASI context Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 26/47] mm: asi: Use separate PCIDs for restricted address spaces Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 27/47] mm: asi: Avoid TLB flushes during ASI CR3 switches when possible Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 28/47] mm: asi: Avoid TLB flush IPIs to CPUs not in ASI context Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 29/47] mm: asi: Reduce TLB flushes when freeing pages asynchronously Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 30/47] mm: asi: Add API for mapping userspace address ranges Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 31/47] mm: asi: Support for non-sensitive SLUB caches Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 32/47] x86: asi: Allocate FPU state separately when ASI is enabled Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 33/47] kvm: asi: Map guest memory into restricted ASI address space Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 34/47] kvm: asi: Unmap guest memory from ASI address space when using nested virt Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 35/47] mm: asi: asi_exit() on PF, skip handling if address is accessible Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 36/47] mm: asi: Adding support for dynamic percpu ASI allocations Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 37/47] mm: asi: ASI annotation support for static variables Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 38/47] mm: asi: ASI annotation support for dynamic modules Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 39/47] mm: asi: Skip conventional L1TF/MDS mitigations Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 40/47] mm: asi: support for static percpu DEFINE_PER_CPU*_ASI Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 41/47] mm: asi: Annotation of static variables to be nonsensitive Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 42/47] mm: asi: Annotation of PERCPU " Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 43/47] mm: asi: Annotation of dynamic " Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 44/47] kvm: asi: Splitting kvm_vcpu_arch into non/sensitive parts Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 45/47] mm: asi: Mapping global nonsensitive areas in asi_global_init Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 46/47] kvm: asi: Do asi_exit() in vcpu_run loop before returning to userspace Junaid Shahid
2022-02-23  5:22 ` Junaid Shahid [this message]
2022-03-05  3:39 ` [RFC PATCH 00/47] Address Space Isolation for KVM Hyeonggon Yoo
2022-03-16 21:34 ` Alexandre Chartre
2022-03-17 23:25   ` Junaid Shahid
2022-03-22  9:46     ` Alexandre Chartre
2022-03-23 19:35       ` Junaid Shahid
2022-04-08  8:52         ` Alexandre Chartre
2022-04-11  3:26           ` junaid_shahid
2022-03-16 22:49 ` Thomas Gleixner
2022-03-17 21:24   ` Junaid Shahid

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220223052223.1202152-48-junaids@google.com \
    --to=junaids@google.com \
    --cc=alexandre.chartre@oracle.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=jmattson@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=oweisse@google.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=rppt@linux.ibm.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox