linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Zong Li <zong.li@sifive.com>
To: Deepak Gupta <debug@rivosinc.com>
Cc: Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	 Dave Hansen <dave.hansen@linux.intel.com>,
	x86@kernel.org,  "H. Peter Anvin" <hpa@zytor.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	 "Liam R. Howlett" <Liam.Howlett@oracle.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	 Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
	Paul Walmsley <paul.walmsley@sifive.com>,
	 Palmer Dabbelt <palmer@dabbelt.com>,
	Albert Ou <aou@eecs.berkeley.edu>,
	 Conor Dooley <conor@kernel.org>, Rob Herring <robh@kernel.org>,
	 Krzysztof Kozlowski <krzk+dt@kernel.org>,
	Arnd Bergmann <arnd@arndb.de>,
	 Christian Brauner <brauner@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	 Oleg Nesterov <oleg@redhat.com>,
	Eric Biederman <ebiederm@xmission.com>,
	Kees Cook <kees@kernel.org>,  Jonathan Corbet <corbet@lwn.net>,
	Shuah Khan <shuah@kernel.org>, Jann Horn <jannh@google.com>,
	 Conor Dooley <conor+dt@kernel.org>,
	linux-kernel@vger.kernel.org,  linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org,  linux-riscv@lists.infradead.org,
	devicetree@vger.kernel.org,  linux-arch@vger.kernel.org,
	linux-doc@vger.kernel.org,  linux-kselftest@vger.kernel.org,
	alistair.francis@wdc.com,  richard.henderson@linaro.org,
	jim.shu@sifive.com, andybnac@gmail.com,  kito.cheng@sifive.com,
	charlie@rivosinc.com, atishp@rivosinc.com,  evan@rivosinc.com,
	cleger@rivosinc.com, alexghiti@rivosinc.com,
	 samitolvanen@google.com, broonie@kernel.org,
	rick.p.edgecombe@intel.com
Subject: Re: [PATCH v11 11/27] riscv/shstk: If needed allocate a new shadow stack on clone
Date: Fri, 14 Mar 2025 16:30:35 +0800	[thread overview]
Message-ID: <CANXhq0rdYLxUwpHHu9ErYZW93dGF20-ra7TfeKPv8ounu1rC9w@mail.gmail.com> (raw)
In-Reply-To: <20250310-v5_user_cfi_series-v11-11-86b36cbfb910@rivosinc.com>

On Mon, Mar 10, 2025 at 11:42 PM Deepak Gupta <debug@rivosinc.com> wrote:
>
> Userspace specifies CLONE_VM to share address space and spawn new thread.
> `clone` allow userspace to specify a new stack for new thread. However
> there is no way to specify new shadow stack base address without changing
> API. This patch allocates a new shadow stack whenever CLONE_VM is given.
>
> In case of CLONE_VFORK, parent is suspended until child finishes and thus
> can child use parent shadow stack. In case of !CLONE_VM, COW kicks in
> because entire address space is copied from parent to child.
>
> `clone3` is extensible and can provide mechanisms using which shadow stack
> as an input parameter can be provided. This is not settled yet and being
> extensively discussed on mailing list. Once that's settled, this commit
> will adapt to that.
>
> Signed-off-by: Deepak Gupta <debug@rivosinc.com>
> ---
>  arch/riscv/include/asm/mmu_context.h |   7 ++
>  arch/riscv/include/asm/usercfi.h     |  25 ++++++++
>  arch/riscv/kernel/process.c          |   9 +++
>  arch/riscv/kernel/usercfi.c          | 120 +++++++++++++++++++++++++++++++++++
>  4 files changed, 161 insertions(+)
>
> diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h
> index 8c4bc49a3a0f..dbf27a78df6c 100644
> --- a/arch/riscv/include/asm/mmu_context.h
> +++ b/arch/riscv/include/asm/mmu_context.h
> @@ -48,6 +48,13 @@ static inline unsigned long mm_untag_mask(struct mm_struct *mm)
>  }
>  #endif
>
> +#define deactivate_mm deactivate_mm
> +static inline void deactivate_mm(struct task_struct *tsk,
> +                                struct mm_struct *mm)
> +{
> +       shstk_release(tsk);
> +}
> +
>  #include <asm-generic/mmu_context.h>
>
>  #endif /* _ASM_RISCV_MMU_CONTEXT_H */
> diff --git a/arch/riscv/include/asm/usercfi.h b/arch/riscv/include/asm/usercfi.h
> index 5f2027c51917..82d28ac98d76 100644
> --- a/arch/riscv/include/asm/usercfi.h
> +++ b/arch/riscv/include/asm/usercfi.h
> @@ -8,6 +8,9 @@
>  #ifndef __ASSEMBLY__
>  #include <linux/types.h>
>
> +struct task_struct;
> +struct kernel_clone_args;
> +
>  #ifdef CONFIG_RISCV_USER_CFI
>  struct cfi_status {
>         unsigned long ubcfi_en : 1; /* Enable for backward cfi. */
> @@ -17,6 +20,28 @@ struct cfi_status {
>         unsigned long shdw_stk_size; /* size of shadow stack */
>  };
>
> +unsigned long shstk_alloc_thread_stack(struct task_struct *tsk,
> +                                      const struct kernel_clone_args *args);
> +void shstk_release(struct task_struct *tsk);
> +void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size);
> +unsigned long get_shstk_base(struct task_struct *task, unsigned long *size);
> +void set_active_shstk(struct task_struct *task, unsigned long shstk_addr);
> +bool is_shstk_enabled(struct task_struct *task);
> +
> +#else
> +
> +#define shstk_alloc_thread_stack(tsk, args) 0
> +
> +#define shstk_release(tsk)
> +
> +#define get_shstk_base(task, size) 0UL
> +
> +#define set_shstk_base(task, shstk_addr, size)
> +
> +#define set_active_shstk(task, shstk_addr)
> +
> +#define is_shstk_enabled(task) false
> +
>  #endif /* CONFIG_RISCV_USER_CFI */
>
>  #endif /* __ASSEMBLY__ */
> diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
> index 7c244de77180..99acb6342a37 100644
> --- a/arch/riscv/kernel/process.c
> +++ b/arch/riscv/kernel/process.c
> @@ -29,6 +29,7 @@
>  #include <asm/vector.h>
>  #include <asm/cpufeature.h>
>  #include <asm/exec.h>
> +#include <asm/usercfi.h>
>
>  #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
>  #include <linux/stackprotector.h>
> @@ -211,6 +212,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
>         unsigned long clone_flags = args->flags;
>         unsigned long usp = args->stack;
>         unsigned long tls = args->tls;
> +       unsigned long ssp = 0;
>         struct pt_regs *childregs = task_pt_regs(p);
>
>         /* Ensure all threads in this mm have the same pointer masking mode. */
> @@ -229,11 +231,18 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
>                 p->thread.s[0] = (unsigned long)args->fn;
>                 p->thread.s[1] = (unsigned long)args->fn_arg;
>         } else {
> +               /* allocate new shadow stack if needed. In case of CLONE_VM we have to */
> +               ssp = shstk_alloc_thread_stack(p, args);
> +               if (IS_ERR_VALUE(ssp))
> +                       return PTR_ERR((void *)ssp);
> +
>                 *childregs = *(current_pt_regs());
>                 /* Turn off status.VS */
>                 riscv_v_vstate_off(childregs);
>                 if (usp) /* User fork */
>                         childregs->sp = usp;
> +               /* if needed, set new ssp */
> +               ssp ? set_active_shstk(p, ssp) : 0;
>                 if (clone_flags & CLONE_SETTLS)
>                         childregs->tp = tls;
>                 childregs->a0 = 0; /* Return value of fork() */
> diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c
> index 24022809a7b5..73cf87dab186 100644
> --- a/arch/riscv/kernel/usercfi.c
> +++ b/arch/riscv/kernel/usercfi.c
> @@ -19,6 +19,41 @@
>
>  #define SHSTK_ENTRY_SIZE sizeof(void *)
>
> +bool is_shstk_enabled(struct task_struct *task)
> +{
> +       return task->thread_info.user_cfi_state.ubcfi_en ? true : false;
> +}
> +
> +void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size)
> +{
> +       task->thread_info.user_cfi_state.shdw_stk_base = shstk_addr;
> +       task->thread_info.user_cfi_state.shdw_stk_size = size;
> +}
> +
> +unsigned long get_shstk_base(struct task_struct *task, unsigned long *size)
> +{
> +       if (size)
> +               *size = task->thread_info.user_cfi_state.shdw_stk_size;
> +       return task->thread_info.user_cfi_state.shdw_stk_base;
> +}
> +
> +void set_active_shstk(struct task_struct *task, unsigned long shstk_addr)
> +{
> +       task->thread_info.user_cfi_state.user_shdw_stk = shstk_addr;
> +}
> +
> +/*
> + * If size is 0, then to be compatible with regular stack we want it to be as big as
> + * regular stack. Else PAGE_ALIGN it and return back
> + */
> +static unsigned long calc_shstk_size(unsigned long size)
> +{
> +       if (size)
> +               return PAGE_ALIGN(size);
> +
> +       return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
> +}
> +
>  /*
>   * Writes on shadow stack can either be `sspush` or `ssamoswap`. `sspush` can happen
>   * implicitly on current shadow stack pointed to by CSR_SSP. `ssamoswap` takes pointer to
> @@ -142,3 +177,88 @@ SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsi
>
>         return allocate_shadow_stack(addr, aligned_size, size, set_tok);
>  }
> +
> +/*
> + * This gets called during clone/clone3/fork. And is needed to allocate a shadow stack for
> + * cases where CLONE_VM is specified and thus a different stack is specified by user. We
> + * thus need a separate shadow stack too. How does separate shadow stack is specified by
> + * user is still being debated. Once that's settled, remove this part of the comment.
> + * This function simply returns 0 if shadow stack are not supported or if separate shadow
> + * stack allocation is not needed (like in case of !CLONE_VM)
> + */
> +unsigned long shstk_alloc_thread_stack(struct task_struct *tsk,
> +                                      const struct kernel_clone_args *args)
> +{
> +       unsigned long addr, size;
> +
> +       /* If shadow stack is not supported, return 0 */
> +       if (!cpu_supports_shadow_stack())
> +               return 0;
> +
> +       /*
> +        * If shadow stack is not enabled on the new thread, skip any
> +        * switch to a new shadow stack.
> +        */
> +       if (!is_shstk_enabled(tsk))
> +               return 0;
> +
> +       /*
> +        * For CLONE_VFORK the child will share the parents shadow stack.
> +        * Set base = 0 and size = 0, this is special means to track this state
> +        * so the freeing logic run for child knows to leave it alone.
> +        */
> +       if (args->flags & CLONE_VFORK) {
> +               set_shstk_base(tsk, 0, 0);
> +               return 0;
> +       }
> +
> +       /*
> +        * For !CLONE_VM the child will use a copy of the parents shadow
> +        * stack.
> +        */
> +       if (!(args->flags & CLONE_VM))
> +               return 0;
> +
> +       /*
> +        * reaching here means, CLONE_VM was specified and thus a separate shadow
> +        * stack is needed for new cloned thread. Note: below allocation is happening
> +        * using current mm.
> +        */
> +       size = calc_shstk_size(args->stack_size);
> +       addr = allocate_shadow_stack(0, size, 0, false);
> +       if (IS_ERR_VALUE(addr))
> +               return addr;
> +
> +       set_shstk_base(tsk, addr, size);
> +
> +       return addr + size;
> +}
> +
> +void shstk_release(struct task_struct *tsk)
> +{
> +       unsigned long base = 0, size = 0;
> +       /* If shadow stack is not supported or not enabled, nothing to release */
> +       if (!cpu_supports_shadow_stack() || !is_shstk_enabled(tsk))
> +               return;
> +
> +       /*
> +        * When fork() with CLONE_VM fails, the child (tsk) already has a
> +        * shadow stack allocated, and exit_thread() calls this function to
> +        * free it.  In this case the parent (current) and the child share
> +        * the same mm struct. Move forward only when they're same.
> +        */
> +       if (!tsk->mm || tsk->mm != current->mm)
> +               return;
> +
> +       /*
> +        * We know shadow stack is enabled but if base is NULL, then
> +        * this task is not managing its own shadow stack (CLONE_VFORK). So
> +        * skip freeing it.
> +        */
> +       base = get_shstk_base(tsk, &size);
> +       if (!base)
> +               return;
> +
> +       vm_munmap(base, size);
> +       set_shstk_base(tsk, 0, 0);
> +}
>
LGTM.

Reviewed-by: Zong Li <zong.li@sifive.com>
> --
> 2.34.1
>
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv


  reply	other threads:[~2025-03-14  8:30 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-10 14:52 [PATCH v11 00/27] riscv control-flow integrity for usermode Deepak Gupta
2025-03-10 14:52 ` [PATCH v11 01/27] mm: VM_SHADOW_STACK definition for riscv Deepak Gupta
2025-03-14  8:27   ` Zong Li
2025-03-10 14:52 ` [PATCH v11 02/27] dt-bindings: riscv: zicfilp and zicfiss in dt-bindings (extensions.yaml) Deepak Gupta
2025-03-10 14:52 ` [PATCH v11 03/27] riscv: zicfiss / zicfilp enumeration Deepak Gupta
2025-03-14  8:10   ` Zong Li
2025-03-10 14:52 ` [PATCH v11 04/27] riscv: zicfiss / zicfilp extension csr and bit definitions Deepak Gupta
2025-03-10 14:52 ` [PATCH v11 05/27] riscv: usercfi state for task and save/restore of CSR_SSP on trap entry/exit Deepak Gupta
2025-03-14  8:28   ` Zong Li
2025-03-10 14:52 ` [PATCH v11 06/27] riscv/mm : ensure PROT_WRITE leads to VM_READ | VM_WRITE Deepak Gupta
2025-03-14  8:28   ` Zong Li
2025-03-10 14:52 ` [PATCH v11 07/27] riscv mm: manufacture shadow stack pte Deepak Gupta
2025-03-14  8:29   ` Zong Li
2025-03-10 14:52 ` [PATCH v11 08/27] riscv mmu: teach pte_mkwrite to manufacture shadow stack PTEs Deepak Gupta
2025-03-14  8:29   ` Zong Li
2025-03-10 14:52 ` [PATCH v11 09/27] riscv mmu: write protect and shadow stack Deepak Gupta
2025-03-14  8:29   ` Zong Li
2025-03-10 14:52 ` [PATCH v11 10/27] riscv/mm: Implement map_shadow_stack() syscall Deepak Gupta
2025-03-14  8:30   ` Zong Li
2025-03-10 14:52 ` [PATCH v11 11/27] riscv/shstk: If needed allocate a new shadow stack on clone Deepak Gupta
2025-03-14  8:30   ` Zong Li [this message]
2025-03-10 14:52 ` [PATCH v11 12/27] riscv: Implements arch agnostic shadow stack prctls Deepak Gupta
2025-03-10 14:52 ` [PATCH v11 13/27] prctl: arch-agnostic prctl for indirect branch tracking Deepak Gupta
2025-03-14  8:25   ` Zong Li
2025-03-14 16:19     ` Deepak Gupta
2025-03-10 14:52 ` [PATCH v11 14/27] riscv/traps: Introduce software check exception Deepak Gupta
2025-03-14  8:31   ` Zong Li
2025-03-10 14:52 ` [PATCH v11 15/27] riscv: signal: abstract header saving for setup_sigcontext Deepak Gupta
2025-03-10 14:52 ` [PATCH v11 16/27] riscv/signal: save and restore of shadow stack for signal Deepak Gupta
2025-03-10 14:52 ` [PATCH v11 17/27] riscv/kernel: update __show_regs to print shadow stack register Deepak Gupta
2025-03-10 14:52 ` [PATCH v11 18/27] riscv/ptrace: riscv cfi status and state via ptrace and in core files Deepak Gupta
2025-03-10 14:52 ` [PATCH v11 19/27] riscv/hwprobe: zicfilp / zicfiss enumeration in hwprobe Deepak Gupta
2025-03-14  8:32   ` Zong Li
2025-03-10 14:52 ` [PATCH v11 20/27] riscv: Add Firmware Feature SBI extensions definitions Deepak Gupta
2025-03-14  8:32   ` Zong Li
2025-03-10 14:52 ` [PATCH v11 21/27] riscv: enable kernel access to shadow stack memory via FWFT sbi call Deepak Gupta
2025-03-14  8:33   ` Zong Li
2025-03-10 14:52 ` [PATCH v11 22/27] riscv: kernel command line option to opt out of user cfi Deepak Gupta
2025-03-10 14:52 ` [PATCH v11 23/27] arch/riscv: compile vdso with landing pad Deepak Gupta
2025-03-14  7:37   ` Zong Li
2025-03-10 14:52 ` [PATCH v11 24/27] riscv: create a config for shadow stack and landing pad instr support Deepak Gupta
2025-03-14  8:33   ` Zong Li
2025-03-10 14:52 ` [PATCH v11 25/27] riscv: Documentation for landing pad / indirect branch tracking Deepak Gupta
2025-03-14  8:34   ` Zong Li
2025-03-10 14:52 ` [PATCH v11 26/27] riscv: Documentation for shadow stack on riscv Deepak Gupta
2025-03-14  8:34   ` Zong Li
2025-03-10 14:52 ` [PATCH v11 27/27] kselftest/riscv: kselftest for user mode cfi Deepak Gupta
2025-03-14  8:07   ` Zong Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CANXhq0rdYLxUwpHHu9ErYZW93dGF20-ra7TfeKPv8ounu1rC9w@mail.gmail.com \
    --to=zong.li@sifive.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexghiti@rivosinc.com \
    --cc=alistair.francis@wdc.com \
    --cc=andybnac@gmail.com \
    --cc=aou@eecs.berkeley.edu \
    --cc=arnd@arndb.de \
    --cc=atishp@rivosinc.com \
    --cc=bp@alien8.de \
    --cc=brauner@kernel.org \
    --cc=broonie@kernel.org \
    --cc=charlie@rivosinc.com \
    --cc=cleger@rivosinc.com \
    --cc=conor+dt@kernel.org \
    --cc=conor@kernel.org \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=debug@rivosinc.com \
    --cc=devicetree@vger.kernel.org \
    --cc=ebiederm@xmission.com \
    --cc=evan@rivosinc.com \
    --cc=hpa@zytor.com \
    --cc=jannh@google.com \
    --cc=jim.shu@sifive.com \
    --cc=kees@kernel.org \
    --cc=kito.cheng@sifive.com \
    --cc=krzk+dt@kernel.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=palmer@dabbelt.com \
    --cc=paul.walmsley@sifive.com \
    --cc=peterz@infradead.org \
    --cc=richard.henderson@linaro.org \
    --cc=rick.p.edgecombe@intel.com \
    --cc=robh@kernel.org \
    --cc=samitolvanen@google.com \
    --cc=shuah@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=vbabka@suse.cz \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox