From: Mark Rutland <mark.rutland@arm.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: mingo@redhat.com, tglx@linutronix.de, juri.lelli@redhat.com,
vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
bristot@redhat.com, linux-kernel@vger.kernel.org,
linux-mm@kvack.org, linux-api@vger.kernel.org, x86@kernel.org,
pjt@google.com, posk@google.com, avagin@google.com,
jannh@google.com, tdelisle@uwaterloo.ca, posk@posk.io
Subject: Re: [RFC][PATCH v2 2/5] entry,x86: Create common IRQ operations for exceptions
Date: Fri, 21 Jan 2022 16:34:04 +0000 [thread overview]
Message-ID: <YergfOW+qpkQSa60@FVFF77S0Q05N> (raw)
In-Reply-To: <20220120160822.728589540@infradead.org>
On Thu, Jan 20, 2022 at 04:55:19PM +0100, Peter Zijlstra wrote:
> A number of exceptions can re-enable IRQs and schedule (provided the
> context taking the exception has IRQs enabled etc.). Currently this
> isn't standardized and each architecture does it slightly differently.
>
> (Notably, ARM64 restores/inherits more than just the IRQ state,
> suggesting at least some form of arch_ hook might be appropriate on
> top of this).
For arm64, I suspect we want the arch code to call the umcg hooks directly,
rather than adding arch_* hooks that get called from the generic
irqentry_irq_{enable,disable}() functions. That shape doesn't really fit the
way we want to conditionally inherit some of the mask bits on entry (depending
on the specific exception taken) and to unconditionally mask everything prior
to exit.
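For example, today the EL1 fault paths look roughly like the below
(paraphrasing arch/arm64/kernel/entry-common.c, with details elided):

	static void noinstr el1_abort(struct pt_regs *regs, unsigned int esr)
	{
		unsigned long far = read_sysreg(far_el1);

		enter_from_kernel_mode(regs);
		/* inherit DAIF bits from the interrupted context, not just IRQs */
		local_daif_inherit(regs);
		do_mem_abort(far, esr, regs);
		/* unconditionally mask everything before exiting */
		local_daif_mask();
		exit_to_kernel_mode(regs);
	}

... and which bits we're willing to inherit differs per exception, so a single
generic enable/disable pair doesn't map cleanly onto that.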
Setting that aside, arm64 also has separate vectors for exceptions taken from
EL0 (userspace) and EL1 (kernel), so we don't need to look at the regs to know
that an exception was taken from userspace (which always runs with IRQs
unmasked). For patch 5, where the hooks are actually added, it would be cleaner
for us to invoke the UMCG hooks directly from our enter_from_user_mode() and
exit_to_user_mode() paths.
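Concretely, something like the below (a sketch against the current shape of our
enter_from_user_mode()/exit_to_user_mode(), unrelated details elided;
umcg_note_entry()/umcg_note_exit() are placeholder names, not the hooks patch 5
actually adds):

	static __always_inline void enter_from_user_mode(struct pt_regs *regs)
	{
		__enter_from_user_mode();
		/* placeholder: tell UMCG we've entered the kernel from userspace */
		umcg_note_entry(regs);
	}

	static __always_inline void exit_to_user_mode(struct pt_regs *regs)
	{
		/* placeholder: let UMCG block/notify before we return to userspace */
		umcg_note_exit(regs);
		exit_to_user_mode_prepare(regs);
		__exit_to_user_mode();
	}

That keeps the decision of when to call the hooks entirely in the arm64 entry
code, without routing it through the generic irqentry_irq_*() helpers.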
That said, if this is useful for other architectures, I don't have a problem
with adding these as generic helpers; I just don't think architectures should
be required to use them directly, and we should make sure the underlying
primitives are available and clearly documented.
Thanks,
Mark.
> Create a single set of functions for this and convert x86 to use
> these. The purpose is to have a single (common) place to hook in order
> to cover all the exceptions that can schedule().
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
> arch/x86/kernel/traps.c | 48 ++++++++++++++++---------------------------
> arch/x86/mm/fault.c | 28 ++++++++++++-------------
> include/linux/entry-common.h | 24 +++++++++++++++++++++
> 3 files changed, 56 insertions(+), 44 deletions(-)
>
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -73,18 +73,6 @@
>
> DECLARE_BITMAP(system_vectors, NR_VECTORS);
>
> -static inline void cond_local_irq_enable(struct pt_regs *regs)
> -{
> - if (regs->flags & X86_EFLAGS_IF)
> - local_irq_enable();
> -}
> -
> -static inline void cond_local_irq_disable(struct pt_regs *regs)
> -{
> - if (regs->flags & X86_EFLAGS_IF)
> - local_irq_disable();
> -}
> -
> __always_inline int is_valid_bugaddr(unsigned long addr)
> {
> if (addr < TASK_SIZE_MAX)
> @@ -177,9 +165,9 @@ static void do_error_trap(struct pt_regs
>
> if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
> NOTIFY_STOP) {
> - cond_local_irq_enable(regs);
> + irqentry_irq_enable(regs);
> do_trap(trapnr, signr, str, regs, error_code, sicode, addr);
> - cond_local_irq_disable(regs);
> + irqentry_irq_disable(regs);
> }
> }
>
> @@ -300,7 +288,7 @@ DEFINE_IDTENTRY_ERRORCODE(exc_alignment_
> if (!user_mode(regs))
> die("Split lock detected\n", regs, error_code);
>
> - local_irq_enable();
> + irqentry_irq_enable(regs);
>
> if (handle_user_split_lock(regs, error_code))
> goto out;
> @@ -309,7 +297,7 @@ DEFINE_IDTENTRY_ERRORCODE(exc_alignment_
> error_code, BUS_ADRALN, NULL);
>
> out:
> - local_irq_disable();
> + irqentry_irq_disable(regs);
> }
>
> #ifdef CONFIG_VMAP_STACK
> @@ -473,14 +461,14 @@ DEFINE_IDTENTRY(exc_bounds)
> if (notify_die(DIE_TRAP, "bounds", regs, 0,
> X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
> return;
> - cond_local_irq_enable(regs);
> + irqentry_irq_enable(regs);
>
> if (!user_mode(regs))
> die("bounds", regs, 0);
>
> do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, 0, 0, NULL);
>
> - cond_local_irq_disable(regs);
> + irqentry_irq_disable(regs);
> }
>
> enum kernel_gp_hint {
> @@ -567,7 +555,7 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_pr
> unsigned long gp_addr;
> int ret;
>
> - cond_local_irq_enable(regs);
> + irqentry_irq_enable(regs);
>
> if (static_cpu_has(X86_FEATURE_UMIP)) {
> if (user_mode(regs) && fixup_umip_exception(regs))
> @@ -638,7 +626,7 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_pr
> die_addr(desc, regs, error_code, gp_addr);
>
> exit:
> - cond_local_irq_disable(regs);
> + irqentry_irq_disable(regs);
> }
>
> static bool do_int3(struct pt_regs *regs)
> @@ -665,9 +653,9 @@ static void do_int3_user(struct pt_regs
> if (do_int3(regs))
> return;
>
> - cond_local_irq_enable(regs);
> + irqentry_irq_enable(regs);
> do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, 0, 0, NULL);
> - cond_local_irq_disable(regs);
> + irqentry_irq_disable(regs);
> }
>
> DEFINE_IDTENTRY_RAW(exc_int3)
> @@ -1003,7 +991,7 @@ static __always_inline void exc_debug_us
> goto out;
>
> /* It's safe to allow irq's after DR6 has been saved */
> - local_irq_enable();
> + irqentry_irq_enable(regs);
>
> if (v8086_mode(regs)) {
> handle_vm86_trap((struct kernel_vm86_regs *)regs, 0, X86_TRAP_DB);
> @@ -1020,7 +1008,7 @@ static __always_inline void exc_debug_us
> send_sigtrap(regs, 0, get_si_code(dr6));
>
> out_irq:
> - local_irq_disable();
> + irqentry_irq_disable(regs);
> out:
> instrumentation_end();
> irqentry_exit_to_user_mode(regs);
> @@ -1064,7 +1052,7 @@ static void math_error(struct pt_regs *r
> char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" :
> "simd exception";
>
> - cond_local_irq_enable(regs);
> + irqentry_irq_enable(regs);
>
> if (!user_mode(regs)) {
> if (fixup_exception(regs, trapnr, 0, 0))
> @@ -1099,7 +1087,7 @@ static void math_error(struct pt_regs *r
> force_sig_fault(SIGFPE, si_code,
> (void __user *)uprobe_get_trap_addr(regs));
> exit:
> - cond_local_irq_disable(regs);
> + irqentry_irq_disable(regs);
> }
>
> DEFINE_IDTENTRY(exc_coprocessor_error)
> @@ -1160,7 +1148,7 @@ static bool handle_xfd_event(struct pt_r
> if (WARN_ON(!user_mode(regs)))
> return false;
>
> - local_irq_enable();
> + irqentry_irq_enable(regs);
>
> err = xfd_enable_feature(xfd_err);
>
> @@ -1173,7 +1161,7 @@ static bool handle_xfd_event(struct pt_r
> break;
> }
>
> - local_irq_disable();
> + irqentry_irq_disable(regs);
> return true;
> }
>
> @@ -1188,12 +1176,12 @@ DEFINE_IDTENTRY(exc_device_not_available
> if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) {
> struct math_emu_info info = { };
>
> - cond_local_irq_enable(regs);
> + irqentry_irq_enable(regs);
>
> info.regs = regs;
> math_emulate(&info);
>
> - cond_local_irq_disable(regs);
> + irqentry_irq_disable(regs);
> return;
> }
> #endif
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -1209,6 +1209,12 @@ do_kern_addr_fault(struct pt_regs *regs,
> NOKPROBE_SYMBOL(do_kern_addr_fault);
>
> /*
> + * EFLAGS[3] is unused and ABI defined to be 0, use it to store IRQ state,
> + * because do_user_addr_fault() is too convoluted to track things.
> + */
> +#define X86_EFLAGS_MISC (1UL << 3)
> +
> +/*
> * Handle faults in the user portion of the address space. Nothing in here
> * should check X86_PF_USER without a specific justification: for almost
> * all purposes, we should treat a normal kernel access to user memory
> @@ -1290,13 +1296,11 @@ void do_user_addr_fault(struct pt_regs *
> * User-mode registers count as a user access even for any
> * potential system fault or CPU buglet:
> */
> - if (user_mode(regs)) {
> - local_irq_enable();
> + if (user_mode(regs))
> flags |= FAULT_FLAG_USER;
> - } else {
> - if (regs->flags & X86_EFLAGS_IF)
> - local_irq_enable();
> - }
> +
> + irqentry_irq_enable(regs);
> + regs->flags |= X86_EFLAGS_MISC;
>
> perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
>
> @@ -1483,14 +1487,10 @@ handle_page_fault(struct pt_regs *regs,
> do_kern_addr_fault(regs, error_code, address);
> } else {
> do_user_addr_fault(regs, error_code, address);
> - /*
> - * User address page fault handling might have reenabled
> - * interrupts. Fixing up all potential exit points of
> - * do_user_addr_fault() and its leaf functions is just not
> - * doable w/o creating an unholy mess or turning the code
> - * upside down.
> - */
> - local_irq_disable();
> + if (regs->flags & X86_EFLAGS_MISC) {
> + regs->flags &= ~X86_EFLAGS_MISC;
> + irqentry_irq_disable(regs);
> + }
> }
> }
>
> --- a/include/linux/entry-common.h
> +++ b/include/linux/entry-common.h
> @@ -7,6 +7,7 @@
> #include <linux/syscalls.h>
> #include <linux/seccomp.h>
> #include <linux/sched.h>
> +#include <asm/ptrace.h>
>
> #include <asm/entry-common.h>
>
> @@ -213,6 +214,29 @@ static inline void local_irq_disable_exi
> #endif
>
> /**
> + * irqentry_irq_enable - Conditionally enable IRQs from exceptions
> + *
> + * Common code for exceptions to (re)enable IRQs, typically done to allow
> + * from-user exceptions to schedule (since they run on the task stack).
> + */
> +static inline void irqentry_irq_enable(struct pt_regs *regs)
> +{
> + if (!regs_irqs_disabled(regs))
> + local_irq_enable();
> +}
> +
> +/**
> + * irqentry_irq_disable - Conditionally disable IRQs from exceptions
> + *
> + * Counterpart of irqentry_irq_enable().
> + */
> +static inline void irqentry_irq_disable(struct pt_regs *regs)
> +{
> + if (!regs_irqs_disabled(regs))
> + local_irq_disable();
> +}
> +
> +/**
> * arch_exit_to_user_mode_work - Architecture specific TIF work for exit
> * to user mode.
> * @regs: Pointer to currents pt_regs
>
>