Subject: [PATCH RFC 18/43] x86/percpu: Use PC-relative addressing for percpu variable references
From: Hou Wenlong <houwenlong.hwl@antgroup.com>
Date: 2023-04-28  9:50 UTC
In-Reply-To: <cover.1682673542.git.houwenlong.hwl@antgroup.com>
To: linux-kernel
Cc: Thomas Garnier, Lai Jiangshan, Kees Cook, Hou Wenlong,
	Andy Lutomirski, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	Dave Hansen, x86, H. Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
	Pawan Gupta, Dennis Zhou, Tejun Heo, Christoph Lameter,
	Paolo Bonzini, Wanpeng Li, Vitaly Kuznetsov, Juergen Gross,
	Boris Ostrovsky, Nathan Chancellor, Nick Desaulniers, Tom Rix,
	David Woodhouse, Brian Gerst, linux-mm, kvm, xen-devel, llvm

In a PIE binary, all symbol references are PC-relative, even
references to percpu variables. So, to stay compatible with PIE, add
the %rip suffix to percpu variable references in the assembly macros
when PIE is enabled. Note that relocation of percpu variable
references is still broken under PIE; it will be fixed in a later
patch.
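
As an illustration of the resulting code (an example of the macro
expansion below, not a hunk from this patch), with CONFIG_X86_PIE
enabled a reference such as

	movq	PER_CPU_VAR(pcpu_hot + X86_current_task), %rcx

now assembles as a RIP-relative access

	movq	%gs:(pcpu_hot + X86_current_task)(%rip), %rcx

instead of the absolute form %gs:(pcpu_hot + X86_current_task).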

Suggested-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
Cc: Thomas Garnier <thgarnie@chromium.org>
Cc: Kees Cook <keescook@chromium.org>
---
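Note: under PIE, GET_PERCPU_BASE can no longer index the
__per_cpu_offset array through an absolute displacement, so the macro
gains a scratch register with which it first materializes the array's
address. A sketch of the resulting sequence, assuming the CPU number
is already in \reg:

	leaq	__per_cpu_offset(%rip), \scratch_reg	/* PC-relative address of the offset table */
	movq	(\scratch_reg, \reg, 8), \reg		/* load this CPU's percpu offset */

Since SAVE_AND_SET_GSBASE passes \save_reg as that scratch register,
GET_PERCPU_BASE must now run before rdgsbase saves the old GS base
into \save_reg, hence the reordering in that macro.
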
 arch/x86/entry/calling.h             | 17 ++++++++++++----
 arch/x86/include/asm/nospec-branch.h | 10 +++++-----
 arch/x86/include/asm/percpu.h        | 29 +++++++++++++++++++++++++---
 arch/x86/kernel/head_64.S            |  2 +-
 arch/x86/kernel/kvm.c                | 21 ++++++++++++++++----
 arch/x86/lib/cmpxchg16b_emu.S        |  8 ++++----
 arch/x86/xen/xen-asm.S               | 10 +++++-----
 7 files changed, 71 insertions(+), 26 deletions(-)
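
Note on the __percpu_stable_arg conditional in percpu.h: a minimal
sketch of the operand-modifier difference, using a made-up percpu
variable my_percpu_var (not taken from the patch):

	unsigned long val;

	/* 'P' prints the bare symbol, so GCC emits an absolute
	 * reference: movq %gs:my_percpu_var, %rax */
	asm("movq %%gs:%P1, %0" : "=r" (val) : "p" (&my_percpu_var));

	/* 'a' prints an address operand, which GCC makes RIP-relative
	 * under PIE: movq %gs:my_percpu_var(%rip), %rax */
	asm("movq %%gs:%a1, %0" : "=r" (val) : "p" (&my_percpu_var));

Clang already emits a RIP-relative address for 'P' but does not
support 'a', so the 'a' form is selected only for GCC.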

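Note on the kvm.c change: the PIE variant of the
__kvm_vcpu_is_preempted() assembly can no longer fold the percpu
offset and the steal_time symbol into a single absolute addressing
mode, so it temporarily borrows %rdi. A sketch of what the macro
expands to:

	pushq	%rdi				/* preserve the cpu argument */
	leaq	__per_cpu_offset(%rip), %rax
	movq	(%rax,%rdi,8), %rax		/* this CPU's percpu offset */
	leaq	steal_time(%rip), %rdi		/* PC-relative address of steal_time */
	cmpb	$0, KVM_STEAL_TIME_preempted(%rax,%rdi,1)
	popq	%rdi

The cmpxchg16b_emu.S accesses, by contrast, keep an absolute
PER_CPU_VAR_ABS() form: their operand is already a register-indirect
(%rsi) rather than a symbol, so there is nothing to relocate and a
(%rip) suffix would not assemble.
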
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index f6907627172b..11328578741d 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -173,7 +173,7 @@ For 32-bit we have the following conventions - kernel is built with
 .endm
 
 #define THIS_CPU_user_pcid_flush_mask   \
-	PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
+	PER_CPU_VAR(cpu_tlbstate + TLB_STATE_user_pcid_flush_mask)
 
 .macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
 	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
@@ -370,8 +370,8 @@ For 32-bit we have the following conventions - kernel is built with
 .endm
 
 .macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req
+	GET_PERCPU_BASE \scratch_reg \save_reg
 	rdgsbase \save_reg
-	GET_PERCPU_BASE \scratch_reg
 	wrgsbase \scratch_reg
 .endm
 
@@ -407,15 +407,24 @@ For 32-bit we have the following conventions - kernel is built with
  * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives
  * while running KVM's run loop.
  */
-.macro GET_PERCPU_BASE reg:req
+#ifdef CONFIG_X86_PIE
+.macro GET_PERCPU_BASE reg:req scratch_reg:req
+	LOAD_CPU_AND_NODE_SEG_LIMIT \reg
+	andq	$VDSO_CPUNODE_MASK, \reg
+	leaq	__per_cpu_offset(%rip), \scratch_reg
+	movq	(\scratch_reg, \reg, 8), \reg
+.endm
+#else
+.macro GET_PERCPU_BASE reg:req scratch_reg:req
 	LOAD_CPU_AND_NODE_SEG_LIMIT \reg
 	andq	$VDSO_CPUNODE_MASK, \reg
 	movq	__per_cpu_offset(, \reg, 8), \reg
 .endm
+#endif /* CONFIG_X86_PIE */
 
 #else
 
-.macro GET_PERCPU_BASE reg:req
+.macro GET_PERCPU_BASE reg:req scratch_reg:req
 	movq	pcpu_unit_offsets(%rip), \reg
 .endm
 
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index edb2b0cb8efe..d8fd935e0697 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -59,13 +59,13 @@
 
 #ifdef CONFIG_CALL_THUNKS_DEBUG
 # define CALL_THUNKS_DEBUG_INC_CALLS				\
-	incq	%gs:__x86_call_count;
+	incq	%gs:(__x86_call_count)__percpu_rel;
 # define CALL_THUNKS_DEBUG_INC_RETS				\
-	incq	%gs:__x86_ret_count;
+	incq	%gs:(__x86_ret_count)__percpu_rel;
 # define CALL_THUNKS_DEBUG_INC_STUFFS				\
-	incq	%gs:__x86_stuffs_count;
+	incq	%gs:(__x86_stuffs_count)__percpu_rel;
 # define CALL_THUNKS_DEBUG_INC_CTXSW				\
-	incq	%gs:__x86_ctxsw_count;
+	incq	%gs:(__x86_ctxsw_count)__percpu_rel;
 #else
 # define CALL_THUNKS_DEBUG_INC_CALLS
 # define CALL_THUNKS_DEBUG_INC_RETS
@@ -95,7 +95,7 @@
 	CALL_THUNKS_DEBUG_INC_CALLS
 
 #define INCREMENT_CALL_DEPTH					\
-	sarq	$5, %gs:pcpu_hot + X86_call_depth;		\
+	sarq	$5, %gs:(pcpu_hot + X86_call_depth)__percpu_rel;\
 	CALL_THUNKS_DEBUG_INC_CALLS
 
 #define ASM_INCREMENT_CALL_DEPTH				\
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 13c0d63ed55e..a627a073c6ea 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -4,16 +4,26 @@
 
 #ifdef CONFIG_X86_64
 #define __percpu_seg		gs
+#ifdef CONFIG_X86_PIE
+#define __percpu_rel		(%rip)
+#else
+#define __percpu_rel
+#endif /* CONFIG_X86_PIE */
 #else
 #define __percpu_seg		fs
+#define __percpu_rel
 #endif
 
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_SMP
-#define PER_CPU_VAR(var)	%__percpu_seg:var
+/* Compatible with Position Independent Code */
+#define PER_CPU_VAR(var)	%__percpu_seg:(var)##__percpu_rel
+/* Rare absolute reference */
+#define PER_CPU_VAR_ABS(var)	%__percpu_seg:var
 #else /* ! SMP */
-#define PER_CPU_VAR(var)	var
+#define PER_CPU_VAR(var)	(var)##__percpu_rel
+#define PER_CPU_VAR_ABS(var)	var
 #endif	/* SMP */
 
 #ifdef CONFIG_X86_64_SMP
@@ -148,10 +158,23 @@ do {									\
 	(typeof(_var))(unsigned long) pfo_val__;			\
 })
 
+/*
+ * Position-independent code uses relative addresses only.
+ * The 'P' operand modifier prevents RIP-relative addressing in GCC,
+ * so use the 'a' modifier there instead. Clang, however, emits
+ * RIP-relative addresses for the 'P' modifier but does not support
+ * the 'a' modifier.
+ */
+#if defined(CONFIG_X86_PIE) && defined(CONFIG_CC_IS_GCC)
+#define __percpu_stable_arg	__percpu_arg(a[var])
+#else
+#define __percpu_stable_arg	__percpu_arg(P[var])
+#endif
+
 #define percpu_stable_op(size, op, _var)				\
 ({									\
 	__pcpu_type_##size pfo_val__;					\
-	asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]")	\
+	asm(__pcpu_op2_##size(op, __percpu_stable_arg, "%[val]")	\
 	    : [val] __pcpu_reg_##size("=", pfo_val__)			\
 	    : [var] "p" (&(_var)));					\
 	(typeof(_var))(unsigned long) pfo_val__;			\
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 61f1873d0ff7..1eed50b7d1ac 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -396,7 +396,7 @@ SYM_CODE_START(start_cpu0)
 	UNWIND_HINT_END_OF_STACK
 
 	/* Find the idle task stack */
-	movq	PER_CPU_VAR(pcpu_hot) + X86_current_task, %rcx
+	movq	PER_CPU_VAR(pcpu_hot + X86_current_task), %rcx
 	movq	TASK_threadsp(%rcx), %rsp
 
 	jmp	.Ljump_to_C_code
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 1cceac5984da..32d7b201f4f0 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -794,14 +794,27 @@ PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);
 
 extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);
 
+#ifndef CONFIG_X86_PIE
+#define KVM_CHECK_VCPU_PREEMPTED			\
+	"movq	__per_cpu_offset(,%rdi,8), %rax;"	\
+	"cmpb	$0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
+#else
+#define KVM_CHECK_VCPU_PREEMPTED			\
+	"pushq	%rdi;"					\
+	"leaq	__per_cpu_offset(%rip), %rax;"		\
+	"movq	(%rax,%rdi,8), %rax;"			\
+	"leaq	steal_time(%rip), %rdi;"		\
+	"cmpb	$0, (%rax, %rdi, 1);"			\
+	"popq	%rdi;"
+#endif
+
 /*
  * Hand-optimize version for x86-64 to avoid 8 64-bit register saving and
  * restoring to/from the stack.
  */
-#define PV_VCPU_PREEMPTED_ASM						     \
- "movq   __per_cpu_offset(,%rdi,8), %rax\n\t"				     \
- "cmpb   $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax)\n\t" \
- "setne  %al\n\t"
+#define PV_VCPU_PREEMPTED_ASM		\
+	KVM_CHECK_VCPU_PREEMPTED	\
+	"setne  %al\n\t"
 
 DEFINE_PARAVIRT_ASM(__raw_callee_save___kvm_vcpu_is_preempted,
 		    PV_VCPU_PREEMPTED_ASM, .text);
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 33c70c0160ea..891c5e9fd868 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -27,13 +27,13 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
 	pushfq
 	cli
 
-	cmpq PER_CPU_VAR((%rsi)), %rax
+	cmpq PER_CPU_VAR_ABS((%rsi)), %rax
 	jne .Lnot_same
-	cmpq PER_CPU_VAR(8(%rsi)), %rdx
+	cmpq PER_CPU_VAR_ABS(8(%rsi)), %rdx
 	jne .Lnot_same
 
-	movq %rbx, PER_CPU_VAR((%rsi))
-	movq %rcx, PER_CPU_VAR(8(%rsi))
+	movq %rbx, PER_CPU_VAR_ABS((%rsi))
+	movq %rcx, PER_CPU_VAR_ABS(8(%rsi))
 
 	popfq
 	mov $1, %al
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 9e5e68008785..448958ddbaf8 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -28,7 +28,7 @@
  * non-zero.
  */
 SYM_FUNC_START(xen_irq_disable_direct)
-	movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+	movb $1, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
 	RET
 SYM_FUNC_END(xen_irq_disable_direct)
 
@@ -69,7 +69,7 @@ SYM_FUNC_END(check_events)
 SYM_FUNC_START(xen_irq_enable_direct)
 	FRAME_BEGIN
 	/* Unmask events */
-	movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+	movb $0, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
 
 	/*
 	 * Preempt here doesn't matter because that will deal with any
@@ -78,7 +78,7 @@ SYM_FUNC_START(xen_irq_enable_direct)
 	 */
 
 	/* Test for pending */
-	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_pending)
 	jz 1f
 
 	call check_events
@@ -97,7 +97,7 @@ SYM_FUNC_END(xen_irq_enable_direct)
  * x86 use opposite senses (mask vs enable).
  */
 SYM_FUNC_START(xen_save_fl_direct)
-	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
 	setz %ah
 	addb %ah, %ah
 	RET
@@ -113,7 +113,7 @@ SYM_FUNC_END(xen_read_cr2);
 
 SYM_FUNC_START(xen_read_cr2_direct)
 	FRAME_BEGIN
-	_ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
+	_ASM_MOV PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_arch_cr2), %_ASM_AX
 	FRAME_END
 	RET
 SYM_FUNC_END(xen_read_cr2_direct);
-- 
2.31.1