From: Yunhui Cui <cuiyunhui@bytedance.com>
To: aou@eecs.berkeley.edu, alex@ghiti.fr, andii@kernel.org,
andybnac@gmail.com, apatel@ventanamicro.com, ast@kernel.org,
ben.dooks@codethink.co.uk, bjorn@kernel.org, bpf@vger.kernel.org,
charlie@rivosinc.com, cl@gentwo.org, conor.dooley@microchip.com,
cuiyunhui@bytedance.com, cyrilbur@tenstorrent.com,
daniel@iogearbox.net, debug@rivosinc.com, dennis@kernel.org,
eddyz87@gmail.com, haoluo@google.com, john.fastabend@gmail.com,
jolsa@kernel.org, kpsingh@kernel.org,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
linux-riscv@lists.infradead.org, linux@rasmusvillemoes.dk,
martin.lau@linux.dev, palmer@dabbelt.com, pjw@kernel.org,
puranjay@kernel.org, pulehui@huawei.com, ruanjinjie@huawei.com,
rkrcmar@ventanamicro.com, samuel.holland@sifive.com,
sdf@fomichev.me, song@kernel.org, tglx@linutronix.de,
tj@kernel.org, thuth@redhat.com, yonghong.song@linux.dev,
yury.norov@gmail.com, zong.li@sifive.com
Subject: [PATCH v3 2/3] riscv: introduce percpu.h into include/asm
Date: Tue, 16 Dec 2025 09:47:20 +0800 [thread overview]
Message-ID: <20251216014721.42262-3-cuiyunhui@bytedance.com> (raw)
In-Reply-To: <20251216014721.42262-1-cuiyunhui@bytedance.com>
The current percpu operations rely on the generic implementations, in
which raw_local_irq_save() introduces substantial overhead. Replace them
with RISC-V specific versions that use atomic (AMO) instructions and only
disable preemption. RISC-V has no byte/halfword LR/SC (lr/sc.b/h), so
when ZABHA is not supported the 8-bit and 16-bit operations use lr.w/sc.w
on the containing 32-bit word instead, which requires some additional
mask and shift operations.
Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
---
arch/riscv/include/asm/percpu.h | 244 ++++++++++++++++++++++++++++++++
1 file changed, 244 insertions(+)
create mode 100644 arch/riscv/include/asm/percpu.h
diff --git a/arch/riscv/include/asm/percpu.h b/arch/riscv/include/asm/percpu.h
new file mode 100644
index 0000000000000..c5bacf6d864ee
--- /dev/null
+++ b/arch/riscv/include/asm/percpu.h
@@ -0,0 +1,244 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef __ASM_PERCPU_H
+#define __ASM_PERCPU_H
+
+#include <linux/preempt.h>
+
+#include <asm/alternative-macros.h>
+#include <asm/cpufeature-macros.h>
+#include <asm/hwcap.h>
+
+/*
+ * PERCPU_RW_OPS(sz) - generate __percpu_read_<sz>() and __percpu_write_<sz>().
+ *
+ * Both helpers treat @ptr as a pointer to a u<sz> object. The read helper
+ * loads it with READ_ONCE() and returns the value converted to unsigned
+ * long; the write helper converts @val to u<sz> and stores it with
+ * WRITE_ONCE().
+ *
+ * NOTE(review): values travel as unsigned long, which is narrower than u64
+ * when unsigned long is 32 bits wide - confirm whether the 64-bit instances
+ * are reachable on such builds.
+ */
+#define PERCPU_RW_OPS(sz) \
+static inline unsigned long __percpu_read_##sz(void *ptr) \
+{ \
+	u##sz *__p = (u##sz *)ptr; \
+ \
+	return READ_ONCE(*__p); \
+} \
+ \
+static inline void __percpu_write_##sz(void *ptr, unsigned long val) \
+{ \
+	u##sz *__p = (u##sz *)ptr; \
+	u##sz __v = (u##sz)val; \
+ \
+	WRITE_ONCE(*__p, __v); \
+}
+
+/* One read/write pair per supported access width. */
+PERCPU_RW_OPS(8)
+PERCPU_RW_OPS(16)
+PERCPU_RW_OPS(32)
+PERCPU_RW_OPS(64)
+
+/*
+ * __PERCPU_AMO_OP_CASE() - generate __percpu_<name>_amo_case_<sz>().
+ *
+ * The generated helper issues a single "amo<amo_insn><sfx>" on *ptr, using
+ * val converted to u<sz> as the operand. The destination register is
+ * "zero", so the value loaded by the AMO is discarded.
+ */
+#define __PERCPU_AMO_OP_CASE(sfx, name, sz, amo_insn) \
+static inline void \
+__percpu_##name##_amo_case_##sz(void *ptr, unsigned long val) \
+{ \
+ asm volatile ( \
+ "amo" #amo_insn #sfx " zero, %[val], %[ptr]" \
+ : [ptr] "+A" (*(u##sz *)ptr) \
+ : [val] "r" ((u##sz)(val)) \
+ : "memory"); \
+}
+
+/* Instantiate the 32-bit (.w) and 64-bit (.d) helper for one operation. */
+#define PERCPU_OP(name, amo_insn) \
+ __PERCPU_AMO_OP_CASE(.w, name, 32, amo_insn) \
+ __PERCPU_AMO_OP_CASE(.d, name, 64, amo_insn)
+
+/*
+ * Note: the "andnot" helpers are generated from amoand, i.e. they AND the
+ * memory operand with val exactly as passed; they do not complement it.
+ */
+PERCPU_OP(add, add)
+PERCPU_OP(andnot, and)
+PERCPU_OP(or, or)
+
+/*
+ * Currently, only this_cpu_add_return_xxx() requires a return value,
+ * and the PERCPU_RET_OP() does not account for other operations.
+ *
+ * The generated __percpu_<name>_return_amo_case_<sz>() captures the AMO's
+ * destination register in ret and returns "ret + val" converted to u<sz>.
+ * That expression is only meaningful for an addition, which is why only
+ * "add" is instantiated below.
+ */
+#define __PERCPU_AMO_RET_OP_CASE(sfx, name, sz, amo_insn) \
+static inline u##sz \
+__percpu_##name##_return_amo_case_##sz(void *ptr, unsigned long val) \
+{ \
+ register u##sz ret; \
+ \
+ asm volatile ( \
+ "amo" #amo_insn #sfx " %[ret], %[val], %[ptr]" \
+ : [ptr] "+A" (*(u##sz *)ptr), [ret] "=r" (ret) \
+ : [val] "r" ((u##sz)(val)) \
+ : "memory"); \
+ \
+ return ret + val; \
+}
+
+/* Instantiate the 32-bit (.w) and 64-bit (.d) value-returning helper. */
+#define PERCPU_RET_OP(name, amo_insn) \
+ __PERCPU_AMO_RET_OP_CASE(.w, name, 32, amo_insn) \
+ __PERCPU_AMO_RET_OP_CASE(.d, name, 64, amo_insn)
+
+PERCPU_RET_OP(add, add)
+
+/*
+ * Helpers for performing 8/16-bit operations on a 32-bit word:
+ *  PERCPU_8_16_GET_SHIFT(ptr) - low two address bits of ptr times BITS_PER_BYTE
+ *  PERCPU_8_16_GET_MASK(sz)   - mask covering bits sz-1..0 (not shifted)
+ *  PERCPU_8_16_GET_PTR32(ptr) - ptr with its low two address bits cleared,
+ *                               typed as u32 *
+ */
+#define PERCPU_8_16_GET_SHIFT(ptr) (((unsigned long)(ptr) & 0x3) * BITS_PER_BYTE)
+#define PERCPU_8_16_GET_MASK(sz) GENMASK((sz)-1, 0)
+#define PERCPU_8_16_GET_PTR32(ptr) ((u32 *)((unsigned long)(ptr) & ~0x3))
+
+/*
+ * PERCPU_8_16_OP() - generate __percpu_<name>_amo_case_<sz>() for sz = 8, 16.
+ *
+ * @name:         operation name used in the generated function name
+ * @amo_insn:     AMO mnemonic fragment used when Zabha can be used
+ * @sz:           operand width in bits
+ * @sfx:          AMO width suffix (.b or .h)
+ * @val_type:     C type of the operand (u8 or u16)
+ * @new_val_expr: expression over the function argument "val" that yields
+ *                the operand actually applied to memory
+ * @asm_op:       ALU instruction applied to the extracted field in the
+ *                lr.w/sc.w fallback
+ *
+ * When CONFIG_RISCV_ISA_ZABHA is enabled and the extension is detected, one
+ * sub-word AMO is issued. Otherwise the 32-bit word selected by
+ * PERCPU_8_16_GET_PTR32() is updated in an lr.w/sc.w loop: the field is
+ * extracted, combined with the operand, confined to the field mask again
+ * and merged with the untouched bits of the word.
+ *
+ * The result must be masked after it is shifted back into place: "add" can
+ * carry out of the field (e.g. 0xff + 1, or adding a negative value), and
+ * without the mask that carry would be stored into the neighbouring
+ * byte/halfword of the same word.
+ */
+#define PERCPU_8_16_OP(name, amo_insn, sz, sfx, val_type, new_val_expr, asm_op) \
+static inline void __percpu_##name##_amo_case_##sz(void *ptr, unsigned long val) \
+{ \
+	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
+	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \
+		asm volatile ("amo" #amo_insn #sfx " zero, %[val], %[ptr]" \
+			: [ptr] "+A"(*(val_type *)ptr) \
+			: [val] "r"((val_type)((new_val_expr) & PERCPU_8_16_GET_MASK(sz))) \
+			: "memory"); \
+	} else { \
+		u32 *ptr32 = PERCPU_8_16_GET_PTR32(ptr); \
+		const unsigned long shift = PERCPU_8_16_GET_SHIFT(ptr); \
+		const u32 mask = PERCPU_8_16_GET_MASK(sz) << shift; \
+		const val_type val_trunc = (val_type)((new_val_expr) \
+			& PERCPU_8_16_GET_MASK(sz)); \
+		/* full-width scratch registers for the loop below */ \
+		u32 old, rc, field; \
+ \
+		asm volatile ( \
+		"0: lr.w %0, %2\n"		/* old = *ptr32 */ \
+		"and %3, %0, %4\n"		/* field = old & mask */ \
+		"srl %3, %3, %5\n"		/* move field to bit 0 */ \
+		#asm_op " %3, %3, %6\n"		/* field = field <op> operand */ \
+		"sll %3, %3, %5\n"		/* move field back into place */ \
+		"and %3, %3, %4\n"		/* drop anything outside the field */ \
+		"and %1, %0, %7\n"		/* rc = old & ~mask */ \
+		"or %1, %1, %3\n"		/* rc |= field */ \
+		"sc.w %1, %1, %2\n"		/* try to store; rc = status */ \
+		"bnez %1, 0b\n"			/* retry if the sc.w failed */ \
+		: "=&r"(old), "=&r"(rc), "+A"(*ptr32), "=&r"(field) \
+		: "r"(mask), "r"(shift), "r"(val_trunc), "r"(~mask) \
+		: "memory"); \
+	} \
+}
+
+/*
+ * Instantiate the 8-bit and 16-bit helper for one operation. No ';' between
+ * the expansions: they are function definitions at file scope.
+ */
+#define PERCPU_OP_8_16(op_name, op, expr, final_op) \
+	PERCPU_8_16_OP(op_name, op, 8, .b, u8, expr, final_op) \
+	PERCPU_8_16_OP(op_name, op, 16, .h, u16, expr, final_op)
+
+/* The "andnot" helpers complement their argument (~val) before ANDing. */
+PERCPU_OP_8_16(add, add, val, add)
+PERCPU_OP_8_16(andnot, and, ~val, and)
+PERCPU_OP_8_16(or, or, val, or)
+
+/*
+ * PERCPU_8_16_RET_OP() - generate __percpu_<name>_return_amo_case_<sz>()
+ * for sz = 8, 16.
+ *
+ * @name:         operation name used in the generated function name
+ * @amo_insn:     AMO mnemonic fragment used when Zabha can be used
+ * @sz:           operand width in bits
+ * @sfx:          AMO width suffix (.b or .h)
+ * @val_type:     C type of the operand and of the return value (u8 or u16)
+ * @new_val_expr: expression over the function argument "val" that yields
+ *                the operand actually added
+ *
+ * Returns the new value of the object, converted to @val_type.
+ *
+ * When CONFIG_RISCV_ISA_ZABHA is enabled and the extension is detected, one
+ * sub-word AMO is issued and the operand is added to the value it returned.
+ * Otherwise the 32-bit word selected by PERCPU_8_16_GET_PTR32() is updated
+ * in an lr.w/sc.w loop.
+ *
+ * In the fallback every output is written before the last input has been
+ * read (and again on each retry), so all of them are earlyclobber ("=&r").
+ * tmp is kept as the new field value at bit 0 so it can be returned as is;
+ * the shifted copy that is merged into the word lives in "new".
+ */
+#define PERCPU_8_16_RET_OP(name, amo_insn, sz, sfx, val_type, new_val_expr) \
+static inline val_type __percpu_##name##_return_amo_case_##sz(void *ptr, unsigned long val) \
+{ \
+	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
+	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \
+		register val_type ret; \
+		asm volatile ("amo" #amo_insn #sfx " %[ret], %[val], %[ptr]" \
+			: [ptr] "+A"(*(val_type *)ptr), [ret] "=r"(ret) \
+			: [val] "r"((val_type)((new_val_expr) & PERCPU_8_16_GET_MASK(sz))) \
+			: "memory"); \
+		return ret + (val_type)((new_val_expr) & PERCPU_8_16_GET_MASK(sz)); \
+	} else { \
+		u32 *ptr32 = PERCPU_8_16_GET_PTR32(ptr); \
+		const unsigned long shift = PERCPU_8_16_GET_SHIFT(ptr); \
+		const u32 mask = (PERCPU_8_16_GET_MASK(sz) << shift); \
+		const u32 inv_mask = ~mask; \
+		const val_type val_trunc = (val_type)((new_val_expr) \
+			& PERCPU_8_16_GET_MASK(sz)); \
+		u32 old, new, tmp; \
+ \
+		asm volatile ( \
+		"0: lr.w %0, %3\n"		/* old = *ptr32 */ \
+		"and %1, %0, %4\n"		/* tmp = old & mask */ \
+		"srl %1, %1, %5\n"		/* move field to bit 0 */ \
+		"add %1, %1, %6\n"		/* tmp += operand */ \
+		"and %1, %1, %7\n"		/* tmp = new field value */ \
+		"sll %2, %1, %5\n"		/* new = tmp << shift */ \
+		"and %0, %0, %8\n"		/* old &= ~mask */ \
+		"or %2, %2, %0\n"		/* new |= untouched bits */ \
+		"sc.w %2, %2, %3\n"		/* try to store; new = status */ \
+		"bnez %2, 0b\n"			/* retry if the sc.w failed */ \
+		: "=&r"(old), "=&r"(tmp), "=&r"(new), "+A"(*ptr32) \
+		: "r"(mask), "r"(shift), "r"(val_trunc), "r"(PERCPU_8_16_GET_MASK(sz)), \
+		  "r"(inv_mask) \
+		: "memory"); \
+		return (val_type)(tmp); \
+	} \
+}
+
+PERCPU_8_16_RET_OP(add, add, 8, .b, u8, val)
+PERCPU_8_16_RET_OP(add, add, 16, .h, u16, val)
+
+/*
+ * _pcp_protect() - call op(raw_cpu_ptr(&(pcp)), <remaining arguments>)
+ * between preempt_disable_notrace() and preempt_enable_notrace().
+ *
+ * The statement expression ends with preempt_enable_notrace(), so it is
+ * meant for operations whose result is not used.
+ */
+#define _pcp_protect(op, pcp, ...) \
+({ \
+ preempt_disable_notrace(); \
+ op(raw_cpu_ptr(&(pcp)), __VA_ARGS__); \
+ preempt_enable_notrace(); \
+})
+
+/*
+ * _pcp_protect_return() - call op(raw_cpu_ptr(&(pcp)), <optional arguments>)
+ * between preempt_disable_notrace() and preempt_enable_notrace(), and
+ * evaluate to op's result cast to typeof(pcp).
+ */
+#define _pcp_protect_return(op, pcp, ...) \
+({ \
+	typeof(pcp) __pcp_ret; \
+ \
+	preempt_disable_notrace(); \
+	__pcp_ret = (typeof(pcp))op(raw_cpu_ptr(&(pcp)), ##__VA_ARGS__); \
+	preempt_enable_notrace(); \
+	__pcp_ret; \
+})
+
+/* this_cpu_read_N(): preemption-protected READ_ONCE() of an N-byte variable. */
+#define this_cpu_read_1(pcp) _pcp_protect_return(__percpu_read_8, pcp)
+#define this_cpu_read_2(pcp) _pcp_protect_return(__percpu_read_16, pcp)
+#define this_cpu_read_4(pcp) _pcp_protect_return(__percpu_read_32, pcp)
+#define this_cpu_read_8(pcp) _pcp_protect_return(__percpu_read_64, pcp)
+
+/*
+ * this_cpu_write_N(): preemption-protected WRITE_ONCE() of an N-byte
+ * variable. val is parenthesised so that the cast applies to the whole
+ * argument expression and not just to its first operand.
+ */
+#define this_cpu_write_1(pcp, val) _pcp_protect(__percpu_write_8, pcp, (unsigned long)(val))
+#define this_cpu_write_2(pcp, val) _pcp_protect(__percpu_write_16, pcp, (unsigned long)(val))
+#define this_cpu_write_4(pcp, val) _pcp_protect(__percpu_write_32, pcp, (unsigned long)(val))
+#define this_cpu_write_8(pcp, val) _pcp_protect(__percpu_write_64, pcp, (unsigned long)(val))
+
+/* this_cpu_add_N(): add val to an N-byte variable; the result is not returned. */
+#define this_cpu_add_1(pcp, val) _pcp_protect(__percpu_add_amo_case_8, pcp, val)
+#define this_cpu_add_2(pcp, val) _pcp_protect(__percpu_add_amo_case_16, pcp, val)
+#define this_cpu_add_4(pcp, val) _pcp_protect(__percpu_add_amo_case_32, pcp, val)
+#define this_cpu_add_8(pcp, val) _pcp_protect(__percpu_add_amo_case_64, pcp, val)
+
+/*
+ * this_cpu_add_return_N(): add val to an N-byte variable and evaluate to the
+ * value produced by the matching __percpu_add_return_amo_case_<bits>()
+ * helper, cast to typeof(pcp).
+ */
+#define this_cpu_add_return_1(pcp, val) \
+_pcp_protect_return(__percpu_add_return_amo_case_8, pcp, val)
+
+#define this_cpu_add_return_2(pcp, val) \
+_pcp_protect_return(__percpu_add_return_amo_case_16, pcp, val)
+
+#define this_cpu_add_return_4(pcp, val) \
+_pcp_protect_return(__percpu_add_return_amo_case_32, pcp, val)
+
+#define this_cpu_add_return_8(pcp, val) \
+_pcp_protect_return(__percpu_add_return_amo_case_64, pcp, val)
+
+/*
+ * this_cpu_and_N(): pcp &= val.
+ *
+ * The 8/16-bit "andnot" helpers complement their argument before ANDing, so
+ * they are handed ~(val). The 32/64-bit "andnot" helpers execute amoand on
+ * their argument unchanged, so they must be handed val itself; passing the
+ * complement there would clear exactly the bits that are meant to be kept.
+ */
+#define this_cpu_and_1(pcp, val) _pcp_protect(__percpu_andnot_amo_case_8, pcp, ~(val))
+#define this_cpu_and_2(pcp, val) _pcp_protect(__percpu_andnot_amo_case_16, pcp, ~(val))
+#define this_cpu_and_4(pcp, val) _pcp_protect(__percpu_andnot_amo_case_32, pcp, val)
+#define this_cpu_and_8(pcp, val) _pcp_protect(__percpu_andnot_amo_case_64, pcp, val)
+
+/* this_cpu_or_N(): OR val into an N-byte variable; the result is not returned. */
+#define this_cpu_or_1(pcp, val) _pcp_protect(__percpu_or_amo_case_8, pcp, val)
+#define this_cpu_or_2(pcp, val) _pcp_protect(__percpu_or_amo_case_16, pcp, val)
+#define this_cpu_or_4(pcp, val) _pcp_protect(__percpu_or_amo_case_32, pcp, val)
+#define this_cpu_or_8(pcp, val) _pcp_protect(__percpu_or_amo_case_64, pcp, val)
+
+/*
+ * this_cpu_xchg_N(): every width forwards to xchg_relaxed() on the
+ * raw_cpu_ptr() address; the result is cast to typeof(pcp).
+ */
+#define this_cpu_xchg_1(pcp, val) _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_2(pcp, val) _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_4(pcp, val) _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_8(pcp, val) _pcp_protect_return(xchg_relaxed, pcp, val)
+
+/*
+ * this_cpu_cmpxchg_N(): every width forwards to cmpxchg_relaxed() on the
+ * raw_cpu_ptr() address; the result is cast to typeof(pcp).
+ */
+#define this_cpu_cmpxchg_1(pcp, o, n) _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_2(pcp, o, n) _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_4(pcp, o, n) _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_8(pcp, o, n) _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+
+/* 64-bit cmpxchg is the 8-byte case. */
+#define this_cpu_cmpxchg64(pcp, o, n) this_cpu_cmpxchg_8(pcp, o, n)
+
+/*
+ * this_cpu_cmpxchg128() is only provided when system_has_cmpxchg128 is
+ * already defined at this point of the include chain.
+ *
+ * With preemption disabled it uses cmpxchg128_local() on the raw_cpu_ptr()
+ * address when system_has_cmpxchg128() evaluates to true, and falls back to
+ * this_cpu_generic_cmpxchg() otherwise. The expression evaluates to the
+ * u128 result of whichever path was taken.
+ */
+#ifdef system_has_cmpxchg128
+#define this_cpu_cmpxchg128(pcp, o, n) \
+({ \
+ u128 ret__; \
+ typeof(pcp) *ptr__; \
+ \
+ preempt_disable_notrace(); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
+ if (system_has_cmpxchg128()) \
+ ret__ = cmpxchg128_local(ptr__, (o), (n)); \
+ else \
+ ret__ = this_cpu_generic_cmpxchg(pcp, (o), (n)); \
+ preempt_enable_notrace(); \
+ ret__; \
+})
+#endif
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ASM_PERCPU_H */
--
2.39.5
next prev parent reply other threads:[~2025-12-16 1:48 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-16 1:47 [PATCH v3 0/3] RISC-V: add percpu.h to include/asm Yunhui Cui
2025-12-16 1:47 ` [PATCH v3 1/3] riscv: remove irqflags.h inclusion in asm/bitops.h Yunhui Cui
2025-12-16 17:39 ` Yury Norov
2025-12-16 1:47 ` Yunhui Cui [this message]
2025-12-20 14:45 ` [PATCH v3 2/3] riscv: introduce percpu.h into include/asm kernel test robot
2025-12-20 16:31 ` kernel test robot
2025-12-16 1:47 ` [PATCH v3 3/3] riscv: store percpu offset into thread_info Yunhui Cui
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251216014721.42262-3-cuiyunhui@bytedance.com \
--to=cuiyunhui@bytedance.com \
--cc=alex@ghiti.fr \
--cc=andii@kernel.org \
--cc=andybnac@gmail.com \
--cc=aou@eecs.berkeley.edu \
--cc=apatel@ventanamicro.com \
--cc=ast@kernel.org \
--cc=ben.dooks@codethink.co.uk \
--cc=bjorn@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=charlie@rivosinc.com \
--cc=cl@gentwo.org \
--cc=conor.dooley@microchip.com \
--cc=cyrilbur@tenstorrent.com \
--cc=daniel@iogearbox.net \
--cc=debug@rivosinc.com \
--cc=dennis@kernel.org \
--cc=eddyz87@gmail.com \
--cc=haoluo@google.com \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kpsingh@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-riscv@lists.infradead.org \
--cc=linux@rasmusvillemoes.dk \
--cc=martin.lau@linux.dev \
--cc=palmer@dabbelt.com \
--cc=pjw@kernel.org \
--cc=pulehui@huawei.com \
--cc=puranjay@kernel.org \
--cc=rkrcmar@ventanamicro.com \
--cc=ruanjinjie@huawei.com \
--cc=samuel.holland@sifive.com \
--cc=sdf@fomichev.me \
--cc=song@kernel.org \
--cc=tglx@linutronix.de \
--cc=thuth@redhat.com \
--cc=tj@kernel.org \
--cc=yonghong.song@linux.dev \
--cc=yury.norov@gmail.com \
--cc=zong.li@sifive.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox