From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
To: bpf@vger.kernel.org
Cc: daniel@iogearbox.net, andrii@kernel.org, martin.lau@kernel.org,
memxor@gmail.com, eddyz87@gmail.com, tj@kernel.org,
brho@google.com, hannes@cmpxchg.org, linux-mm@kvack.org,
kernel-team@fb.com
Subject: [PATCH bpf-next 06/16] bpf: Add x86-64 JIT support for PROBE_MEM32 pseudo instructions.
Date: Tue, 6 Feb 2024 14:04:31 -0800 [thread overview]
Message-ID: <20240206220441.38311-7-alexei.starovoitov@gmail.com> (raw)
In-Reply-To: <20240206220441.38311-1-alexei.starovoitov@gmail.com>
From: Alexei Starovoitov <ast@kernel.org>
Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW] instructions.
They are similar to PROBE_MEM instructions with the following differences:
- PROBE_MEM has to check that the address is in the kernel range with
src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE check
- PROBE_MEM doesn't support store
- PROBE_MEM32 relies on the verifier to clear upper 32-bit in the register
- PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in %r12 in the prologue)
Due to bpf_arena constructions such %r12 + %reg + off16 access is guaranteed
to be within arena virtual range, so no address check at run-time.
- PROBE_MEM32 allows STX and ST. If they fault the store is a nop.
When LDX faults the destination register is zeroed.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
arch/x86/net/bpf_jit_comp.c | 183 +++++++++++++++++++++++++++++++++++-
include/linux/bpf.h | 1 +
include/linux/filter.h | 3 +
3 files changed, 186 insertions(+), 1 deletion(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e1390d1e331b..883b7f604b9a 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -113,6 +113,7 @@ static int bpf_size_to_x86_bytes(int bpf_size)
/* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)
#define X86_REG_R9 (MAX_BPF_JIT_REG + 2)
+#define X86_REG_R12 (MAX_BPF_JIT_REG + 3)
/*
* The following table maps BPF registers to x86-64 registers.
@@ -139,6 +140,7 @@ static const int reg2hex[] = {
[BPF_REG_AX] = 2, /* R10 temp register */
[AUX_REG] = 3, /* R11 temp register */
[X86_REG_R9] = 1, /* R9 register, 6th function argument */
+ [X86_REG_R12] = 4, /* R12 callee saved */
};
static const int reg2pt_regs[] = {
@@ -167,6 +169,7 @@ static bool is_ereg(u32 reg)
BIT(BPF_REG_8) |
BIT(BPF_REG_9) |
BIT(X86_REG_R9) |
+ BIT(X86_REG_R12) |
BIT(BPF_REG_AX));
}
@@ -205,6 +208,17 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2)
return byte;
}
+static u8 add_3mod(u8 byte, u32 r1, u32 r2, u32 index)
+{
+ if (is_ereg(r1))
+ byte |= 1;
+ if (is_ereg(index))
+ byte |= 2;
+ if (is_ereg(r2))
+ byte |= 4;
+ return byte;
+}
+
/* Encode 'dst_reg' register into x86-64 opcode 'byte' */
static u8 add_1reg(u8 byte, u32 dst_reg)
{
@@ -887,6 +901,18 @@ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
*pprog = prog;
}
+static void emit_insn_suffix_SIB(u8 **pprog, u32 ptr_reg, u32 val_reg, u32 index_reg, int off)
+{
+ u8 *prog = *pprog;
+
+ if (is_imm8(off)) {
+ EMIT3(add_2reg(0x44, BPF_REG_0, val_reg), add_2reg(0, ptr_reg, index_reg) /* SIB */, off);
+ } else {
+ EMIT2_off32(add_2reg(0x84, BPF_REG_0, val_reg), add_2reg(0, ptr_reg, index_reg) /* SIB */, off);
+ }
+ *pprog = prog;
+}
+
/*
* Emit a REX byte if it will be necessary to address these registers
*/
@@ -968,6 +994,37 @@ static void emit_ldsx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
*pprog = prog;
}
+static void emit_ldx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+{
+ u8 *prog = *pprog;
+
+ switch (size) {
+ case BPF_B:
+ /* movzx rax, byte ptr [rax + r12 + off] */
+ EMIT3(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x0F, 0xB6);
+ break;
+ case BPF_H:
+ /* movzx rax, word ptr [rax + r12 + off] */
+ EMIT3(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x0F, 0xB7);
+ break;
+ case BPF_W:
+ /* mov eax, dword ptr [rax + r12 + off] */
+ EMIT2(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x8B);
+ break;
+ case BPF_DW:
+ /* mov rax, qword ptr [rax + r12 + off] */
+ EMIT2(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x8B);
+ break;
+ }
+ emit_insn_suffix_SIB(&prog, src_reg, dst_reg, index_reg, off);
+ *pprog = prog;
+}
+
+static void emit_ldx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ emit_ldx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off);
+}
+
/* STX: *(u8*)(dst_reg + off) = src_reg */
static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
@@ -1002,6 +1059,71 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
*pprog = prog;
}
+/* STX: *(u8*)(dst_reg + index_reg + off) = src_reg */
+static void emit_stx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+{
+ u8 *prog = *pprog;
+
+ switch (size) {
+ case BPF_B:
+ /* mov byte ptr [rax + r12 + off], al */
+ EMIT2(add_3mod(0x40, dst_reg, src_reg, index_reg), 0x88);
+ break;
+ case BPF_H:
+ /* mov word ptr [rax + r12 + off], ax */
+ EMIT3(0x66, add_3mod(0x40, dst_reg, src_reg, index_reg), 0x89);
+ break;
+ case BPF_W:
+ /* mov dword ptr [rax + r12 + 1], eax */
+ EMIT2(add_3mod(0x40, dst_reg, src_reg, index_reg), 0x89);
+ break;
+ case BPF_DW:
+ /* mov qword ptr [rax + r12 + 1], rax */
+ EMIT2(add_3mod(0x48, dst_reg, src_reg, index_reg), 0x89);
+ break;
+ }
+ emit_insn_suffix_SIB(&prog, dst_reg, src_reg, index_reg, off);
+ *pprog = prog;
+}
+
+static void emit_stx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ emit_stx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off);
+}
+
+/* ST: *(u8*)(dst_reg + index_reg + off) = imm32 */
+static void emit_st_index(u8 **pprog, u32 size, u32 dst_reg, u32 index_reg, int off, int imm)
+{
+ u8 *prog = *pprog;
+
+ switch (size) {
+ case BPF_B:
+ /* mov byte ptr [rax + r12 + off], imm8 */
+ EMIT2(add_3mod(0x40, dst_reg, 0, index_reg), 0xC6);
+ break;
+ case BPF_H:
+ /* mov word ptr [rax + r12 + off], imm16 */
+ EMIT3(0x66, add_3mod(0x40, dst_reg, 0, index_reg), 0xC7);
+ break;
+ case BPF_W:
+ /* mov dword ptr [rax + r12 + 1], imm32 */
+ EMIT2(add_3mod(0x40, dst_reg, 0, index_reg), 0xC7);
+ break;
+ case BPF_DW:
+ /* mov qword ptr [rax + r12 + 1], imm32 */
+ EMIT2(add_3mod(0x48, dst_reg, 0, index_reg), 0xC7);
+ break;
+ }
+ emit_insn_suffix_SIB(&prog, dst_reg, 0, index_reg, off);
+ EMIT(imm, bpf_size_to_x86_bytes(size));
+ *pprog = prog;
+}
+
+static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm)
+{
+ emit_st_index(pprog, size, dst_reg, X86_REG_R12, off, imm);
+}
+
static int emit_atomic(u8 **pprog, u8 atomic_op,
u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
{
@@ -1043,12 +1165,15 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
return 0;
}
+#define DONT_CLEAR 1
+
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
u32 reg = x->fixup >> 8;
/* jump over faulting load and clear dest register */
- *(unsigned long *)((void *)regs + reg) = 0;
+ if (reg != DONT_CLEAR)
+ *(unsigned long *)((void *)regs + reg) = 0;
regs->ip += x->fixup & 0xff;
return true;
}
@@ -1147,11 +1272,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
bool tail_call_seen = false;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
+ u64 arena_vm_start;
int i, excnt = 0;
int ilen, proglen = 0;
u8 *prog = temp;
int err;
+ arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
+
detect_reg_usage(insn, insn_cnt, callee_regs_used,
&tail_call_seen);
@@ -1172,8 +1300,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
push_r12(&prog);
push_callee_regs(&prog, all_callee_regs_used);
} else {
+ if (arena_vm_start)
+ push_r12(&prog);
push_callee_regs(&prog, callee_regs_used);
}
+ if (arena_vm_start)
+ emit_mov_imm64(&prog, X86_REG_R12,
+ arena_vm_start >> 32, (u32) arena_vm_start);
ilen = prog - temp;
if (rw_image)
@@ -1564,6 +1697,52 @@ st: if (is_imm8(insn->off))
emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
break;
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
+ start_of_ldx = prog;
+ emit_st_r12(&prog, BPF_SIZE(insn->code), dst_reg, insn->off, insn->imm);
+ goto populate_extable;
+
+ /* LDX: dst_reg = *(u8*)(src_reg + r12 + off) */
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+ start_of_ldx = prog;
+ if (BPF_CLASS(insn->code) == BPF_LDX)
+ emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ else
+ emit_stx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+populate_extable:
+ {
+ struct exception_table_entry *ex;
+ u8 *_insn = image + proglen + (start_of_ldx - temp);
+ s64 delta;
+
+ if (!bpf_prog->aux->extable)
+ break;
+
+ ex = &bpf_prog->aux->extable[excnt++];
+
+ delta = _insn - (u8 *)&ex->insn;
+ /* switch ex to rw buffer for writes */
+ ex = (void *)rw_image + ((void *)ex - (void *)image);
+
+ ex->insn = delta;
+
+ ex->data = EX_TYPE_BPF;
+
+ ex->fixup = (prog - start_of_ldx) |
+ ((BPF_CLASS(insn->code) == BPF_LDX ? reg2pt_regs[dst_reg] : DONT_CLEAR) << 8);
+ }
+ break;
+
/* LDX: dst_reg = *(u8*)(src_reg + off) */
case BPF_LDX | BPF_MEM | BPF_B:
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
@@ -2036,6 +2215,8 @@ st: if (is_imm8(insn->off))
pop_r12(&prog);
} else {
pop_callee_regs(&prog, callee_regs_used);
+ if (arena_vm_start)
+ pop_r12(&prog);
}
EMIT1(0xC9); /* leave */
emit_return(&prog, image + addrs[i - 1] + (prog - temp));
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 42f22bc881f0..a0d737bb86d1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1460,6 +1460,7 @@ struct bpf_prog_aux {
bool xdp_has_frags;
bool exception_cb;
bool exception_boundary;
+ struct bpf_arena *arena;
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
/* function name for valid attach_btf_id */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index fee070b9826e..cd76d43412d0 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -72,6 +72,9 @@ struct ctl_table_header;
/* unused opcode to mark special ldsx instruction. Same as BPF_IND */
#define BPF_PROBE_MEMSX 0x40
+/* unused opcode to mark special load instruction. Same as BPF_MSH */
+#define BPF_PROBE_MEM32 0xa0
+
/* unused opcode to mark call to interpreter with arguments */
#define BPF_CALL_ARGS 0xe0
--
2.34.1
next prev parent reply other threads:[~2024-02-06 22:05 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-06 22:04 [PATCH bpf-next 00/16] bpf: Introduce BPF arena Alexei Starovoitov
2024-02-06 22:04 ` [PATCH bpf-next 01/16] bpf: Allow kfuncs return 'void *' Alexei Starovoitov
2024-02-08 19:40 ` Andrii Nakryiko
2024-02-09 0:09 ` Alexei Starovoitov
2024-02-09 19:09 ` Andrii Nakryiko
2024-02-10 2:32 ` Alexei Starovoitov
2024-02-09 16:06 ` David Vernet
2024-02-06 22:04 ` [PATCH bpf-next 02/16] bpf: Recognize '__map' suffix in kfunc arguments Alexei Starovoitov
2024-02-09 16:57 ` David Vernet
2024-02-09 17:46 ` Alexei Starovoitov
2024-02-09 18:11 ` David Vernet
2024-02-09 18:59 ` Alexei Starovoitov
2024-02-09 19:18 ` David Vernet
2024-02-06 22:04 ` [PATCH bpf-next 03/16] mm: Expose vmap_pages_range() to the rest of the kernel Alexei Starovoitov
2024-02-07 21:07 ` Lorenzo Stoakes
2024-02-07 22:56 ` Alexei Starovoitov
2024-02-08 5:44 ` Johannes Weiner
2024-02-08 23:55 ` Alexei Starovoitov
2024-02-09 6:36 ` Lorenzo Stoakes
2024-02-14 8:31 ` Christoph Hellwig
2024-02-06 22:04 ` [PATCH bpf-next 04/16] bpf: Introduce bpf_arena Alexei Starovoitov
2024-02-07 18:40 ` Barret Rhoden
2024-02-07 20:55 ` Alexei Starovoitov
2024-02-07 21:11 ` Barret Rhoden
2024-02-08 6:26 ` Alexei Starovoitov
2024-02-08 21:58 ` Barret Rhoden
2024-02-08 23:36 ` Alexei Starovoitov
2024-02-08 23:50 ` Barret Rhoden
2024-02-06 22:04 ` [PATCH bpf-next 05/16] bpf: Disasm support for cast_kern/user instructions Alexei Starovoitov
2024-02-06 22:04 ` Alexei Starovoitov [this message]
2024-02-06 22:04 ` [PATCH bpf-next 07/16] bpf: Add x86-64 JIT support for bpf_cast_user instruction Alexei Starovoitov
2024-02-06 22:04 ` [PATCH bpf-next 08/16] bpf: Recognize cast_kern/user instructions in the verifier Alexei Starovoitov
2024-02-06 22:04 ` [PATCH bpf-next 09/16] bpf: Recognize btf_decl_tag("arg:arena") as PTR_TO_ARENA Alexei Starovoitov
2024-02-06 22:04 ` [PATCH bpf-next 10/16] libbpf: Add __arg_arena to bpf_helpers.h Alexei Starovoitov
2024-02-06 22:04 ` [PATCH bpf-next 11/16] libbpf: Add support for bpf_arena Alexei Starovoitov
2024-02-08 1:15 ` Andrii Nakryiko
2024-02-08 1:38 ` Alexei Starovoitov
2024-02-08 18:29 ` Andrii Nakryiko
2024-02-08 18:45 ` Alexei Starovoitov
2024-02-08 18:54 ` Andrii Nakryiko
2024-02-08 18:59 ` Alexei Starovoitov
2024-02-06 22:04 ` [PATCH bpf-next 12/16] libbpf: Allow specifying 64-bit integers in map BTF Alexei Starovoitov
2024-02-08 1:16 ` Andrii Nakryiko
2024-02-08 1:58 ` Alexei Starovoitov
2024-02-08 18:16 ` Andrii Nakryiko
2024-02-06 22:04 ` [PATCH bpf-next 13/16] bpf: Tell bpf programs kernel's PAGE_SIZE Alexei Starovoitov
2024-02-06 22:04 ` [PATCH bpf-next 14/16] bpf: Add helper macro bpf_arena_cast() Alexei Starovoitov
2024-02-06 22:04 ` [PATCH bpf-next 15/16] selftests/bpf: Add bpf_arena_list test Alexei Starovoitov
2024-02-07 17:04 ` Eduard Zingerman
2024-02-08 2:59 ` Alexei Starovoitov
2024-02-08 11:10 ` Jose E. Marchesi
2024-02-06 22:04 ` [PATCH bpf-next 16/16] selftests/bpf: Add bpf_arena_htab test Alexei Starovoitov
2024-02-07 12:34 ` [PATCH bpf-next 00/16] bpf: Introduce BPF arena Donald Hunter
2024-02-07 13:33 ` Barret Rhoden
2024-02-07 20:16 ` Alexei Starovoitov
2024-02-07 20:12 ` Alexei Starovoitov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240206220441.38311-7-alexei.starovoitov@gmail.com \
--to=alexei.starovoitov@gmail.com \
--cc=andrii@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=brho@google.com \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=hannes@cmpxchg.org \
--cc=kernel-team@fb.com \
--cc=linux-mm@kvack.org \
--cc=martin.lau@kernel.org \
--cc=memxor@gmail.com \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox