From: Jiri Olsa <jolsa@kernel.org>
To: stable@vger.kernel.org
Cc: "Linus Torvalds" <torvalds@linux-foundation.org>,
"Masami Hiramatsu" <mhiramat@kernel.org>,
x86@kernel.org, linux-mm@kvack.org, bpf@vger.kernel.org,
linux-kernel@vger.kernel.org,
"Tsahee Zidenberg" <tsahee@annapurnalabs.com>,
"Andrii Nakryiko" <andrii@kernel.org>,
"Christoph Hellwig" <hch@lst.de>,
"Daniel Borkmann" <daniel@iogearbox.net>,
"Thomas Gleixner" <tglx@linutronix.de>,
"Mahé Tardy" <mahe.tardy@isovalent.com>,
linux-arm-kernel@lists.infradead.org
Subject: [RFC PATCH stable 5.4 1/8] uaccess: Add strict non-pagefault kernel-space read function
Date: Mon, 22 May 2023 22:33:45 +0200 [thread overview]
Message-ID: <20230522203352.738576-2-jolsa@kernel.org> (raw)
In-Reply-To: <20230522203352.738576-1-jolsa@kernel.org>
From: Daniel Borkmann <daniel@iogearbox.net>
commit 75a1a607bb7e6d918be3aca11ec2214a275392f4 upstream.
Add two new helpers, probe_kernel_read_strict() and
strncpy_from_unsafe_strict(), which by default alias to __probe_kernel_read()
and __strncpy_from_unsafe(), respectively, but can be overridden by archs
which have non-overlapping address ranges for kernel space and user
space in order to bail out with -EFAULT when attempting to probe user
memory including non-canonical user access addresses [0]:
4-level page tables:
user-space mem: 0x0000000000000000 - 0x00007fffffffffff
non-canonical: 0x0000800000000000 - 0xffff7fffffffffff
5-level page tables:
user-space mem: 0x0000000000000000 - 0x00ffffffffffffff
non-canonical: 0x0100000000000000 - 0xfeffffffffffffff
The idea is that these helpers are complementary to the probe_user_read()
and strncpy_from_unsafe_user() which probe user-only memory. Both added
helpers here do the same, but for kernel-only addresses.
Both sets of helpers are going to be used for BPF tracing. They also
explicitly avoid throwing the splat for non-canonical user addresses from
00c42373d397 ("x86-64: add warning for non-canonical user access address
dereferences").
For compat, the current probe_kernel_read() and strncpy_from_unsafe() are
left as-is.
[0] Documentation/x86/x86_64/mm.txt
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: x86@kernel.org
Link: https://lore.kernel.org/bpf/eefeefd769aa5a013531f491a71f0936779e916b.1572649915.git.daniel@iogearbox.net
---
arch/x86/mm/Makefile | 2 +-
arch/x86/mm/maccess.c | 43 +++++++++++++++++++++++++++++++++++++++++
include/linux/uaccess.h | 4 ++++
mm/maccess.c | 25 +++++++++++++++++++++++-
4 files changed, 72 insertions(+), 2 deletions(-)
create mode 100644 arch/x86/mm/maccess.c
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 84373dc9b341..bbc68a54795e 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -13,7 +13,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
+ pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
new file mode 100644
index 000000000000..f5b85bdc0535
--- /dev/null
+++ b/arch/x86/mm/maccess.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+
+#ifdef CONFIG_X86_64
+static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits)
+{
+ return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
+}
+
+static __always_inline bool invalid_probe_range(u64 vaddr)
+{
+ /*
+ * Range covering the highest possible canonical userspace address
+ * as well as non-canonical address range. For the canonical range
+ * we also need to include the userspace guard page.
+ */
+ return vaddr < TASK_SIZE_MAX + PAGE_SIZE ||
+ canonical_address(vaddr, boot_cpu_data.x86_virt_bits) != vaddr;
+}
+#else
+static __always_inline bool invalid_probe_range(u64 vaddr)
+{
+ return vaddr < TASK_SIZE_MAX;
+}
+#endif
+
+long probe_kernel_read_strict(void *dst, const void *src, size_t size)
+{
+ if (unlikely(invalid_probe_range((unsigned long)src)))
+ return -EFAULT;
+
+ return __probe_kernel_read(dst, src, size);
+}
+
+long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count)
+{
+ if (unlikely(invalid_probe_range((unsigned long)unsafe_addr)))
+ return -EFAULT;
+
+ return __strncpy_from_unsafe(dst, unsafe_addr, count);
+}
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 70941f49d66e..25ae650dcb1a 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -315,6 +315,7 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
* happens, handle that and return -EFAULT.
*/
extern long probe_kernel_read(void *dst, const void *src, size_t size);
+extern long probe_kernel_read_strict(void *dst, const void *src, size_t size);
extern long __probe_kernel_read(void *dst, const void *src, size_t size);
/*
@@ -354,6 +355,9 @@ extern long notrace probe_user_write(void __user *dst, const void *src, size_t s
extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size);
extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
+extern long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
+ long count);
+extern long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
long count);
extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
diff --git a/mm/maccess.c b/mm/maccess.c
index 2d3c3d01064c..3ca8d97e5010 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -43,11 +43,20 @@ probe_write_common(void __user *dst, const void *src, size_t size)
* do_page_fault() doesn't attempt to take mmap_sem. This makes
* probe_kernel_read() suitable for use within regions where the caller
* already holds mmap_sem, or other locks which nest inside mmap_sem.
+ *
+ * probe_kernel_read_strict() is the same as probe_kernel_read() except for
+ * the case where architectures have non-overlapping user and kernel address
+ * ranges: probe_kernel_read_strict() will additionally return -EFAULT for
+ * probing memory on a user address range where probe_user_read() is supposed
+ * to be used instead.
*/
long __weak probe_kernel_read(void *dst, const void *src, size_t size)
__attribute__((alias("__probe_kernel_read")));
+long __weak probe_kernel_read_strict(void *dst, const void *src, size_t size)
+ __attribute__((alias("__probe_kernel_read")));
+
long __probe_kernel_read(void *dst, const void *src, size_t size)
{
long ret;
@@ -157,8 +166,22 @@ EXPORT_SYMBOL_GPL(probe_user_write);
*
* If @count is smaller than the length of the string, copies @count-1 bytes,
* sets the last byte of @dst buffer to NUL and returns @count.
+ *
+ * strncpy_from_unsafe_strict() is the same as strncpy_from_unsafe() except
+ * for the case where architectures have non-overlapping user and kernel address
+ * ranges: strncpy_from_unsafe_strict() will additionally return -EFAULT for
+ * probing memory on a user address range where strncpy_from_unsafe_user() is
+ * supposed to be used instead.
*/
-long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
+
+long __weak strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
+ __attribute__((alias("__strncpy_from_unsafe")));
+
+long __weak strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
+ long count)
+ __attribute__((alias("__strncpy_from_unsafe")));
+
+long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
{
mm_segment_t old_fs = get_fs();
const void *src = unsafe_addr;
--
2.40.1
next parent reply other threads:[~2023-05-22 20:34 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20230522203352.738576-1-jolsa@kernel.org>
2023-05-22 20:33 ` Jiri Olsa [this message]
2023-05-22 20:33 ` [RFC PATCH stable 5.4 2/8] bpf: Add probe_read_{user, kernel} and probe_read_{user, kernel}_str helpers Jiri Olsa
2023-05-22 20:33 ` [RFC PATCH stable 5.4 3/8] bpf: Restrict bpf_probe_read{, str}() only to archs where they work Jiri Olsa
2023-05-22 20:33 ` [RFC PATCH stable 5.4 4/8] maccess: clarify kerneldoc comments Jiri Olsa
2023-05-22 20:33 ` [RFC PATCH stable 5.4 5/8] maccess: rename strncpy_from_unsafe_user to strncpy_from_user_nofault Jiri Olsa
2023-05-22 20:33 ` [RFC PATCH stable 5.4 6/8] maccess: rename strncpy_from_unsafe_strict to strncpy_from_kernel_nofault Jiri Olsa
2023-05-22 20:33 ` [RFC PATCH stable 5.4 7/8] bpf: rework the compat kernel probe handling Jiri Olsa
2023-05-22 20:33 ` [RFC PATCH stable 5.4 8/8] bpf: bpf_probe_read_kernel_str() has to return amount of data read on success Jiri Olsa
2023-05-26 18:54 ` [RFC PATCH stable 5.4 0/8] bpf: Fix bpf_probe_read/bpf_probe_read_str helpers Greg KH
2023-05-28 20:02 ` Jiri Olsa
2023-05-29 8:37 ` Greg KH
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230522203352.738576-2-jolsa@kernel.org \
--to=jolsa@kernel.org \
--cc=andrii@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=hch@lst.de \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mahe.tardy@isovalent.com \
--cc=mhiramat@kernel.org \
--cc=stable@vger.kernel.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
--cc=tsahee@annapurnalabs.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox