* [bpf-next v2 1/2] bpf: Add bpf_copy_from_user_task_str kfunc
@ 2025-01-07 2:06 Jordan Rome
2025-01-07 2:06 ` [bpf-next v2 2/2] selftests/bpf: Add tests for bpf_copy_from_user_task_str Jordan Rome
` (3 more replies)
0 siblings, 4 replies; 9+ messages in thread
From: Jordan Rome @ 2025-01-07 2:06 UTC (permalink / raw)
To: bpf
Cc: linux-mm, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Kernel Team, Andrew Morton, Shakeel Butt
This new kfunc will be able to copy a string
from another process's/task's address space.
This is similar to `bpf_copy_from_user_str`
but accepts a `struct task_struct*` argument.
This required adding an additional function
in memory.c, namely `copy_str_from_process_vm`,
which works similar to `access_process_vm`
but utilizes the `strncpy_from_user` helper
and only supports reading/copying and not writing.
Signed-off-by: Jordan Rome <linux@jordanrome.com>
---
include/linux/mm.h | 3 ++
kernel/bpf/helpers.c | 46 ++++++++++++++++++++
mm/memory.c | 101 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 150 insertions(+)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c39c4945946c..52b304b20630 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2484,6 +2484,9 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
+extern int copy_str_from_process_vm(struct task_struct *tsk, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags);
+
long get_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index cd5f9884d85b..45d41b7a9906 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -3072,6 +3072,51 @@ __bpf_kfunc void bpf_local_irq_restore(unsigned long *flags__irq_flag)
local_irq_restore(*flags__irq_flag);
}
+/**
+ * bpf_copy_from_user_task_str() - Copy a string from an task's address space
+ * @dst: Destination address, in kernel space. This buffer must be
+ * at least @dst__sz bytes long.
+ * @dst__sz: Maximum number of bytes to copy, includes the trailing NUL.
+ * @unsafe_ptr__ign: Source address in the task's address space.
+ * @tsk: The task whose address space will be used
+ * @flags: The only supported flag is BPF_F_PAD_ZEROS
+ *
+ * Copies a NULL-terminated string from a task's address space to BPF space.
+ * If user string is too long this will still ensure zero termination in the
+ * dst buffer unless buffer size is 0.
+ *
+ * If BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success and
+ * memset all of @dst on failure.
+ */
+__bpf_kfunc int bpf_copy_from_user_task_str(void *dst, u32 dst__sz, const void __user *unsafe_ptr__ign, struct task_struct *tsk, u64 flags)
+{
+ int count = dst__sz - 1;
+ int ret = 0;
+
+ if (unlikely(flags & ~BPF_F_PAD_ZEROS))
+ return -EINVAL;
+
+ if (unlikely(!dst__sz))
+ return 0;
+
+ ret = copy_str_from_process_vm(tsk, (unsigned long)unsafe_ptr__ign, dst, count, 0);
+
+ if (ret <= 0) {
+ if (flags & BPF_F_PAD_ZEROS)
+ memset((char *)dst, 0, dst__sz);
+ return ret;
+ }
+
+ if (ret < count) {
+ if (flags & BPF_F_PAD_ZEROS)
+ memset((char *)dst + ret, 0, dst__sz - ret);
+ } else {
+ ((char *)dst)[count] = '\0';
+ }
+
+ return ret + 1;
+}
+
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(generic_btf_ids)
@@ -3164,6 +3209,7 @@ BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_copy_from_user_task_str, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_get_kmem_cache)
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
diff --git a/mm/memory.c b/mm/memory.c
index 75c2dfd04f72..514490bd7d6d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6673,6 +6673,75 @@ static int __access_remote_vm(struct mm_struct *mm, unsigned long addr,
return buf - old_buf;
}
+/*
+ * Copy a string from another process's address space as given in mm.
+ * Don't return partial results. If there is any error return -EFAULT.
+ */
+static int __copy_str_from_remote_vm(struct mm_struct *mm, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags)
+{
+ void *old_buf = buf;
+ int err = 0;
+
+ if (mmap_read_lock_killable(mm))
+ return -EFAULT;
+
+ /* Untag the address before looking up the VMA */
+ addr = untagged_addr_remote(mm, addr);
+
+ /* Avoid triggering the temporary warning in __get_user_pages */
+ if (!vma_lookup(mm, addr)) {
+ mmap_read_unlock(mm);
+ return -EFAULT;
+ }
+
+ while (len) {
+ int bytes, offset, retval;
+ void *maddr;
+ struct vm_area_struct *vma = NULL;
+ struct page *page = get_user_page_vma_remote(mm, addr,
+ gup_flags, &vma);
+
+ if (IS_ERR(page)) {
+ /*
+ * Treat as a total failure for now until we decide how
+ * to handle the CONFIG_HAVE_IOREMAP_PROT case and
+ * stack expansion.
+ */
+ err = -EFAULT;
+ break;
+ }
+
+ bytes = len;
+ offset = addr & (PAGE_SIZE - 1);
+ if (bytes > PAGE_SIZE - offset)
+ bytes = PAGE_SIZE - offset;
+
+ maddr = kmap_local_page(page);
+ retval = strncpy_from_user(buf, (const char __user *)addr, bytes);
+ unmap_and_put_page(page, maddr);
+
+ if (retval < 0) {
+ err = retval;
+ break;
+ }
+
+ len -= retval;
+ buf += retval;
+ addr += retval;
+
+ /* Found the end of the string */
+ if (retval < bytes)
+ break;
+ }
+ mmap_read_unlock(mm);
+
+ if (err)
+ return err;
+
+ return buf - old_buf;
+}
+
/**
* access_remote_vm - access another process' address space
* @mm: the mm_struct of the target address space
@@ -6714,6 +6783,38 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr,
}
EXPORT_SYMBOL_GPL(access_process_vm);
+/**
+ * copy_str_from_process_vm - copy a string from another process's address space.
+ * @tsk: the task of the target address space
+ * @addr: start address to access
+ * @buf: source or destination buffer
+ * @len: number of bytes to transfer
+ * @gup_flags: flags modifying lookup behaviour
+ *
+ * The caller must hold a reference on @mm.
+ *
+ * Return: number of bytes copied from source to destination. If the string
+ * is shorter than @len then return the length of the string.
+ * On any error, return -EFAULT.
+ */
+int copy_str_from_process_vm(struct task_struct *tsk, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags)
+{
+ struct mm_struct *mm;
+ int ret;
+
+ mm = get_task_mm(tsk);
+ if (!mm)
+ return -EFAULT;
+
+ ret = __copy_str_from_remote_vm(mm, addr, buf, len, gup_flags);
+
+ mmput(mm);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(copy_str_from_process_vm);
+
/*
* Print the name of a VMA.
*/
--
2.43.5
^ permalink raw reply [flat|nested] 9+ messages in thread
* [bpf-next v2 2/2] selftests/bpf: Add tests for bpf_copy_from_user_task_str
2025-01-07 2:06 [bpf-next v2 1/2] bpf: Add bpf_copy_from_user_task_str kfunc Jordan Rome
@ 2025-01-07 2:06 ` Jordan Rome
2025-01-10 22:01 ` Andrii Nakryiko
2025-01-16 4:57 ` kernel test robot
2025-01-09 1:13 ` [bpf-next v2 1/2] bpf: Add bpf_copy_from_user_task_str kfunc kernel test robot
` (2 subsequent siblings)
3 siblings, 2 replies; 9+ messages in thread
From: Jordan Rome @ 2025-01-07 2:06 UTC (permalink / raw)
To: bpf
Cc: linux-mm, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Kernel Team, Andrew Morton, Shakeel Butt
This adds tests for both the happy path and the
error path (with and without the BPF_F_PAD_ZEROS flag).
Signed-off-by: Jordan Rome <linux@jordanrome.com>
---
.../selftests/bpf/prog_tests/bpf_iter.c | 7 +++
.../selftests/bpf/prog_tests/read_vsyscall.c | 1 +
.../selftests/bpf/progs/bpf_iter_tasks.c | 55 +++++++++++++++++++
.../selftests/bpf/progs/read_vsyscall.c | 6 +-
4 files changed, 67 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 6f1bfacd7375..8ed864793bd1 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -34,6 +34,8 @@
#include "bpf_iter_ksym.skel.h"
#include "bpf_iter_sockmap.skel.h"
+static char test_data[] = "test_data";
+
static void test_btf_id_or_null(void)
{
struct bpf_iter_test_kern3 *skel;
@@ -328,12 +330,17 @@ static void test_task_sleepable(void)
if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load"))
return;
+ skel->bss->user_ptr = test_data;
do_dummy_read(skel->progs.dump_task_sleepable);
ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0,
"num_expected_failure_copy_from_user_task");
ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0,
"num_success_copy_from_user_task");
+ ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task_str, 0,
+ "num_expected_failure_copy_from_user_task_str");
+ ASSERT_GT(skel->bss->num_success_copy_from_user_task_str, 0,
+ "num_success_copy_from_user_task_str");
bpf_iter_tasks__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
index c7b9ba8b1d06..a8d1eaa67020 100644
--- a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
@@ -24,6 +24,7 @@ struct read_ret_desc {
{ .name = "copy_from_user", .ret = -EFAULT },
{ .name = "copy_from_user_task", .ret = -EFAULT },
{ .name = "copy_from_user_str", .ret = -EFAULT },
+ { .name = "copy_from_user_task_str", .ret = -EFAULT },
};
void test_read_vsyscall(void)
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
index bc10c4e4b4fa..90691e34b915 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
@@ -9,6 +9,7 @@ char _license[] SEC("license") = "GPL";
uint32_t tid = 0;
int num_unknown_tid = 0;
int num_known_tid = 0;
+void *user_ptr = 0;
SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
@@ -35,7 +36,9 @@ int dump_task(struct bpf_iter__task *ctx)
}
int num_expected_failure_copy_from_user_task = 0;
+int num_expected_failure_copy_from_user_task_str = 0;
int num_success_copy_from_user_task = 0;
+int num_success_copy_from_user_task_str = 0;
SEC("iter.s/task")
int dump_task_sleepable(struct bpf_iter__task *ctx)
@@ -44,6 +47,9 @@ int dump_task_sleepable(struct bpf_iter__task *ctx)
struct task_struct *task = ctx->task;
static const char info[] = " === END ===";
struct pt_regs *regs;
+ char task_str1[10] = "aaaaaaaaaa";
+ char task_str2[10], task_str3[10];
+ char task_str4[20] = "aaaaaaaaaaaaaaaaaaaa";
void *ptr;
uint32_t user_data = 0;
int ret;
@@ -78,8 +84,57 @@ int dump_task_sleepable(struct bpf_iter__task *ctx)
BPF_SEQ_PRINTF(seq, "%s\n", info);
return 0;
}
+
++num_success_copy_from_user_task;
+ /* Read an invalid pointer and ensure we get an error */
+ ptr = NULL;
+ ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1), ptr, task, 0);
+ if (ret >= 0 || task_str1[9] != 'a') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Read an invalid pointer and ensure we get error with pad zeros flag */
+ ptr = NULL;
+ ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1), ptr, task, BPF_F_PAD_ZEROS);
+ if (ret >= 0 || task_str1[9] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ ++num_expected_failure_copy_from_user_task_str;
+
+ /* Same length as the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str2, 10, user_ptr, task, 0);
+ if (bpf_strncmp(task_str2, 10, "test_data\0") != 0 || ret != 10) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Shorter length than the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str3, 9, user_ptr, task, 0);
+ if (bpf_strncmp(task_str3, 9, "test_dat\0") != 0 || ret != 9) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, 0);
+ if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10 || task_str4[sizeof(task_str4) - 1] != 'a') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string with pad zeros flag */
+ ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, BPF_F_PAD_ZEROS);
+ if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10 || task_str4[sizeof(task_str4) - 1] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ ++num_success_copy_from_user_task_str;
+
if (ctx->meta->seq_num == 0)
BPF_SEQ_PRINTF(seq, " tgid gid data\n");
diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c b/tools/testing/selftests/bpf/progs/read_vsyscall.c
index 39ebef430059..623c1c5bd2d0 100644
--- a/tools/testing/selftests/bpf/progs/read_vsyscall.c
+++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c
@@ -8,14 +8,15 @@
int target_pid = 0;
void *user_ptr = 0;
-int read_ret[9];
+int read_ret[10];
char _license[] SEC("license") = "GPL";
/*
- * This is the only kfunc, the others are helpers
+ * These are the kfuncs, the others are helpers
*/
int bpf_copy_from_user_str(void *dst, u32, const void *, u64) __weak __ksym;
+int bpf_copy_from_user_task_str(void *dst, u32, const void *, struct task_struct *, u64) __weak __ksym;
SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int do_probe_read(void *ctx)
@@ -47,6 +48,7 @@ int do_copy_from_user(void *ctx)
read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr,
bpf_get_current_task_btf(), 0);
read_ret[8] = bpf_copy_from_user_str((char *)buf, sizeof(buf), user_ptr, 0);
+ read_ret[9] = bpf_copy_from_user_task_str((char *)buf, sizeof(buf), user_ptr, bpf_get_current_task_btf(), 0);
return 0;
}
--
2.43.5
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [bpf-next v2 1/2] bpf: Add bpf_copy_from_user_task_str kfunc
2025-01-07 2:06 [bpf-next v2 1/2] bpf: Add bpf_copy_from_user_task_str kfunc Jordan Rome
2025-01-07 2:06 ` [bpf-next v2 2/2] selftests/bpf: Add tests for bpf_copy_from_user_task_str Jordan Rome
@ 2025-01-09 1:13 ` kernel test robot
2025-01-10 0:48 ` kernel test robot
2025-01-10 21:50 ` Andrii Nakryiko
3 siblings, 0 replies; 9+ messages in thread
From: kernel test robot @ 2025-01-09 1:13 UTC (permalink / raw)
To: Jordan Rome, bpf
Cc: llvm, oe-kbuild-all, linux-mm, Alexei Starovoitov,
Daniel Borkmann, Andrii Nakryiko, Kernel Team, Andrew Morton,
Shakeel Butt
Hi Jordan,
kernel test robot noticed the following build errors:
[auto build test ERROR on bpf-next/master]
url: https://github.com/intel-lab-lkp/linux/commits/Jordan-Rome/selftests-bpf-Add-tests-for-bpf_copy_from_user_task_str/20250107-100850
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link: https://lore.kernel.org/r/20250107020632.170883-1-linux%40jordanrome.com
patch subject: [bpf-next v2 1/2] bpf: Add bpf_copy_from_user_task_str kfunc
config: arm-imxrt_defconfig (https://download.01.org/0day-ci/archive/20250109/202501090817.0Uikg2ka-lkp@intel.com/config)
compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250109/202501090817.0Uikg2ka-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501090817.0Uikg2ka-lkp@intel.com/
All errors (new ones prefixed by >>):
>> ld.lld: error: undefined symbol: copy_str_from_process_vm
>>> referenced by helpers.c
>>> kernel/bpf/helpers.o:(bpf_copy_from_user_task_str) in archive vmlinux.a
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [bpf-next v2 1/2] bpf: Add bpf_copy_from_user_task_str kfunc
2025-01-07 2:06 [bpf-next v2 1/2] bpf: Add bpf_copy_from_user_task_str kfunc Jordan Rome
2025-01-07 2:06 ` [bpf-next v2 2/2] selftests/bpf: Add tests for bpf_copy_from_user_task_str Jordan Rome
2025-01-09 1:13 ` [bpf-next v2 1/2] bpf: Add bpf_copy_from_user_task_str kfunc kernel test robot
@ 2025-01-10 0:48 ` kernel test robot
2025-01-10 21:50 ` Andrii Nakryiko
3 siblings, 0 replies; 9+ messages in thread
From: kernel test robot @ 2025-01-10 0:48 UTC (permalink / raw)
To: Jordan Rome, bpf
Cc: oe-kbuild-all, linux-mm, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Kernel Team, Andrew Morton, Shakeel Butt
Hi Jordan,
kernel test robot noticed the following build errors:
[auto build test ERROR on bpf-next/master]
url: https://github.com/intel-lab-lkp/linux/commits/Jordan-Rome/selftests-bpf-Add-tests-for-bpf_copy_from_user_task_str/20250107-100850
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link: https://lore.kernel.org/r/20250107020632.170883-1-linux%40jordanrome.com
patch subject: [bpf-next v2 1/2] bpf: Add bpf_copy_from_user_task_str kfunc
config: m68k-randconfig-r133-20250109 (https://download.01.org/0day-ci/archive/20250110/202501100833.R0PJXI6s-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 14.2.0
reproduce: (https://download.01.org/0day-ci/archive/20250110/202501100833.R0PJXI6s-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501100833.R0PJXI6s-lkp@intel.com/
All errors (new ones prefixed by >>):
m68k-linux-ld: kernel/bpf/helpers.o: in function `bpf_copy_from_user_task_str':
>> helpers.c:(.text+0x2a3a): undefined reference to `copy_str_from_process_vm'
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [bpf-next v2 1/2] bpf: Add bpf_copy_from_user_task_str kfunc
2025-01-07 2:06 [bpf-next v2 1/2] bpf: Add bpf_copy_from_user_task_str kfunc Jordan Rome
` (2 preceding siblings ...)
2025-01-10 0:48 ` kernel test robot
@ 2025-01-10 21:50 ` Andrii Nakryiko
2025-01-17 2:22 ` Jordan Rome
3 siblings, 1 reply; 9+ messages in thread
From: Andrii Nakryiko @ 2025-01-10 21:50 UTC (permalink / raw)
To: Jordan Rome
Cc: bpf, linux-mm, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Kernel Team, Andrew Morton, Shakeel Butt
On Mon, Jan 6, 2025 at 6:12 PM Jordan Rome <linux@jordanrome.com> wrote:
>
> This new kfunc will be able to copy a string
> from another process's/task's address space.
nit: this is kernel code, task is unambiguous, so I'd drop the
"process" reference here
> This is similar to `bpf_copy_from_user_str`
> but accepts a `struct task_struct*` argument.
>
> This required adding an additional function
> in memory.c, namely `copy_str_from_process_vm`,
> which works similar to `access_process_vm`
> but utilizes the `strncpy_from_user` helper
> and only supports reading/copying and not writing.
>
> Signed-off-by: Jordan Rome <linux@jordanrome.com>
> ---
> include/linux/mm.h | 3 ++
> kernel/bpf/helpers.c | 46 ++++++++++++++++++++
> mm/memory.c | 101 +++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 150 insertions(+)
>
please check kernel test bot's complains as well
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index c39c4945946c..52b304b20630 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2484,6 +2484,9 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
> extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
> void *buf, int len, unsigned int gup_flags);
>
> +extern int copy_str_from_process_vm(struct task_struct *tsk, unsigned long addr,
> + void *buf, int len, unsigned int gup_flags);
nit: curious what mm folks think about naming, I'd go with a slightly
less verbose naming: "copy_remote_vm_str" (copy vs access, _str suffix
for non fixed-sized semantics marking)
for the next revision, let's split out mm parts from helpers parts, I
don't think we lose much as this new internal API is self-contained,
and it will be easier for mm folks to review
> +
> long get_user_pages_remote(struct mm_struct *mm,
> unsigned long start, unsigned long nr_pages,
> unsigned int gup_flags, struct page **pages,
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index cd5f9884d85b..45d41b7a9906 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -3072,6 +3072,51 @@ __bpf_kfunc void bpf_local_irq_restore(unsigned long *flags__irq_flag)
> local_irq_restore(*flags__irq_flag);
> }
>
> +/**
> + * bpf_copy_from_user_task_str() - Copy a string from an task's address space
> + * @dst: Destination address, in kernel space. This buffer must be
> + * at least @dst__sz bytes long.
> + * @dst__sz: Maximum number of bytes to copy, includes the trailing NUL.
> + * @unsafe_ptr__ign: Source address in the task's address space.
> + * @tsk: The task whose address space will be used
> + * @flags: The only supported flag is BPF_F_PAD_ZEROS
> + *
> + * Copies a NULL-terminated string from a task's address space to BPF space.
there is no "BPF space", really... maybe "copies string into *dst* buffer"
> + * If user string is too long this will still ensure zero termination in the
> + * dst buffer unless buffer size is 0.
> + *
> + * If BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success and
> + * memset all of @dst on failure.
> + */
> +__bpf_kfunc int bpf_copy_from_user_task_str(void *dst, u32 dst__sz, const void __user *unsafe_ptr__ign, struct task_struct *tsk, u64 flags)
this looks like a long line, does it fit under 100 characters?
> +{
> + int count = dst__sz - 1;
> + int ret = 0;
> +
> + if (unlikely(flags & ~BPF_F_PAD_ZEROS))
> + return -EINVAL;
> +
> + if (unlikely(!dst__sz))
> + return 0;
> +
> + ret = copy_str_from_process_vm(tsk, (unsigned long)unsafe_ptr__ign, dst, count, 0);
> +
> + if (ret <= 0) {
> + if (flags & BPF_F_PAD_ZEROS)
> + memset((char *)dst, 0, dst__sz);
nit: no need for (char *) cast? memset takes void *, I think
> + return ret;
if ret == 0, is that an error? If so, `return ret ?: -EINVAL;` or
something along those lines?
pw-bot: cr
> + }
> +
> + if (ret < count) {
> + if (flags & BPF_F_PAD_ZEROS)
> + memset((char *)dst + ret, 0, dst__sz - ret);
> + } else {
> + ((char *)dst)[count] = '\0';
> + }
> +
> + return ret + 1;
> +}
> +
> __bpf_kfunc_end_defs();
>
> BTF_KFUNCS_START(generic_btf_ids)
> @@ -3164,6 +3209,7 @@ BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
> BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
> BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
> BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
> +BTF_ID_FLAGS(func, bpf_copy_from_user_task_str, KF_SLEEPABLE)
> BTF_ID_FLAGS(func, bpf_get_kmem_cache)
> BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE)
> BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
> diff --git a/mm/memory.c b/mm/memory.c
> index 75c2dfd04f72..514490bd7d6d 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -6673,6 +6673,75 @@ static int __access_remote_vm(struct mm_struct *mm, unsigned long addr,
> return buf - old_buf;
> }
>
> +/*
> + * Copy a string from another process's address space as given in mm.
> + * Don't return partial results. If there is any error return -EFAULT.
What does "don't return partial results" mean? What happens if we read
part of a string and then fail to read the rest?
> + */
> +static int __copy_str_from_remote_vm(struct mm_struct *mm, unsigned long addr,
> + void *buf, int len, unsigned int gup_flags)
> +{
> + void *old_buf = buf;
> + int err = 0;
> +
> + if (mmap_read_lock_killable(mm))
> + return -EFAULT;
> +
> + /* Untag the address before looking up the VMA */
> + addr = untagged_addr_remote(mm, addr);
> +
> + /* Avoid triggering the temporary warning in __get_user_pages */
> + if (!vma_lookup(mm, addr)) {
> + mmap_read_unlock(mm);
> + return -EFAULT;
maybe let's do (so that we do mmap_read_unlock in just one place)
err = -EFAULT;
goto err_out;
and then see below
> + }
> +
> + while (len) {
> + int bytes, offset, retval;
> + void *maddr;
> + struct vm_area_struct *vma = NULL;
> + struct page *page = get_user_page_vma_remote(mm, addr,
> + gup_flags, &vma);
> +
nit: I'd split page declaration and assignment and kept
get_user_page_vma_remote() invocation on a single line
> + if (IS_ERR(page)) {
> + /*
> + * Treat as a total failure for now until we decide how
> + * to handle the CONFIG_HAVE_IOREMAP_PROT case and
> + * stack expansion.
> + */
> + err = -EFAULT;
> + break;
> + }
> +
> + bytes = len;
> + offset = addr & (PAGE_SIZE - 1);
> + if (bytes > PAGE_SIZE - offset)
> + bytes = PAGE_SIZE - offset;
> +
> + maddr = kmap_local_page(page);
> + retval = strncpy_from_user(buf, (const char __user *)addr, bytes);
you are not using maddr... that seems wrong (even if it works due to
how kmap_local_page is currently implemented)
> + unmap_and_put_page(page, maddr);
> +
> + if (retval < 0) {
> + err = retval;
> + break;
> + }
> +
> + len -= retval;
> + buf += retval;
> + addr += retval;
> +
> + /* Found the end of the string */
> + if (retval < bytes)
> + break;
> + }
err_out: here
> + mmap_read_unlock(mm);
> +
> + if (err)
> + return err;
> +
> + return buf - old_buf;
> +}
> +
> /**
> * access_remote_vm - access another process' address space
> * @mm: the mm_struct of the target address space
> @@ -6714,6 +6783,38 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr,
> }
> EXPORT_SYMBOL_GPL(access_process_vm);
>
> +/**
> + * copy_str_from_process_vm - copy a string from another process's address space.
> + * @tsk: the task of the target address space
> + * @addr: start address to access
access -> read from
> + * @buf: source or destination buffer
for this api it's always the destination, right?
> + * @len: number of bytes to transfer
> + * @gup_flags: flags modifying lookup behaviour
> + *
> + * The caller must hold a reference on @mm.
> + *
> + * Return: number of bytes copied from source to destination. If the string
> + * is shorter than @len then return the length of the string.
and if the string is longer than @len, then what happens? we should
either specify or drop the "if string is shorter bit" and make it
unambiguous whether terminating zero is included or not
> + * On any error, return -EFAULT.
> + */
> +int copy_str_from_process_vm(struct task_struct *tsk, unsigned long addr,
> + void *buf, int len, unsigned int gup_flags)
> +{
> + struct mm_struct *mm;
> + int ret;
> +
> + mm = get_task_mm(tsk);
> + if (!mm)
> + return -EFAULT;
> +
> + ret = __copy_str_from_remote_vm(mm, addr, buf, len, gup_flags);
> +
> + mmput(mm);
> +
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(copy_str_from_process_vm);
> +
> /*
> * Print the name of a VMA.
> */
> --
> 2.43.5
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [bpf-next v2 2/2] selftests/bpf: Add tests for bpf_copy_from_user_task_str
2025-01-07 2:06 ` [bpf-next v2 2/2] selftests/bpf: Add tests for bpf_copy_from_user_task_str Jordan Rome
@ 2025-01-10 22:01 ` Andrii Nakryiko
2025-01-16 4:57 ` kernel test robot
1 sibling, 0 replies; 9+ messages in thread
From: Andrii Nakryiko @ 2025-01-10 22:01 UTC (permalink / raw)
To: Jordan Rome
Cc: bpf, linux-mm, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Kernel Team, Andrew Morton, Shakeel Butt
On Mon, Jan 6, 2025 at 6:20 PM Jordan Rome <linux@jordanrome.com> wrote:
>
> This adds tests for both the happy path and the
> error path (with and without the BPF_F_PAD_ZEROS flag).
>
> Signed-off-by: Jordan Rome <linux@jordanrome.com>
> ---
> .../selftests/bpf/prog_tests/bpf_iter.c | 7 +++
> .../selftests/bpf/prog_tests/read_vsyscall.c | 1 +
> .../selftests/bpf/progs/bpf_iter_tasks.c | 55 +++++++++++++++++++
> .../selftests/bpf/progs/read_vsyscall.c | 6 +-
> 4 files changed, 67 insertions(+), 2 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> index 6f1bfacd7375..8ed864793bd1 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> @@ -34,6 +34,8 @@
> #include "bpf_iter_ksym.skel.h"
> #include "bpf_iter_sockmap.skel.h"
>
> +static char test_data[] = "test_data";
> +
> static void test_btf_id_or_null(void)
> {
> struct bpf_iter_test_kern3 *skel;
> @@ -328,12 +330,17 @@ static void test_task_sleepable(void)
> if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load"))
> return;
>
> + skel->bss->user_ptr = test_data;
> do_dummy_read(skel->progs.dump_task_sleepable);
>
> ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0,
> "num_expected_failure_copy_from_user_task");
> ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0,
> "num_success_copy_from_user_task");
> + ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task_str, 0,
> + "num_expected_failure_copy_from_user_task_str");
> + ASSERT_GT(skel->bss->num_success_copy_from_user_task_str, 0,
> + "num_success_copy_from_user_task_str");
>
> bpf_iter_tasks__destroy(skel);
> }
> diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
> index c7b9ba8b1d06..a8d1eaa67020 100644
> --- a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
> +++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
> @@ -24,6 +24,7 @@ struct read_ret_desc {
> { .name = "copy_from_user", .ret = -EFAULT },
> { .name = "copy_from_user_task", .ret = -EFAULT },
> { .name = "copy_from_user_str", .ret = -EFAULT },
> + { .name = "copy_from_user_task_str", .ret = -EFAULT },
> };
>
> void test_read_vsyscall(void)
> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
> index bc10c4e4b4fa..90691e34b915 100644
> --- a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
> @@ -9,6 +9,7 @@ char _license[] SEC("license") = "GPL";
> uint32_t tid = 0;
> int num_unknown_tid = 0;
> int num_known_tid = 0;
> +void *user_ptr = 0;
>
> SEC("iter/task")
> int dump_task(struct bpf_iter__task *ctx)
> @@ -35,7 +36,9 @@ int dump_task(struct bpf_iter__task *ctx)
> }
>
> int num_expected_failure_copy_from_user_task = 0;
> +int num_expected_failure_copy_from_user_task_str = 0;
> int num_success_copy_from_user_task = 0;
> +int num_success_copy_from_user_task_str = 0;
>
> SEC("iter.s/task")
> int dump_task_sleepable(struct bpf_iter__task *ctx)
> @@ -44,6 +47,9 @@ int dump_task_sleepable(struct bpf_iter__task *ctx)
> struct task_struct *task = ctx->task;
> static const char info[] = " === END ===";
> struct pt_regs *regs;
> + char task_str1[10] = "aaaaaaaaaa";
> + char task_str2[10], task_str3[10];
> + char task_str4[20] = "aaaaaaaaaaaaaaaaaaaa";
> void *ptr;
> uint32_t user_data = 0;
> int ret;
> @@ -78,8 +84,57 @@ int dump_task_sleepable(struct bpf_iter__task *ctx)
> BPF_SEQ_PRINTF(seq, "%s\n", info);
> return 0;
> }
> +
> ++num_success_copy_from_user_task;
>
> + /* Read an invalid pointer and ensure we get an error */
> + ptr = NULL;
> + ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1), ptr, task, 0);
> + if (ret >= 0 || task_str1[9] != 'a') {
> + BPF_SEQ_PRINTF(seq, "%s\n", info);
> + return 0;
> + }
> +
> + /* Read an invalid pointer and ensure we get error with pad zeros flag */
> + ptr = NULL;
> + ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1), ptr, task, BPF_F_PAD_ZEROS);
> + if (ret >= 0 || task_str1[9] != '\0') {
> + BPF_SEQ_PRINTF(seq, "%s\n", info);
> + return 0;
> + }
> +
> + ++num_expected_failure_copy_from_user_task_str;
> +
> + /* Same length as the string */
> + ret = bpf_copy_from_user_task_str((char *)task_str2, 10, user_ptr, task, 0);
> + if (bpf_strncmp(task_str2, 10, "test_data\0") != 0 || ret != 10) {
> + BPF_SEQ_PRINTF(seq, "%s\n", info);
> + return 0;
> + }
> +
> + /* Shorter length than the string */
> + ret = bpf_copy_from_user_task_str((char *)task_str3, 9, user_ptr, task, 0);
> + if (bpf_strncmp(task_str3, 9, "test_dat\0") != 0 || ret != 9) {
> + BPF_SEQ_PRINTF(seq, "%s\n", info);
> + return 0;
> + }
> +
> + /* Longer length than the string */
> + ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, 0);
> + if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10 || task_str4[sizeof(task_str4) - 1] != 'a') {
> + BPF_SEQ_PRINTF(seq, "%s\n", info);
> + return 0;
> + }
> +
> + /* Longer length than the string with pad zeros flag */
> + ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, BPF_F_PAD_ZEROS);
> + if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10 || task_str4[sizeof(task_str4) - 1] != '\0') {
looks like a long string, please check 100 character limit
> + BPF_SEQ_PRINTF(seq, "%s\n", info);
> + return 0;
nit: this BPF_SEQ_PRINTF() + return 0 repetition is... repetitive :)
`goto drop_out;` maybe?
> + }
> +
> + ++num_success_copy_from_user_task_str;
> +
> if (ctx->meta->seq_num == 0)
> BPF_SEQ_PRINTF(seq, " tgid gid data\n");
>
> diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c b/tools/testing/selftests/bpf/progs/read_vsyscall.c
> index 39ebef430059..623c1c5bd2d0 100644
> --- a/tools/testing/selftests/bpf/progs/read_vsyscall.c
> +++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c
> @@ -8,14 +8,15 @@
>
> int target_pid = 0;
> void *user_ptr = 0;
> -int read_ret[9];
> +int read_ret[10];
>
> char _license[] SEC("license") = "GPL";
>
> /*
> - * This is the only kfunc, the others are helpers
> + * These are the kfuncs, the others are helpers
> */
> int bpf_copy_from_user_str(void *dst, u32, const void *, u64) __weak __ksym;
> +int bpf_copy_from_user_task_str(void *dst, u32, const void *, struct task_struct *, u64) __weak __ksym;
these definitions should be coming from vmlinux.h, no need to add them
manually anymore
>
> SEC("fentry/" SYS_PREFIX "sys_nanosleep")
> int do_probe_read(void *ctx)
> @@ -47,6 +48,7 @@ int do_copy_from_user(void *ctx)
> read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr,
> bpf_get_current_task_btf(), 0);
> read_ret[8] = bpf_copy_from_user_str((char *)buf, sizeof(buf), user_ptr, 0);
> + read_ret[9] = bpf_copy_from_user_task_str((char *)buf, sizeof(buf), user_ptr, bpf_get_current_task_btf(), 0);
please drop all those (char *) casts, in C any pointer is implicitly
castable to `void *` and `void *` is (implicitly) castable to any
other pointer. C++ is stricter, but in C it's canonical to not do
casting
>
> return 0;
> }
> --
> 2.43.5
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [bpf-next v2 2/2] selftests/bpf: Add tests for bpf_copy_from_user_task_str
2025-01-07 2:06 ` [bpf-next v2 2/2] selftests/bpf: Add tests for bpf_copy_from_user_task_str Jordan Rome
2025-01-10 22:01 ` Andrii Nakryiko
@ 2025-01-16 4:57 ` kernel test robot
1 sibling, 0 replies; 9+ messages in thread
From: kernel test robot @ 2025-01-16 4:57 UTC (permalink / raw)
To: Jordan Rome
Cc: oe-lkp, lkp, bpf, linux-mm, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Kernel Team, Andrew Morton, Shakeel Butt,
oliver.sang
Hello,
kernel test robot noticed "WARNING:possible_recursive_locking_detected" on:
commit: 974e24f3e253a8e69418b73e486f74c6fa40e449 ("[bpf-next v2 2/2] selftests/bpf: Add tests for bpf_copy_from_user_task_str")
url: https://github.com/intel-lab-lkp/linux/commits/Jordan-Rome/selftests-bpf-Add-tests-for-bpf_copy_from_user_task_str/20250107-100850
base: https://git.kernel.org/cgit/linux/kernel/git/bpf/bpf-next.git master
patch link: https://lore.kernel.org/all/20250107020632.170883-2-linux@jordanrome.com/
patch subject: [bpf-next v2 2/2] selftests/bpf: Add tests for bpf_copy_from_user_task_str
in testcase: kernel-selftests-bpf
version:
with following parameters:
group: bpf
config: x86_64-rhel-9.4-bpf
compiler: gcc-12
test machine: 8 threads 1 sockets Intel(R) Core(TM) i7-7700 CPU @ 3.60GHz (Kaby Lake) with 32G memory
(please refer to attached dmesg/kmsg for entire log/backtrace)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202501161219.c031baa7-lkp@intel.com
[ 1645.762718][T49812] WARNING: possible recursive locking detected
[ 1645.768742][T49812] 6.13.0-rc3-00084-g974e24f3e253 #1 Tainted: G OE
[ 1645.776333][T49812] --------------------------------------------
[ 1645.782356][T49812] test_progs/49812 is trying to acquire lock:
[1645.788292][T49812] ffff88815d74c620 (&mm->mmap_lock){++++}-{4:4}, at: __might_fault (mm/memory.c:6852 mm/memory.c:6845)
[ 1645.796945][T49812]
[ 1645.796945][T49812] but task is already holding lock:
[1645.804188][T49812] ffff88815d74c620 (&mm->mmap_lock){++++}-{4:4}, at: copy_str_from_process_vm (include/linux/mmap_lock.h:153 mm/memory.c:6686 mm/memory.c:6810)
[ 1645.813879][T49812]
[ 1645.813879][T49812] other info that might help us debug this:
[ 1645.821816][T49812] Possible unsafe locking scenario:
[ 1645.821816][T49812]
[ 1645.829146][T49812] CPU0
[ 1645.832298][T49812] ----
[ 1645.835450][T49812] lock(&mm->mmap_lock);
[ 1645.839652][T49812] lock(&mm->mmap_lock);
[ 1645.843865][T49812]
[ 1645.843865][T49812] *** DEADLOCK ***
[ 1645.843865][T49812]
[ 1645.851888][T49812] May be due to missing lock nesting notation
[ 1645.851888][T49812]
[ 1645.860086][T49812] 3 locks held by test_progs/49812:
[1645.865153][T49812] #0: ffff88835c5ab698 (&p->lock){+.+.}-{4:4}, at: bpf_seq_read (kernel/bpf/bpf_iter.c:105)
[1645.873715][T49812] #1: ffffffff84ca6ec0 (rcu_read_lock_trace){....}-{0:0}, at: bpf_iter_run_prog (include/linux/rcupdate.h:337 include/linux/rcupdate_trace.h:58 kernel/bpf/bpf_iter.c:700)
[1645.883680][T49812] #2: ffff88815d74c620 (&mm->mmap_lock){++++}-{4:4}, at: copy_str_from_process_vm (include/linux/mmap_lock.h:153 mm/memory.c:6686 mm/memory.c:6810)
[ 1645.893817][T49812]
[ 1645.893817][T49812] stack backtrace:
[ 1645.899581][T49812] CPU: 3 UID: 0 PID: 49812 Comm: test_progs Tainted: G OE 6.13.0-rc3-00084-g974e24f3e253 #1
[ 1645.910828][T49812] Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE
[ 1645.916848][T49812] Hardware name: Dell Inc. OptiPlex 7050/062KRH, BIOS 1.2.0 12/22/2016
[ 1645.924972][T49812] Call Trace:
[ 1645.928128][T49812] <TASK>
[1645.930938][T49812] dump_stack_lvl (lib/dump_stack.c:124)
[1645.935320][T49812] print_deadlock_bug (kernel/locking/lockdep.c:3040)
[1645.940224][T49812] validate_chain (kernel/locking/lockdep.c:3894)
[1645.944784][T49812] ? __pfx_validate_chain (kernel/locking/lockdep.c:3860)
[1645.949860][T49812] ? mark_lock (kernel/locking/lockdep.c:4727)
[1645.954062][T49812] __lock_acquire (kernel/locking/lockdep.c:5226)
[1645.958640][T49812] lock_acquire (kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5851 kernel/locking/lockdep.c:5814)
[1645.963015][T49812] ? __might_fault (mm/memory.c:6852 mm/memory.c:6845)
[1645.967479][T49812] ? __pfx_lock_acquire (kernel/locking/lockdep.c:5817)
[1645.972377][T49812] ? __rcu_read_unlock (kernel/rcu/tree_plugin.h:440 (discriminator 2))
[1645.977187][T49812] ? mtree_load (lib/maple_tree.c:6337)
[1645.981563][T49812] ? lock_is_held_type (kernel/locking/lockdep.c:5590 kernel/locking/lockdep.c:5921)
[1645.986460][T49812] ? __might_fault (mm/memory.c:6852 mm/memory.c:6845)
[1645.990923][T49812] __might_fault (mm/memory.c:6852 mm/memory.c:6845)
[1645.995212][T49812] ? __might_fault (mm/memory.c:6852 mm/memory.c:6845)
[1645.999699][T49812] strncpy_from_user (lib/strncpy_from_user.c:120)
[1646.004425][T49812] copy_str_from_process_vm (include/linux/page-flags.h:242 include/linux/highmem.h:661 mm/memory.c:6722 mm/memory.c:6810)
[1646.009845][T49812] ? __pfx_copy_str_from_process_vm (mm/memory.c:6802)
[1646.015789][T49812] bpf_copy_from_user_task_str (kernel/bpf/helpers.c:3104)
[1646.021297][T49812] bpf_prog_f57787fdd126684b_dump_task_sleepable+0x278/0x4d0
[1646.028546][T49812] ? __might_fault (mm/memory.c:6852 mm/memory.c:6845)
[1646.033011][T49812] bpf_iter_run_prog (include/linux/bpf.h:1290 include/linux/filter.h:701 include/linux/filter.h:708 kernel/bpf/bpf_iter.c:704)
[1646.037821][T49812] ? __pfx_bpf_iter_run_prog (kernel/bpf/bpf_iter.c:695)
[1646.043151][T49812] ? __pfx___lock_release+0x10/0x10
[1646.048837][T49812] task_seq_show (kernel/bpf/task_iter.c:193)
[1646.053124][T49812] ? __pfx_task_seq_show (kernel/bpf/task_iter.c:193)
[1646.058111][T49812] bpf_seq_read (kernel/bpf/bpf_iter.c:138)
[1646.062399][T49812] ? rw_verify_area (include/linux/fsnotify_backend.h:501 include/linux/fsnotify.h:24 include/linux/fsnotify.h:127 include/linux/fsnotify.h:153 fs/read_write.c:470)
[1646.067035][T49812] vfs_read (fs/read_write.c:563)
[1646.071062][T49812] ? __pfx___lock_release+0x10/0x10
[1646.076740][T49812] ? __pfx_vfs_read (fs/read_write.c:546)
[1646.081290][T49812] ? __pfx___lock_release+0x10/0x10
[1646.086968][T49812] ? __mutex_unlock_slowpath (arch/x86/include/asm/atomic64_64.h:101 include/linux/atomic/atomic-arch-fallback.h:4329 include/linux/atomic/atomic-long.h:1506 include/linux/atomic/atomic-instrumented.h:4481 kernel/locking/mutex.c:913)
[1646.092472][T49812] ? __fget_files (include/linux/rcupdate.h:347 include/linux/rcupdate.h:880 fs/file.c:1050)
[1646.097022][T49812] ? __rcu_read_unlock (kernel/rcu/tree_plugin.h:440 (discriminator 2))
[1646.101831][T49812] ? __fget_files (fs/file.c:1053)
[1646.106383][T49812] ksys_read (fs/read_write.c:709)
[1646.110410][T49812] ? __pfx_ksys_read (fs/read_write.c:698)
[1646.115042][T49812] ? fput (arch/x86/include/asm/atomic64_64.h:79 include/linux/atomic/atomic-arch-fallback.h:2913 include/linux/atomic/atomic-arch-fallback.h:3364 include/linux/atomic/atomic-long.h:698 include/linux/atomic/atomic-instrumented.h:3767 include/linux/file_ref.h:157 fs/file_table.c:501)
[1646.118809][T49812] ? ksys_read (fs/read_write.c:698)
[1646.123007][T49812] ? mark_held_locks (kernel/locking/lockdep.c:4309)
[1646.127662][T49812] do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83)
[1646.132033][T49812] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4347 kernel/locking/lockdep.c:4406)
[1646.137887][T49812] ? syscall_exit_to_user_mode (arch/x86/include/asm/jump_label.h:36 include/linux/context_tracking_state.h:108 include/linux/context_tracking.h:41 include/linux/entry-common.h:364 kernel/entry/common.c:220)
[1646.143481][T49812] ? do_syscall_64 (arch/x86/entry/common.c:102)
[1646.148026][T49812] ? __pfx_vfs_read (fs/read_write.c:546)
[1646.152574][T49812] ? fput (arch/x86/include/asm/atomic64_64.h:79 include/linux/atomic/atomic-arch-fallback.h:2913 include/linux/atomic/atomic-arch-fallback.h:3364 include/linux/atomic/atomic-long.h:698 include/linux/atomic/atomic-instrumented.h:3767 include/linux/file_ref.h:157 fs/file_table.c:501)
[1646.156339][T49812] ? __fget_files (include/linux/rcupdate.h:347 include/linux/rcupdate.h:880 fs/file.c:1050)
[1646.160891][T49812] ? __rcu_read_unlock (kernel/rcu/tree_plugin.h:440 (discriminator 2))
[1646.165712][T49812] ? __fget_files (fs/file.c:1053)
[1646.170267][T49812] ? fput (arch/x86/include/asm/atomic64_64.h:79 include/linux/atomic/atomic-arch-fallback.h:2913 include/linux/atomic/atomic-arch-fallback.h:3364 include/linux/atomic/atomic-long.h:698 include/linux/atomic/atomic-instrumented.h:3767 include/linux/file_ref.h:157 fs/file_table.c:501)
[1646.174039][T49812] ? ksys_read (fs/read_write.c:698)
[1646.178243][T49812] ? __pfx_ksys_read (fs/read_write.c:698)
[1646.182887][T49812] ? mark_held_locks (kernel/locking/lockdep.c:4309)
[1646.187531][T49812] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4347 kernel/locking/lockdep.c:4406)
[1646.193388][T49812] ? syscall_exit_to_user_mode (arch/x86/include/asm/jump_label.h:36 include/linux/context_tracking_state.h:108 include/linux/context_tracking.h:41 include/linux/entry-common.h:364 kernel/entry/common.c:220)
[1646.198982][T49812] ? do_syscall_64 (arch/x86/entry/common.c:102)
[1646.203529][T49812] ? syscall_exit_to_user_mode (arch/x86/include/asm/jump_label.h:36 include/linux/context_tracking_state.h:108 include/linux/context_tracking.h:41 include/linux/entry-common.h:364 kernel/entry/common.c:220)
[1646.209122][T49812] ? do_syscall_64 (arch/x86/entry/common.c:102)
[1646.213695][T49812] ? __fget_files (fs/file.c:1053)
[1646.218244][T49812] ? __fget_files (include/linux/rcupdate.h:347 include/linux/rcupdate.h:880 fs/file.c:1050)
[1646.222794][T49812] ? __rcu_read_unlock (kernel/rcu/tree_plugin.h:440 (discriminator 2))
[1646.227602][T49812] ? __fget_files (fs/file.c:1053)
[1646.232151][T49812] ? fput (arch/x86/include/asm/atomic64_64.h:79 include/linux/atomic/atomic-arch-fallback.h:2913 include/linux/atomic/atomic-arch-fallback.h:3364 include/linux/atomic/atomic-long.h:698 include/linux/atomic/atomic-instrumented.h:3767 include/linux/file_ref.h:157 fs/file_table.c:501)
[1646.235917][T49812] ? ksys_read (fs/read_write.c:698)
[1646.240118][T49812] ? __pfx_ksys_read (fs/read_write.c:698)
[1646.244752][T49812] ? mark_held_locks (kernel/locking/lockdep.c:4309)
[1646.249391][T49812] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4347 kernel/locking/lockdep.c:4406)
[1646.255245][T49812] ? syscall_exit_to_user_mode (arch/x86/include/asm/jump_label.h:36 include/linux/context_tracking_state.h:108 include/linux/context_tracking.h:41 include/linux/entry-common.h:364 kernel/entry/common.c:220)
[1646.260837][T49812] ? do_syscall_64 (arch/x86/entry/common.c:102)
[1646.265383][T49812] ? do_syscall_64 (arch/x86/entry/common.c:102)
[1646.269930][T49812] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
[ 1646.275697][T49812] RIP: 0033:0x7fdb112fe25c
[ 1646.279982][T49812] Code: ec 28 48 89 54 24 18 48 89 74 24 10 89 7c 24 08 e8 d9 d5 f8 ff 48 8b 54 24 18 48 8b 74 24 10 41 89 c0 8b 7c 24 08 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 34 44 89 c7 48 89 44 24 08 e8 2f d6 f8 ff 48
All code
========
0: ec in (%dx),%al
1: 28 48 89 sub %cl,-0x77(%rax)
4: 54 push %rsp
5: 24 18 and $0x18,%al
7: 48 89 74 24 10 mov %rsi,0x10(%rsp)
c: 89 7c 24 08 mov %edi,0x8(%rsp)
10: e8 d9 d5 f8 ff call 0xfffffffffff8d5ee
15: 48 8b 54 24 18 mov 0x18(%rsp),%rdx
1a: 48 8b 74 24 10 mov 0x10(%rsp),%rsi
1f: 41 89 c0 mov %eax,%r8d
22: 8b 7c 24 08 mov 0x8(%rsp),%edi
26: 31 c0 xor %eax,%eax
28: 0f 05 syscall
2a:* 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax <-- trapping instruction
30: 77 34 ja 0x66
32: 44 89 c7 mov %r8d,%edi
35: 48 89 44 24 08 mov %rax,0x8(%rsp)
3a: e8 2f d6 f8 ff call 0xfffffffffff8d66e
3f: 48 rex.W
Code starting with the faulting instruction
===========================================
0: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax
6: 77 34 ja 0x3c
8: 44 89 c7 mov %r8d,%edi
b: 48 89 44 24 08 mov %rax,0x8(%rsp)
10: e8 2f d6 f8 ff call 0xfffffffffff8d644
15: 48 rex.W
The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20250116/202501161219.c031baa7-lkp@intel.com
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [bpf-next v2 1/2] bpf: Add bpf_copy_from_user_task_str kfunc
2025-01-10 21:50 ` Andrii Nakryiko
@ 2025-01-17 2:22 ` Jordan Rome
2025-01-17 18:26 ` Andrii Nakryiko
0 siblings, 1 reply; 9+ messages in thread
From: Jordan Rome @ 2025-01-17 2:22 UTC (permalink / raw)
To: Andrii Nakryiko
Cc: bpf, linux-mm, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Kernel Team, Andrew Morton, Shakeel Butt
On Fri, Jan 10, 2025 at 4:50 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Mon, Jan 6, 2025 at 6:12 PM Jordan Rome <linux@jordanrome.com> wrote:
> >
> > This new kfunc will be able to copy a string
> > from another process's/task's address space.
>
> nit: this is kernel code, task is unambiguous, so I'd drop the
> "process" reference here
>
> > This is similar to `bpf_copy_from_user_str`
> > but accepts a `struct task_struct*` argument.
> >
> > This required adding an additional function
> > in memory.c, namely `copy_str_from_process_vm`,
> > which works similar to `access_process_vm`
> > but utilizes the `strncpy_from_user` helper
> > and only supports reading/copying and not writing.
> >
> > Signed-off-by: Jordan Rome <linux@jordanrome.com>
> > ---
> > include/linux/mm.h | 3 ++
> > kernel/bpf/helpers.c | 46 ++++++++++++++++++++
> > mm/memory.c | 101 +++++++++++++++++++++++++++++++++++++++++++
> > 3 files changed, 150 insertions(+)
> >
>
> please check kernel test bot's complains as well
Maybe I need an entry in nommu.c as well for 'copy_remote_vm_str'?
>
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index c39c4945946c..52b304b20630 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -2484,6 +2484,9 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
> > extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
> > void *buf, int len, unsigned int gup_flags);
> >
> > +extern int copy_str_from_process_vm(struct task_struct *tsk, unsigned long addr,
> > + void *buf, int len, unsigned int gup_flags);
>
> nit: curious what mm folks think about naming, I'd go with a slightly
> less verbose naming: "copy_remote_vm_str" (copy vs access, _str suffix
> for non fixed-sized semantics marking)
Ack.
>
> for the next revision, let's split out mm parts from helpers parts, I
> don't think we lose much as this new internal API is self-contained,
> and it will be easier for mm folks to review
Ack.
>
> > +
> > long get_user_pages_remote(struct mm_struct *mm,
> > unsigned long start, unsigned long nr_pages,
> > unsigned int gup_flags, struct page **pages,
> > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > index cd5f9884d85b..45d41b7a9906 100644
> > --- a/kernel/bpf/helpers.c
> > +++ b/kernel/bpf/helpers.c
> > @@ -3072,6 +3072,51 @@ __bpf_kfunc void bpf_local_irq_restore(unsigned long *flags__irq_flag)
> > local_irq_restore(*flags__irq_flag);
> > }
> >
> > +/**
> > + * bpf_copy_from_user_task_str() - Copy a string from an task's address space
> > + * @dst: Destination address, in kernel space. This buffer must be
> > + * at least @dst__sz bytes long.
> > + * @dst__sz: Maximum number of bytes to copy, includes the trailing NUL.
> > + * @unsafe_ptr__ign: Source address in the task's address space.
> > + * @tsk: The task whose address space will be used
> > + * @flags: The only supported flag is BPF_F_PAD_ZEROS
> > + *
> > + * Copies a NULL-terminated string from a task's address space to BPF space.
>
> there is no "BPF space", really... maybe "copies string into *dst* buffer"
Ack.
>
> > + * If user string is too long this will still ensure zero termination in the
> > + * dst buffer unless buffer size is 0.
> > + *
> > + * If BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success and
> > + * memset all of @dst on failure.
> > + */
> > +__bpf_kfunc int bpf_copy_from_user_task_str(void *dst, u32 dst__sz, const void __user *unsafe_ptr__ign, struct task_struct *tsk, u64 flags)
>
> this looks like a long line, does it fit under 100 characters?
I'll fix it in the next revision.
>
> > +{
> > + int count = dst__sz - 1;
> > + int ret = 0;
> > +
> > + if (unlikely(flags & ~BPF_F_PAD_ZEROS))
> > + return -EINVAL;
> > +
> > + if (unlikely(!dst__sz))
> > + return 0;
> > +
> > + ret = copy_str_from_process_vm(tsk, (unsigned long)unsafe_ptr__ign, dst, count, 0);
> > +
> > + if (ret <= 0) {
> > + if (flags & BPF_F_PAD_ZEROS)
> > + memset((char *)dst, 0, dst__sz);
>
> nit: no need for (char *) cast? memset takes void *, I think
>
> > + return ret;
>
> if ret == 0, is that an error? If so, `return ret ?: -EINVAL;` or
> something along those lines?
Good catch.
>
> pw-bot: cr
>
> > + }
> > +
> > + if (ret < count) {
> > + if (flags & BPF_F_PAD_ZEROS)
> > + memset((char *)dst + ret, 0, dst__sz - ret);
> > + } else {
> > + ((char *)dst)[count] = '\0';
> > + }
> > +
> > + return ret + 1;
> > +}
> > +
> > __bpf_kfunc_end_defs();
> >
> > BTF_KFUNCS_START(generic_btf_ids)
> > @@ -3164,6 +3209,7 @@ BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
> > BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
> > BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
> > BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
> > +BTF_ID_FLAGS(func, bpf_copy_from_user_task_str, KF_SLEEPABLE)
> > BTF_ID_FLAGS(func, bpf_get_kmem_cache)
> > BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE)
> > BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 75c2dfd04f72..514490bd7d6d 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -6673,6 +6673,75 @@ static int __access_remote_vm(struct mm_struct *mm, unsigned long addr,
> > return buf - old_buf;
> > }
> >
> > +/*
> > + * Copy a string from another process's address space as given in mm.
> > + * Don't return partial results. If there is any error return -EFAULT.
>
> What does "don't return partial results" mean? What happens if we read
> part of a string and then fail to read the rest?
As per the last sentence, if we fail to read the rest of the string then
an -EFAULT is returned.
>
> > + */
> > +static int __copy_str_from_remote_vm(struct mm_struct *mm, unsigned long addr,
> > + void *buf, int len, unsigned int gup_flags)
> > +{
> > + void *old_buf = buf;
> > + int err = 0;
> > +
> > + if (mmap_read_lock_killable(mm))
> > + return -EFAULT;
> > +
> > + /* Untag the address before looking up the VMA */
> > + addr = untagged_addr_remote(mm, addr);
> > +
> > + /* Avoid triggering the temporary warning in __get_user_pages */
> > + if (!vma_lookup(mm, addr)) {
> > + mmap_read_unlock(mm);
> > + return -EFAULT;
>
> maybe let's do (so that we do mmap_read_unlock in just one place)
Ack.
>
> err = -EFAULT;
> goto err_out;
>
> and then see below
>
> > + }
> > +
> > + while (len) {
> > + int bytes, offset, retval;
> > + void *maddr;
> > + struct vm_area_struct *vma = NULL;
> > + struct page *page = get_user_page_vma_remote(mm, addr,
> > + gup_flags, &vma);
> > +
>
> nit: I'd split page declaration and assignment and kept
> get_user_page_vma_remote() invocation on a single line
Ack.
>
> > + if (IS_ERR(page)) {
> > + /*
> > + * Treat as a total failure for now until we decide how
> > + * to handle the CONFIG_HAVE_IOREMAP_PROT case and
> > + * stack expansion.
> > + */
> > + err = -EFAULT;
> > + break;
> > + }
> > +
> > + bytes = len;
> > + offset = addr & (PAGE_SIZE - 1);
> > + if (bytes > PAGE_SIZE - offset)
> > + bytes = PAGE_SIZE - offset;
> > +
> > + maddr = kmap_local_page(page);
> > + retval = strncpy_from_user(buf, (const char __user *)addr, bytes);
>
> you are not using maddr... that seems wrong (even if it works due to
> how kmap_local_page is currently implemented)
How do you think we should handle it then?
>
> > + unmap_and_put_page(page, maddr);
> > +
> > + if (retval < 0) {
> > + err = retval;
> > + break;
> > + }
> > +
> > + len -= retval;
> > + buf += retval;
> > + addr += retval;
> > +
> > + /* Found the end of the string */
> > + if (retval < bytes)
> > + break;
> > + }
>
> err_out: here
>
> > + mmap_read_unlock(mm);
> > +
> > + if (err)
> > + return err;
> > +
> > + return buf - old_buf;
> > +}
> > +
> > /**
> > * access_remote_vm - access another process' address space
> > * @mm: the mm_struct of the target address space
> > @@ -6714,6 +6783,38 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr,
> > }
> > EXPORT_SYMBOL_GPL(access_process_vm);
> >
> > +/**
> > + * copy_str_from_process_vm - copy a string from another process's address space.
> > + * @tsk: the task of the target address space
> > + * @addr: start address to access
>
> access -> read from
Ack.
>
> > + * @buf: source or destination buffer
>
> for this api it's always the destination, right?
Right. Will fix.
>
> > + * @len: number of bytes to transfer
> > + * @gup_flags: flags modifying lookup behaviour
> > + *
> > + * The caller must hold a reference on @mm.
> > + *
> > + * Return: number of bytes copied from source to destination. If the string
> > + * is shorter than @len then return the length of the string.
>
> and if the string is longer than @len, then what happens? we should
> either specify or drop the "if string is shorter bit" and make it
> unambiguous whether terminating zero is included or not
I'll make it clearer.
>
> > + * On any error, return -EFAULT.
> > + */
> > +int copy_str_from_process_vm(struct task_struct *tsk, unsigned long addr,
> > + void *buf, int len, unsigned int gup_flags)
> > +{
> > + struct mm_struct *mm;
> > + int ret;
> > +
> > + mm = get_task_mm(tsk);
> > + if (!mm)
> > + return -EFAULT;
> > +
> > + ret = __copy_str_from_remote_vm(mm, addr, buf, len, gup_flags);
> > +
> > + mmput(mm);
> > +
> > + return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(copy_str_from_process_vm);
> > +
> > /*
> > * Print the name of a VMA.
> > */
> > --
> > 2.43.5
> >
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [bpf-next v2 1/2] bpf: Add bpf_copy_from_user_task_str kfunc
2025-01-17 2:22 ` Jordan Rome
@ 2025-01-17 18:26 ` Andrii Nakryiko
0 siblings, 0 replies; 9+ messages in thread
From: Andrii Nakryiko @ 2025-01-17 18:26 UTC (permalink / raw)
To: Jordan Rome
Cc: bpf, linux-mm, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Kernel Team, Andrew Morton, Shakeel Butt
On Thu, Jan 16, 2025 at 6:22 PM Jordan Rome <linux@jordanrome.com> wrote:
>
> On Fri, Jan 10, 2025 at 4:50 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Mon, Jan 6, 2025 at 6:12 PM Jordan Rome <linux@jordanrome.com> wrote:
> > >
> > > This new kfunc will be able to copy a string
> > > from another process's/task's address space.
> >
> > nit: this is kernel code, task is unambiguous, so I'd drop the
> > "process" reference here
> >
> > > This is similar to `bpf_copy_from_user_str`
> > > but accepts a `struct task_struct*` argument.
> > >
> > > This required adding an additional function
> > > in memory.c, namely `copy_str_from_process_vm`,
> > > which works similar to `access_process_vm`
> > > but utilizes the `strncpy_from_user` helper
> > > and only supports reading/copying and not writing.
> > >
> > > Signed-off-by: Jordan Rome <linux@jordanrome.com>
> > > ---
> > > include/linux/mm.h | 3 ++
> > > kernel/bpf/helpers.c | 46 ++++++++++++++++++++
> > > mm/memory.c | 101 +++++++++++++++++++++++++++++++++++++++++++
> > > 3 files changed, 150 insertions(+)
> > >
> >
> > please check kernel test bot's complains as well
>
> Maybe I need an entry in nommu.c as well for 'copy_remote_vm_str'?
yep, probably, I see that access_remote_vm has two implementations as well
>
> >
> > > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > > index c39c4945946c..52b304b20630 100644
> > > --- a/include/linux/mm.h
> > > +++ b/include/linux/mm.h
[...]
> > > +/*
> > > + * Copy a string from another process's address space as given in mm.
> > > + * Don't return partial results. If there is any error return -EFAULT.
> >
> > What does "don't return partial results" mean? What happens if we read
> > part of a string and then fail to read the rest?
>
> As per the last sentence, if we fail to read the rest of the string then
> an -EFAULT is returned.
This is confusing because the buffer will contain the partially read
string contents even with -EFAULT. And this "Don't return partial
results" can be interpreted as this API sanitizing the buffer (by
zeroing or something). So I'd drop the "Don't return partial results"
part, as it just creates confusion.
>
> >
> > > + */
[...]
> >
> > > + if (IS_ERR(page)) {
> > > + /*
> > > + * Treat as a total failure for now until we decide how
> > > + * to handle the CONFIG_HAVE_IOREMAP_PROT case and
> > > + * stack expansion.
> > > + */
> > > + err = -EFAULT;
> > > + break;
> > > + }
> > > +
> > > + bytes = len;
> > > + offset = addr & (PAGE_SIZE - 1);
> > > + if (bytes > PAGE_SIZE - offset)
> > > + bytes = PAGE_SIZE - offset;
> > > +
> > > + maddr = kmap_local_page(page);
> > > + retval = strncpy_from_user(buf, (const char __user *)addr, bytes);
> >
> > you are not using maddr... that seems wrong (even if it works due to
> > how kmap_local_page is currently implemented)
>
> How do you think we should handle it then?
Use (maddr + offset_within_the_page) instead of addr itself?
>
> >
> > > + unmap_and_put_page(page, maddr);
> > > +
> > > + if (retval < 0) {
> > > + err = retval;
> > > + break;
> > > + }
> > > +
> > > + len -= retval;
> > > + buf += retval;
> > > + addr += retval;
> > > +
> > > + /* Found the end of the string */
> > > + if (retval < bytes)
> > > + break;
> > > + }
> >
[...]
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2025-01-17 18:26 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-01-07 2:06 [bpf-next v2 1/2] bpf: Add bpf_copy_from_user_task_str kfunc Jordan Rome
2025-01-07 2:06 ` [bpf-next v2 2/2] selftests/bpf: Add tests for bpf_copy_from_user_task_str Jordan Rome
2025-01-10 22:01 ` Andrii Nakryiko
2025-01-16 4:57 ` kernel test robot
2025-01-09 1:13 ` [bpf-next v2 1/2] bpf: Add bpf_copy_from_user_task_str kfunc kernel test robot
2025-01-10 0:48 ` kernel test robot
2025-01-10 21:50 ` Andrii Nakryiko
2025-01-17 2:22 ` Jordan Rome
2025-01-17 18:26 ` Andrii Nakryiko
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox