* [bpf-next v3 1/3] mm: add copy_remote_vm_str
@ 2025-01-24 18:16 Jordan Rome
2025-01-25 0:08 ` Andrii Nakryiko
0 siblings, 1 reply; 4+ messages in thread
From: Jordan Rome @ 2025-01-24 18:16 UTC (permalink / raw)
To: bpf
Cc: linux-mm, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Kernel Team, Andrew Morton, Shakeel Butt, Alexander Potapenko
Similar to `access_process_vm` but specific to strings.
Also chunks reads by page and utilizes `strscpy`
for handling null termination.
Signed-off-by: Jordan Rome <linux@jordanrome.com>
---
include/linux/mm.h | 3 ++
mm/memory.c | 119 +++++++++++++++++++++++++++++++++++++++++++++
mm/nommu.c | 68 ++++++++++++++++++++++++++
3 files changed, 190 insertions(+)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f02925447e59..f3a05b3eb2f2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2485,6 +2485,9 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
+extern int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags);
+
long get_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
diff --git a/mm/memory.c b/mm/memory.c
index 398c031be9ba..905e3f10fad0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6714,6 +6714,125 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr,
}
EXPORT_SYMBOL_GPL(access_process_vm);
+/*
+ * Copy a string from another process's address space as given in mm.
+ * If there is any error return -EFAULT.
+ */
+static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags)
+{
+ void *old_buf = buf;
+ int err = 0;
+
+ if (mmap_read_lock_killable(mm))
+ return -EFAULT;
+
+ /* Untag the address before looking up the VMA */
+ addr = untagged_addr_remote(mm, addr);
+
+ /* Avoid triggering the temporary warning in __get_user_pages */
+ if (!vma_lookup(mm, addr)) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ while (len) {
+ int bytes, offset, retval, end;
+ void *maddr;
+ struct page *page;
+ struct vm_area_struct *vma = NULL;
+
+ page = get_user_page_vma_remote(mm, addr, gup_flags, &vma);
+
+ if (IS_ERR(page)) {
+ /*
+ * Treat as a total failure for now until we decide how
+ * to handle the CONFIG_HAVE_IOREMAP_PROT case and
+ * stack expansion.
+ */
+ err = -EFAULT;
+ goto out;
+ }
+
+ bytes = len;
+ offset = addr & (PAGE_SIZE - 1);
+ if (bytes > PAGE_SIZE - offset)
+ bytes = PAGE_SIZE - offset;
+
+ maddr = kmap_local_page(page);
+ retval = strscpy(buf, maddr + offset, bytes);
+ unmap_and_put_page(page, maddr);
+
+ if (retval > -1 && retval < bytes) {
+ /* found the end of the string */
+ buf += retval;
+ goto out;
+ }
+
+ if (retval == -E2BIG) {
+ retval = bytes;
+ /*
+ * Because strscpy always null terminates we need to
+ * copy the last byte in the page if we are going to
+ * load more pages
+ */
+ if (bytes < len) {
+ end = bytes - 1;
+ copy_from_user_page(vma,
+ page,
+ addr + end,
+ buf + end,
+ maddr + (PAGE_SIZE - 1),
+ 1);
+ }
+ }
+
+ len -= retval;
+ buf += retval;
+ addr += retval;
+ }
+
+out:
+ mmap_read_unlock(mm);
+ if (err)
+ return err;
+
+ return buf - old_buf;
+}
+
+/**
+ * copy_remote_vm_str - copy a string from another process's address space.
+ * @tsk: the task of the target address space
+ * @addr: start address to read from
+ * @buf: destination buffer
+ * @len: number of bytes to transfer
+ * @gup_flags: flags modifying lookup behaviour
+ *
+ * The caller must hold a reference on @mm.
+ *
+ * Return: number of bytes copied from @addr (source) to @buf (destination).
+ * If the source string is shorter than @len then return the length of the
+ * source string. If the source string is longer than @len, return @len.
+ * On any error, return -EFAULT.
+ */
+int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags)
+{
+ struct mm_struct *mm;
+ int ret;
+
+ mm = get_task_mm(tsk);
+ if (!mm)
+ return -EFAULT;
+
+ ret = __copy_remote_vm_str(mm, addr, buf, len, gup_flags);
+
+ mmput(mm);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(copy_remote_vm_str);
+
/*
* Print the name of a VMA.
*/
diff --git a/mm/nommu.c b/mm/nommu.c
index 9cb6e99215e2..23281751b1eb 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1701,6 +1701,74 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
}
EXPORT_SYMBOL_GPL(access_process_vm);
+/*
+ * Copy a string from another process's address space as given in mm.
+ * If there is any error return -EFAULT.
+ */
+static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr,
+ void *buf, int len)
+{
+ int ret = 0;
+
+ if (mmap_read_lock_killable(mm))
+ return -EFAULT;
+
+ /* the access must start within one of the target process's mappings */
+ vma = find_vma(mm, addr);
+ if (vma) {
+ /* don't overrun this mapping */
+ if (addr + len >= vma->vm_end)
+ len = vma->vm_end - addr;
+
+ /* only read mappings where it is permitted */
+ if (vma->vm_flags & VM_MAYREAD) {
+ ret = strscpy(buf, addr, len);
+ if (ret == -E2BIG)
+ ret = len;
+ } else {
+ ret = -EFAULT;
+ }
+ } else {
+ ret = -EFAULT;
+ }
+
+ mmap_read_unlock(mm);
+ return ret;
+}
+
+/**
+ * copy_remote_vm_str - copy a string from another process's address space.
+ * @tsk: the task of the target address space
+ * @addr: start address to read from
+ * @buf: destination buffer
+ * @len: number of bytes to transfer
+ * @gup_flags: flags modifying lookup behaviour (unused)
+ *
+ * The caller must hold a reference on @mm.
+ *
+ * Return: number of bytes copied from @addr (source) to @buf (destination).
+ * If the source string is shorter than @len then return the length of the
+ * source string. If the source string is longer than @len, return @len.
+ * On any error, return -EFAULT.
+ */
+int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags)
+{
+ struct mm_struct *mm;
+ int ret;
+
+ mm = get_task_mm(tsk);
+ if (!mm)
+ return -EFAULT;
+
+ ret = __copy_remote_vm_str(mm, addr, buf, len);
+
+ mmput(mm);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(copy_remote_vm_str);
+
/**
* nommu_shrink_inode_mappings - Shrink the shared mappings on an inode
* @inode: The inode to check
--
2.43.5
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [bpf-next v3 1/3] mm: add copy_remote_vm_str
2025-01-24 18:16 [bpf-next v3 1/3] mm: add copy_remote_vm_str Jordan Rome
@ 2025-01-25 0:08 ` Andrii Nakryiko
2025-01-25 14:01 ` Jordan Rome
0 siblings, 1 reply; 4+ messages in thread
From: Andrii Nakryiko @ 2025-01-25 0:08 UTC (permalink / raw)
To: Jordan Rome
Cc: bpf, linux-mm, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Kernel Team, Andrew Morton, Shakeel Butt,
Alexander Potapenko
On Fri, Jan 24, 2025 at 10:22 AM Jordan Rome <linux@jordanrome.com> wrote:
>
> Similar to `access_process_vm` but specific to strings.
> Also chunks reads by page and utilizes `strscpy`
> for handling null termination.
>
> Signed-off-by: Jordan Rome <linux@jordanrome.com>
> ---
> include/linux/mm.h | 3 ++
> mm/memory.c | 119 +++++++++++++++++++++++++++++++++++++++++++++
> mm/nommu.c | 68 ++++++++++++++++++++++++++
> 3 files changed, 190 insertions(+)
>
[...]
> + maddr = kmap_local_page(page);
> + retval = strscpy(buf, maddr + offset, bytes);
> + unmap_and_put_page(page, maddr);
> +
> + if (retval > -1 && retval < bytes) {
> + /* found the end of the string */
> + buf += retval;
> + goto out;
> + }
> +
> + if (retval == -E2BIG) {
nit: strscpy() can't return any other error, so I'd structure result
handling as:
if (retval < 0) {
/* that annoying last byte copy */
retval = bytes;
}
if (retval < bytes) {
/* "we are done" handling */
}
/* common len, buf, addr adjustment logic stays here */
but also here's the question. If we get E2BIG, while bytes is exactly
how many bytes we have left in the buffer, the last byte should be
zero, no? So this should be cleanly handled, right? Or do we have a
test for that and it works already?
> + retval = bytes;
> + /*
> + * Because strscpy always null terminates we need to
> + * copy the last byte in the page if we are going to
> + * load more pages
> + */
> + if (bytes < len) {
> + end = bytes - 1;
> + copy_from_user_page(vma,
> + page,
> + addr + end,
> + buf + end,
you don't need the `end` variable, just use `bytes - 1` twice?
> + maddr + (PAGE_SIZE - 1),
> + 1);
> + }
> + }
> +
> + len -= retval;
> + buf += retval;
> + addr += retval;
> + }
> +
> +out:
> + mmap_read_unlock(mm);
> + if (err)
> + return err;
> +
> + return buf - old_buf;
> +}
> +
> +/**
> + * copy_remote_vm_str - copy a string from another process's address space.
> + * @tsk: the task of the target address space
> + * @addr: start address to read from
> + * @buf: destination buffer
> + * @len: number of bytes to transfer
> + * @gup_flags: flags modifying lookup behaviour
> + *
> + * The caller must hold a reference on @mm.
> + *
> + * Return: number of bytes copied from @addr (source) to @buf (destination).
> + * If the source string is shorter than @len then return the length of the
> + * source string. If the source string is longer than @len, return @len.
> + * On any error, return -EFAULT.
strncpy_from_user_nofault() doc says:
On success, returns the length of the string INCLUDING the trailing NUL
Is this the case with copy_remote_vm_str() as well? I.e., if the
source string is 5 bytes + NUL, dst buf is 10. Will we get 5 or 6
returned? We should be very careful with all this +/- 1 business in
corner cases, too easy to mess this up.
> + */
> +int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
> + void *buf, int len, unsigned int gup_flags)
> +{
> + struct mm_struct *mm;
> + int ret;
> +
> + mm = get_task_mm(tsk);
> + if (!mm)
> + return -EFAULT;
> +
> + ret = __copy_remote_vm_str(mm, addr, buf, len, gup_flags);
> +
> + mmput(mm);
> +
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(copy_remote_vm_str);
> +
[...]
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [bpf-next v3 1/3] mm: add copy_remote_vm_str
2025-01-25 0:08 ` Andrii Nakryiko
@ 2025-01-25 14:01 ` Jordan Rome
2025-01-27 19:21 ` Andrii Nakryiko
0 siblings, 1 reply; 4+ messages in thread
From: Jordan Rome @ 2025-01-25 14:01 UTC (permalink / raw)
To: Andrii Nakryiko
Cc: bpf, linux-mm, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Kernel Team, Andrew Morton, Shakeel Butt,
Alexander Potapenko
On Fri, Jan 24, 2025 at 7:09 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Fri, Jan 24, 2025 at 10:22 AM Jordan Rome <linux@jordanrome.com> wrote:
> >
> > Similar to `access_process_vm` but specific to strings.
> > Also chunks reads by page and utilizes `strscpy`
> > for handling null termination.
> >
> > Signed-off-by: Jordan Rome <linux@jordanrome.com>
> > ---
> > include/linux/mm.h | 3 ++
> > mm/memory.c | 119 +++++++++++++++++++++++++++++++++++++++++++++
> > mm/nommu.c | 68 ++++++++++++++++++++++++++
> > 3 files changed, 190 insertions(+)
> >
>
> [...]
>
> > + maddr = kmap_local_page(page);
> > + retval = strscpy(buf, maddr + offset, bytes);
> > + unmap_and_put_page(page, maddr);
> > +
> > + if (retval > -1 && retval < bytes) {
> > + /* found the end of the string */
> > + buf += retval;
> > + goto out;
> > + }
> > +
> > + if (retval == -E2BIG) {
>
> nit: strscpy() can't return any other error, so I'd structure result
> handling as:
>
> if (retval < 0) {
> /* that annoying last byte copy */
> retval = bytes;
> }
> if (retval < bytes) {
> /* "we are done" handling */
> }
>
> /* common len, buf, addr adjustment logic stays here */
>
Ack. Actually, I thought of a way to make this cleaner
and correct.
>
> but also here's the question. If we get E2BIG, while bytes is exactly
> how many bytes we have left in the buffer, the last byte should be
> zero, no? So this should be cleanly handled, right? Or do we have a
> test for that and it works already?
>
Ok, I found an inconsistency that gets handled in the BPF helper
`bpf_copy_from_user_task_str`, which I'm going to fix in the next
version of this patch.
Let me explain how this function is SUPPOSED to work
and enumerate some different test cases (a lot of which are in commit 3).
Note1: all of the target strings
are null terminated (if you try to copy a string that's not null terminated
you may accidentally copy junk).
Note2: strscopy returns E2BIG if the len requested isn't as long
as the string INCLUDING the nul terminator. So if you want to copy
"test_data\0" you need to request 10 bytes not 9. And if you request
10 or anything greater it returns 9.
Note3: This function, as opposed to the bpf helper calling
this function, returns the number copied NOT including the nul terminator.
So... the target string is "test_data".
Request 10 bytes, return 9.
Request 2 bytes, return 1.
Request 20 bytes, return 9.
Now, let's say this string falls across a page boundary
where "_" is the last character in the page.
Request 10 bytes, which becomes a request
for 5 bytes for the first page. strscopy returns E2BIG
and copies "test\0" into the buffer. We copy the last
bytes of the page into the buffer, which is the "_",
and then request 5 more bytes on the next page,
copying "data\0" and strscopy returns 4. Return 9.
Now let's say the last "a" is the last character on the page.
Request 10 bytes, which becomes a request
for 9 bytes. strscopy returns E2BIG and copies "test_dat\0"
into the buffer. Once again we copy the last byte
of the page into the buffer, which is "a"
and we request 1 more byte of the next page, which
is the nul terminator. strscopy returns 0 and this
function returns 9.
Again note, that this version of the code has a bug
that is "handled" by the bpf helper and I'm going to fix.
> > + retval = bytes;
> > + /*
> > + * Because strscpy always null terminates we need to
> > + * copy the last byte in the page if we are going to
> > + * load more pages
> > + */
> > + if (bytes < len) {
> > + end = bytes - 1;
> > + copy_from_user_page(vma,
> > + page,
> > + addr + end,
> > + buf + end,
>
> you don't need the `end` variable, just use `bytes - 1` twice?
>
> > + maddr + (PAGE_SIZE - 1),
> > + 1);
> > + }
> > + }
> > +
> > + len -= retval;
> > + buf += retval;
> > + addr += retval;
> > + }
> > +
> > +out:
> > + mmap_read_unlock(mm);
> > + if (err)
> > + return err;
> > +
> > + return buf - old_buf;
> > +}
> > +
> > +/**
> > + * copy_remote_vm_str - copy a string from another process's address space.
> > + * @tsk: the task of the target address space
> > + * @addr: start address to read from
> > + * @buf: destination buffer
> > + * @len: number of bytes to transfer
> > + * @gup_flags: flags modifying lookup behaviour
> > + *
> > + * The caller must hold a reference on @mm.
> > + *
> > + * Return: number of bytes copied from @addr (source) to @buf (destination).
> > + * If the source string is shorter than @len then return the length of the
> > + * source string. If the source string is longer than @len, return @len.
> > + * On any error, return -EFAULT.
>
> strncpy_from_user_nofault() doc says:
>
> On success, returns the length of the string INCLUDING the trailing NUL
>
> Is this the case with copy_remote_vm_str() as well? I.e., if the
> source string is 5 bytes + NUL, dst buf is 10. Will we get 5 or 6
> returned? We should be very careful with all this +/- 1 business in
> corner cases, too easy to mess this up.
>
Explained above.
> > + */
> > +int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
> > + void *buf, int len, unsigned int gup_flags)
> > +{
> > + struct mm_struct *mm;
> > + int ret;
> > +
> > + mm = get_task_mm(tsk);
> > + if (!mm)
> > + return -EFAULT;
> > +
> > + ret = __copy_remote_vm_str(mm, addr, buf, len, gup_flags);
> > +
> > + mmput(mm);
> > +
> > + return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(copy_remote_vm_str);
> > +
>
> [...]
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [bpf-next v3 1/3] mm: add copy_remote_vm_str
2025-01-25 14:01 ` Jordan Rome
@ 2025-01-27 19:21 ` Andrii Nakryiko
0 siblings, 0 replies; 4+ messages in thread
From: Andrii Nakryiko @ 2025-01-27 19:21 UTC (permalink / raw)
To: Jordan Rome
Cc: bpf, linux-mm, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Kernel Team, Andrew Morton, Shakeel Butt,
Alexander Potapenko
On Sat, Jan 25, 2025 at 6:02 AM Jordan Rome <linux@jordanrome.com> wrote:
>
> On Fri, Jan 24, 2025 at 7:09 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Fri, Jan 24, 2025 at 10:22 AM Jordan Rome <linux@jordanrome.com> wrote:
> > >
> > > Similar to `access_process_vm` but specific to strings.
> > > Also chunks reads by page and utilizes `strscpy`
> > > for handling null termination.
> > >
> > > Signed-off-by: Jordan Rome <linux@jordanrome.com>
> > > ---
> > > include/linux/mm.h | 3 ++
> > > mm/memory.c | 119 +++++++++++++++++++++++++++++++++++++++++++++
> > > mm/nommu.c | 68 ++++++++++++++++++++++++++
> > > 3 files changed, 190 insertions(+)
> > >
> >
> > [...]
> >
> > > + maddr = kmap_local_page(page);
> > > + retval = strscpy(buf, maddr + offset, bytes);
> > > + unmap_and_put_page(page, maddr);
> > > +
> > > + if (retval > -1 && retval < bytes) {
> > > + /* found the end of the string */
> > > + buf += retval;
> > > + goto out;
> > > + }
> > > +
> > > + if (retval == -E2BIG) {
> >
> > nit: strscpy() can't return any other error, so I'd structure result
> > handling as:
> >
> > if (retval < 0) {
> > /* that annoying last byte copy */
> > retval = bytes;
> > }
> > if (retval < bytes) {
> > /* "we are done" handling */
> > }
> >
> > /* common len, buf, addr adjustment logic stays here */
> >
>
> Ack. Actually, I thought of a way to make this cleaner
> and correct.
>
> >
> > but also here's the question. If we get E2BIG, while bytes is exactly
> > how many bytes we have left in the buffer, the last byte should be
> > zero, no? So this should be cleanly handled, right? Or do we have a
> > test for that and it works already?
> >
>
> Ok, I found an inconsistency that gets handled in the BPF helper
> `bpf_copy_from_user_task_str`, which I'm going to fix in the next
> version of this patch.
>
> Let me explain how this function is SUPPOSED to work
> and enumerate some different test cases (a lot of which are in commit 3).
>
> Note1: all of the target strings
> are null terminated (if you try to copy a string that's not null terminated
> you may accidentally copy junk).
>
> Note2: strscopy returns E2BIG if the len requested isn't as long
> as the string INCLUDING the nul terminator. So if you want to copy
> "test_data\0" you need to request 10 bytes not 9. And if you request
> 10 or anything greater it returns 9.
yeah, that's actually smart for strscpy() to have this protocol, it
allows to read full string and know that it's full (if the string fits
exactly into the dst buffer). But I think on the BPF side we have a
convention to return the size *including* the NUL byte, so we get a
bit of inefficiency. But oh well, I think consistency is better to be
maintained.
>
> Note3: This function, as opposed to the bpf helper calling
> this function, returns the number copied NOT including the nul terminator.
>
> So... the target string is "test_data".
>
> Request 10 bytes, return 9.
> Request 2 bytes, return 1.
> Request 20 bytes, return 9.
>
> Now, let's say this string falls across a page boundary
> where "_" is the last character in the page.
>
> Request 10 bytes, which becomes a request
> for 5 bytes for the first page. strscopy returns E2BIG
> and copies "test\0" into the buffer. We copy the last
> bytes of the page into the buffer, which is the "_",
> and then request 5 more bytes on the next page,
> copying "data\0" and strscopy returns 4. Return 9.
>
> Now let's say the last "a" is the last character on the page.
> Request 10 bytes, which becomes a request
> for 9 bytes. strscopy returns E2BIG and copies "test_dat\0"
> into the buffer. Once again we copy the last byte
> of the page into the buffer, which is "a"
> and we request 1 more byte of the next page, which
> is the nul terminator. strscopy returns 0 and this
> function returns 9.
so I was worried about the situation where the string is test_data|\0,
like you describe (where | is denoting page boundary, NUL is right
after the page end), and we request 9 bytes (not 10 like in your
example). By manually copying that "a" (last byte of the page), we
might not return zero terminated dst buf (that was my theory). Would
be good to have a test proving otherwise.
But I'll go and read the latest version, maybe I'm overthinking this.
>
> Again note, that this version of the code has a bug
> that is "handled" by the bpf helper and I'm going to fix.
>
> > > + retval = bytes;
> > > + /*
> > > + * Because strscpy always null terminates we need to
> > > + * copy the last byte in the page if we are going to
> > > + * load more pages
> > > + */
> > > + if (bytes < len) {
> > > + end = bytes - 1;
> > > + copy_from_user_page(vma,
> > > + page,
> > > + addr + end,
> > > + buf + end,
[...]
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-01-27 19:27 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-01-24 18:16 [bpf-next v3 1/3] mm: add copy_remote_vm_str Jordan Rome
2025-01-25 0:08 ` Andrii Nakryiko
2025-01-25 14:01 ` Jordan Rome
2025-01-27 19:21 ` Andrii Nakryiko
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox