From: Matt Mackall <mpm@selenic.com>
To: Dave Hansen <haveblue@us.ibm.com>
Cc: linux-mm@kvack.org
Subject: Re: [RFC][PATCH 8/9] pagemap: use page walker pte_hole() helper
Date: Tue, 21 Aug 2007 17:01:03 -0500 [thread overview]
Message-ID: <20070821220103.GM30556@waste.org> (raw)
In-Reply-To: <20070821204257.BB3A4C17@kernel>
On Tue, Aug 21, 2007 at 01:42:57PM -0700, Dave Hansen wrote:
>
> I tried to do this a bit more incrementally, but it ended
> up just looking like an even worse mess. So, this does
> a a couple of different things.
>
> 1. use page walker pte_hole() helper, which
> 2. gets rid of the "next" value in "struct pagemapread"
> 3. allow 1-3 byte reads from pagemap. This at least
> ensures that we don't write over user memory if they
> ask us for 1 bytes and we tried to write 4.
> 4. Instead of trying to calculate what ranges of pages
> we are going to walk, simply start walking them,
> then return PAGEMAP_END_OF_BUFFER at the end of the
> buffer, error out, and stop walking.
> 5. enforce that reads must be algined to PM_ENTRY_BYTES
>
> Note that, despite these functional additions, and some
> nice new comments, this patch still removes more code
> than it adds.
Ok, but deleting my debugging printks doesn't count!
> Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
> ---
>
> lxc-dave/fs/proc/task_mmu.c | 123 ++++++++++++++++++++------------------------
> 1 file changed, 56 insertions(+), 67 deletions(-)
>
> diff -puN fs/proc/task_mmu.c~bail-instead-of-tracking fs/proc/task_mmu.c
> --- lxc/fs/proc/task_mmu.c~bail-instead-of-tracking 2007-08-21 13:30:54.000000000 -0700
> +++ lxc-dave/fs/proc/task_mmu.c 2007-08-21 13:30:54.000000000 -0700
> @@ -501,7 +501,6 @@ const struct file_operations proc_clear_
> };
>
> struct pagemapread {
> - unsigned long next;
> unsigned long pos;
> size_t count;
> int index;
> @@ -510,58 +509,64 @@ struct pagemapread {
>
> #define PM_ENTRY_BYTES sizeof(unsigned long)
> #define PM_NOT_PRESENT ((unsigned long)-1)
> +#define PAGEMAP_END_OF_BUFFER 1
>
> static int add_to_pagemap(unsigned long addr, unsigned long pfn,
> struct pagemapread *pm)
> {
> - __put_user(pfn, pm->out);
> - pm->out++;
> - pm->next = addr + PAGE_SIZE;
> + int out_len = PM_ENTRY_BYTES;
> + if (pm->count < PM_ENTRY_BYTES)
> + out_len = pm->count;
This wants an unlikely.
> + copy_to_user(pm->out, &pfn, out_len);
And I think we want to keep the put_user in the fast path.
> pm->pos += PM_ENTRY_BYTES;
> pm->count -= PM_ENTRY_BYTES;
> + if (pm->count <= 0)
> + return PAGEMAP_END_OF_BUFFER;
> return 0;
> }
>
> +static int pagemap_pte_hole(unsigned long start, unsigned long end,
> + void *private)
> +{
> + struct pagemapread *pm = private;
> + unsigned long addr;
> + int err = 0;
> + for (addr = start; addr < end; addr += PAGE_SIZE) {
> + err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
> + if (err)
> + break;
> + }
> + return err;
> +}
> +
> static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
> void *private)
> {
> struct pagemapread *pm = private;
> pte_t *pte;
> - int err;
> + int err = 0;
>
> pte = pte_offset_map(pmd, addr);
> for (; addr != end; pte++, addr += PAGE_SIZE) {
> - if (addr < pm->next)
> - continue;
> - if (!pte_present(*pte))
> - err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
> - else
> - err = add_to_pagemap(addr, pte_pfn(*pte), pm);
> + unsigned long pfn = PM_NOT_PRESENT;
> + if (pte_present(*pte))
> + pfn = pte_pfn(*pte);
> + err = add_to_pagemap(addr, pfn, pm);
> if (err)
> return err;
> - if (pm->count == 0)
> - break;
> }
> pte_unmap(pte - 1);
>
> cond_resched();
>
> - return 0;
> + return err;
> }
>
> -static int pagemap_fill(struct pagemapread *pm, unsigned long end)
> +static struct mm_walk pagemap_walk =
> {
> - int ret;
> -
> - while (pm->next != end && pm->count > 0) {
> - ret = add_to_pagemap(pm->next, -1UL, pm);
> - if (ret)
> - return ret;
> - }
> - return 0;
> -}
> -
> -static struct mm_walk pagemap_walk = { .pmd_entry = pagemap_pte_range };
> + .pmd_entry = pagemap_pte_range,
> + .pte_hole = pagemap_pte_hole
> +};
>
> /*
> * /proc/pid/pagemap - an array mapping virtual pages to pfns
> @@ -589,9 +594,8 @@ static ssize_t pagemap_read(struct file
> struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
> unsigned long src = *ppos;
> struct page **pages, *page;
> - unsigned long addr, end, vend, svpfn, evpfn, uaddr, uend;
> + unsigned long uaddr, uend;
> struct mm_struct *mm;
> - struct vm_area_struct *vma;
> struct pagemapread pm;
> int pagecount;
> int ret = -ESRCH;
> @@ -603,16 +607,10 @@ static ssize_t pagemap_read(struct file
> if (!ptrace_may_attach(task))
> goto out;
>
> - ret = -EIO;
> - svpfn = src / PM_ENTRY_BYTES;
> - addr = PAGE_SIZE * svpfn;
> - if (svpfn * PM_ENTRY_BYTES != src)
> + ret = -EINVAL;
> + /* file position must be aligned */
> + if (*ppos % PM_ENTRY_BYTES)
> goto out;
> - evpfn = min((src + count) / sizeof(unsigned long) - 1,
> - ((~0UL) >> PAGE_SHIFT) + 1);
> - count = (evpfn - svpfn) * PM_ENTRY_BYTES;
> - end = PAGE_SIZE * evpfn;
> - //printk("src %ld svpfn %d evpfn %d count %d\n", src, svpfn, evpfn, count);
>
> ret = 0;
> mm = get_task_mm(task);
> @@ -632,44 +630,36 @@ static ssize_t pagemap_read(struct file
> 1, 0, pages, NULL);
> up_read(¤t->mm->mmap_sem);
>
> - //printk("%x(%x):%x %d@%ld (%d pages) -> %d\n", uaddr, buf, uend, count, src, pagecount, ret);
> if (ret < 0)
> goto out_free;
>
> - pm.next = addr;
> pm.pos = src;
> pm.count = count;
> pm.out = (unsigned long __user *)buf;
>
> - down_read(&mm->mmap_sem);
> - vma = find_vma(mm, pm.next);
> - while (pm.count > 0 && vma) {
> - if (!ptrace_may_attach(task)) {
> - ret = -EIO;
> - up_read(&mm->mmap_sem);
> - goto out_release;
> - }
> - vend = min(vma->vm_end - 1, end - 1) + 1;
> - ret = pagemap_fill(&pm, vend);
> - if (ret || !pm.count)
> - break;
> - vend = min(vma->vm_end - 1, end - 1) + 1;
> - ret = walk_page_range(mm, vma->vm_start, vend,
> - &pagemap_walk, &pm);
> - vma = vma->vm_next;
> + if (!ptrace_may_attach(task)) {
> + ret = -EIO;
> + } else {
> + unsigned long src = *ppos;
> + unsigned long svpfn = src / PM_ENTRY_BYTES;
> + unsigned long start_vaddr = svpfn << PAGE_SHIFT;
> + unsigned long end_vaddr = TASK_SIZE_OF(task);
> + /*
> + * The odds are that this will stop walking way
> + * before end_vaddr, because the length of the
> + * user buffer is tracked in "pm", and the walk
> + * will stop when we hit the end of the buffer.
> + */
> + ret = walk_page_range(mm, start_vaddr, end_vaddr,
> + &pagemap_walk, &pm);
> + if (ret == PAGEMAP_END_OF_BUFFER)
> + ret = 0;
> + /* don't need mmap_sem for these, but this looks cleaner */
> + *ppos = pm.pos;
> + if (!ret)
> + ret = pm.pos - src;
> }
> - up_read(&mm->mmap_sem);
> -
> - //printk("before fill at %ld\n", pm.pos);
> - ret = pagemap_fill(&pm, end);
> -
> - printk("after fill at %ld\n", pm.pos);
> - *ppos = pm.pos;
> - if (!ret)
> - ret = pm.pos - src;
>
> -out_release:
> - printk("releasing pages\n");
> for (; pagecount; pagecount--) {
> page = pages[pagecount-1];
> if (!PageReserved(page))
> @@ -682,7 +672,6 @@ out_free:
> out_task:
> put_task_struct(task);
> out:
> - printk("returning\n");
> return ret;
> }
>
> _
--
Mathematics is the supreme nostalgia of our time.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2007-08-21 22:01 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-08-21 20:42 [RFC][PATCH 1/9] /proc/pid/pagemap update Dave Hansen
2007-08-21 20:42 ` [RFC][PATCH 2/9] pagemap: remove file header Dave Hansen
2007-08-21 21:24 ` Matt Mackall
2007-08-21 20:42 ` [RFC][PATCH 3/9] pagemap: use PAGE_MASK/PAGE_ALIGN() Dave Hansen
2007-08-21 21:25 ` Matt Mackall
2007-08-21 20:42 ` [RFC][PATCH 4/9] pagemap: remove open-coded sizeof(unsigned long) Dave Hansen
2007-08-21 21:26 ` Matt Mackall
2007-08-21 20:42 ` [RFC][PATCH 5/9] introduce TASK_SIZE_OF() for all arches Dave Hansen
2007-08-21 20:42 ` [RFC][PATCH 6/9] pagemap: give -1's a name Dave Hansen
2007-08-21 21:27 ` Matt Mackall
2007-08-21 20:42 ` [RFC][PATCH 7/9] pagewalk: add handler for empty ranges Dave Hansen
2007-08-30 7:28 ` Nick Piggin
2007-09-03 12:32 ` Matt Mackall
2007-08-21 20:42 ` [RFC][PATCH 8/9] pagemap: use page walker pte_hole() helper Dave Hansen
2007-08-21 22:01 ` Matt Mackall [this message]
2007-08-21 22:38 ` Dave Hansen
2007-08-21 20:42 ` [RFC][PATCH 9/9] pagemap: export swap ptes Dave Hansen
2007-08-21 21:49 ` Matt Mackall
2007-08-21 22:26 ` Dave Hansen
2007-08-21 21:23 ` [RFC][PATCH 1/9] /proc/pid/pagemap update Matt Mackall
2007-08-21 21:26 ` Dave Hansen
2007-08-21 22:07 ` Matt Mackall
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070821220103.GM30556@waste.org \
--to=mpm@selenic.com \
--cc=haveblue@us.ibm.com \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox