From: Matt Mackall <mpm@selenic.com>
To: Dave Hansen <haveblue@us.ibm.com>
Cc: linux-mm@kvack.org
Subject: Re: [RFC][PATCH 8/9] pagemap: use page walker pte_hole() helper
Date: Tue, 21 Aug 2007 17:01:03 -0500	[thread overview]
Message-ID: <20070821220103.GM30556@waste.org> (raw)
In-Reply-To: <20070821204257.BB3A4C17@kernel>

On Tue, Aug 21, 2007 at 01:42:57PM -0700, Dave Hansen wrote:
> 
> I tried to do this a bit more incrementally, but it ended
> up just looking like an even worse mess.  So, this does
> a couple of different things.
> 
> 1. use page walker pte_hole() helper, which
> 2. gets rid of the "next" value in "struct pagemapread"
> 3. allow 1-3 byte reads from pagemap.  This at least
>    ensures that we don't write over user memory if they
>    ask us for 1 byte and we try to write 4.
> 4. Instead of trying to calculate what ranges of pages
>    we are going to walk, simply start walking them,
>    then return PAGEMAP_END_OF_BUFFER at the end of the
>    buffer, error out, and stop walking.
> 5. enforce that reads must be aligned to PM_ENTRY_BYTES
> 
> Note that, despite these functional additions and some
> nice new comments, this patch still removes more code
> than it adds.

Ok, but deleting my debugging printks doesn't count!
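
As an aside, for anyone wanting to poke at this from userspace: with
the semantics described above, a reader ends up looking roughly like
the sketch below. Untested, the pid and starting-page arguments are
just for illustration, and it assumes the sizeof(unsigned long)
per-page entry format from the patch:

/* Sketch: dump pagemap entries for a few pages of a task.
 * Usage: ./pagemap-dump <pid> <first-virtual-page-index>
 * Reads and the file offset stay aligned to the entry size. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	unsigned long buf[512];		/* a multiple of the entry size */
	unsigned long vpage;
	char path[64];
	ssize_t n, i;
	int fd;

	if (argc < 3)
		return 1;
	vpage = strtoul(argv[2], NULL, 0);

	snprintf(path, sizeof(path), "/proc/%s/pagemap", argv[1]);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return 1;

	/* one entry per virtual page: offset = page index * entry size */
	if (lseek(fd, vpage * sizeof(unsigned long), SEEK_SET) < 0)
		return 1;

	n = read(fd, buf, sizeof(buf));
	for (i = 0; i < n / (ssize_t)sizeof(unsigned long); i++)
		printf("vpage %lu -> %#lx\n", vpage + i, buf[i]);

	close(fd);
	return 0;
}

The only contract userspace needs is "seek and read in multiples of
the entry size", which is nicely simple.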

> Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
> ---
> 
>  lxc-dave/fs/proc/task_mmu.c |  123 ++++++++++++++++++++------------------------
>  1 file changed, 56 insertions(+), 67 deletions(-)
> 
> diff -puN fs/proc/task_mmu.c~bail-instead-of-tracking fs/proc/task_mmu.c
> --- lxc/fs/proc/task_mmu.c~bail-instead-of-tracking	2007-08-21 13:30:54.000000000 -0700
> +++ lxc-dave/fs/proc/task_mmu.c	2007-08-21 13:30:54.000000000 -0700
> @@ -501,7 +501,6 @@ const struct file_operations proc_clear_
>  };
>  
>  struct pagemapread {
> -	unsigned long next;
>  	unsigned long pos;
>  	size_t count;
>  	int index;
> @@ -510,58 +509,64 @@ struct pagemapread {
>  
>  #define PM_ENTRY_BYTES sizeof(unsigned long)
>  #define PM_NOT_PRESENT ((unsigned long)-1)
> +#define PAGEMAP_END_OF_BUFFER 1
>  
>  static int add_to_pagemap(unsigned long addr, unsigned long pfn,
>  			  struct pagemapread *pm)
>  {
> -	__put_user(pfn, pm->out);
> -	pm->out++;
> -	pm->next = addr + PAGE_SIZE;
> +	int out_len = PM_ENTRY_BYTES;
> +	if (pm->count < PM_ENTRY_BYTES)
> +		out_len = pm->count;

This wants an unlikely.

> +	copy_to_user(pm->out, &pfn, out_len);

And I think we want to keep the put_user in the fast path.
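
Something along these lines (untested sketch, reusing your names, and
putting back the pm->out increment that the hunk above seems to drop)
is what I have in mind:

static int add_to_pagemap(unsigned long addr, unsigned long pfn,
			  struct pagemapread *pm)
{
	/* Rare case: only part of an entry fits in what's left of
	 * the user buffer, so fall back to copy_to_user() for the
	 * tail and stop the walk. */
	if (unlikely(pm->count < PM_ENTRY_BYTES)) {
		if (copy_to_user(pm->out, &pfn, pm->count))
			return -EFAULT;
		pm->pos += pm->count;
		pm->count = 0;
		return PAGEMAP_END_OF_BUFFER;
	}

	/* Common case: a whole entry fits, keep __put_user() here. */
	if (__put_user(pfn, pm->out))
		return -EFAULT;
	pm->out++;
	pm->pos += PM_ENTRY_BYTES;
	pm->count -= PM_ENTRY_BYTES;

	return pm->count ? 0 : PAGEMAP_END_OF_BUFFER;
}
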

>  	pm->pos += PM_ENTRY_BYTES;
>  	pm->count -= PM_ENTRY_BYTES;
> +	if (pm->count <= 0)
> +		return PAGEMAP_END_OF_BUFFER;
>  	return 0;
>  }
>  
> +static int pagemap_pte_hole(unsigned long start, unsigned long end,
> +				void *private)
> +{
> +	struct pagemapread *pm = private;
> +	unsigned long addr;
> +	int err = 0;
> +	for (addr = start; addr < end; addr += PAGE_SIZE) {
> +		err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
> +		if (err)
> +			break;
> +	}
> +	return err;
> +}
> +
>  static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
>  			     void *private)
>  {
>  	struct pagemapread *pm = private;
>  	pte_t *pte;
> -	int err;
> +	int err = 0;
>  
>  	pte = pte_offset_map(pmd, addr);
>  	for (; addr != end; pte++, addr += PAGE_SIZE) {
> -		if (addr < pm->next)
> -			continue;
> -		if (!pte_present(*pte))
> -			err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
> -		else
> -			err = add_to_pagemap(addr, pte_pfn(*pte), pm);
> +		unsigned long pfn = PM_NOT_PRESENT;
> +		if (pte_present(*pte))
> +			pfn = pte_pfn(*pte);
> +		err = add_to_pagemap(addr, pfn, pm);
>  		if (err)
>  			return err;
> -		if (pm->count == 0)
> -			break;
>  	}
>  	pte_unmap(pte - 1);
>  
>  	cond_resched();
>  
> -	return 0;
> +	return err;
>  }
>  
> -static int pagemap_fill(struct pagemapread *pm, unsigned long end)
> +static struct mm_walk pagemap_walk =
>  {
> -	int ret;
> -
> -	while (pm->next != end && pm->count > 0) {
> -		ret = add_to_pagemap(pm->next, -1UL, pm);
> -		if (ret)
> -			return ret;
> -	}
> -	return 0;
> -}
> -
> -static struct mm_walk pagemap_walk = { .pmd_entry = pagemap_pte_range };
> +	.pmd_entry = pagemap_pte_range,
> +	.pte_hole = pagemap_pte_hole
> +};
>  
>  /*
>   * /proc/pid/pagemap - an array mapping virtual pages to pfns
> @@ -589,9 +594,8 @@ static ssize_t pagemap_read(struct file 
>  	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
>  	unsigned long src = *ppos;
>  	struct page **pages, *page;
> -	unsigned long addr, end, vend, svpfn, evpfn, uaddr, uend;
> +	unsigned long uaddr, uend;
>  	struct mm_struct *mm;
> -	struct vm_area_struct *vma;
>  	struct pagemapread pm;
>  	int pagecount;
>  	int ret = -ESRCH;
> @@ -603,16 +607,10 @@ static ssize_t pagemap_read(struct file 
>  	if (!ptrace_may_attach(task))
>  		goto out;
>  
> -	ret = -EIO;
> -	svpfn = src / PM_ENTRY_BYTES;
> -	addr = PAGE_SIZE * svpfn;
> -	if (svpfn * PM_ENTRY_BYTES != src)
> +	ret = -EINVAL;
> +	/* file position must be aligned */
> +	if (*ppos % PM_ENTRY_BYTES)
>  		goto out;
> -	evpfn = min((src + count) / sizeof(unsigned long) - 1,
> -		    ((~0UL) >> PAGE_SHIFT) + 1);
> -	count = (evpfn - svpfn) * PM_ENTRY_BYTES;
> -	end = PAGE_SIZE * evpfn;
> -	//printk("src %ld svpfn %d evpfn %d count %d\n", src, svpfn, evpfn, count);
>  
>  	ret = 0;
>  	mm = get_task_mm(task);
> @@ -632,44 +630,36 @@ static ssize_t pagemap_read(struct file 
>  			     1, 0, pages, NULL);
>  	up_read(&current->mm->mmap_sem);
>  
> -	//printk("%x(%x):%x %d@%ld (%d pages) -> %d\n", uaddr, buf, uend, count, src, pagecount, ret);
>  	if (ret < 0)
>  		goto out_free;
>  
> -	pm.next = addr;
>  	pm.pos = src;
>  	pm.count = count;
>  	pm.out = (unsigned long __user *)buf;
>  
> -	down_read(&mm->mmap_sem);
> -	vma = find_vma(mm, pm.next);
> -	while (pm.count > 0 && vma) {
> -		if (!ptrace_may_attach(task)) {
> -			ret = -EIO;
> -			up_read(&mm->mmap_sem);
> -			goto out_release;
> -		}
> -		vend = min(vma->vm_end - 1, end - 1) + 1;
> -		ret = pagemap_fill(&pm, vend);
> -		if (ret || !pm.count)
> -			break;
> -		vend = min(vma->vm_end - 1, end - 1) + 1;
> -		ret = walk_page_range(mm, vma->vm_start, vend,
> -				      &pagemap_walk, &pm);
> -		vma = vma->vm_next;
> +	if (!ptrace_may_attach(task)) {
> +		ret = -EIO;
> +	} else {
> +		unsigned long src = *ppos;
> +		unsigned long svpfn = src / PM_ENTRY_BYTES;
> +		unsigned long start_vaddr = svpfn << PAGE_SHIFT;
> +		unsigned long end_vaddr = TASK_SIZE_OF(task);
> +		/*
> +		 * The odds are that this will stop walking way
> +		 * before end_vaddr, because the length of the
> +		 * user buffer is tracked in "pm", and the walk
> +		 * will stop when we hit the end of the buffer.
> +		 */
> +		ret = walk_page_range(mm, start_vaddr, end_vaddr,
> +					&pagemap_walk, &pm);
> +		if (ret == PAGEMAP_END_OF_BUFFER)
> +			ret = 0;
> +		/* don't need mmap_sem for these, but this looks cleaner */
> +		*ppos = pm.pos;
> +		if (!ret)
> +			ret = pm.pos - src;
>  	}
> -	up_read(&mm->mmap_sem);
> -
> -	//printk("before fill at %ld\n", pm.pos);
> -	ret = pagemap_fill(&pm, end);
> -
> -	printk("after fill at %ld\n", pm.pos);
> -	*ppos = pm.pos;
> -	if (!ret)
> -		ret = pm.pos - src;
>  
> -out_release:
> -	printk("releasing pages\n");
>  	for (; pagecount; pagecount--) {
>  		page = pages[pagecount-1];
>  		if (!PageReserved(page))
> @@ -682,7 +672,6 @@ out_free:
>  out_task:
>  	put_task_struct(task);
>  out:
> -	printk("returning\n");
>  	return ret;
>  }
>  
> _

-- 
Mathematics is the supreme nostalgia of our time.


Thread overview: 22+ messages
2007-08-21 20:42 [RFC][PATCH 1/9] /proc/pid/pagemap update Dave Hansen
2007-08-21 20:42 ` [RFC][PATCH 2/9] pagemap: remove file header Dave Hansen
2007-08-21 21:24   ` Matt Mackall
2007-08-21 20:42 ` [RFC][PATCH 3/9] pagemap: use PAGE_MASK/PAGE_ALIGN() Dave Hansen
2007-08-21 21:25   ` Matt Mackall
2007-08-21 20:42 ` [RFC][PATCH 4/9] pagemap: remove open-coded sizeof(unsigned long) Dave Hansen
2007-08-21 21:26   ` Matt Mackall
2007-08-21 20:42 ` [RFC][PATCH 5/9] introduce TASK_SIZE_OF() for all arches Dave Hansen
2007-08-21 20:42 ` [RFC][PATCH 6/9] pagemap: give -1's a name Dave Hansen
2007-08-21 21:27   ` Matt Mackall
2007-08-21 20:42 ` [RFC][PATCH 7/9] pagewalk: add handler for empty ranges Dave Hansen
2007-08-30  7:28   ` Nick Piggin
2007-09-03 12:32     ` Matt Mackall
2007-08-21 20:42 ` [RFC][PATCH 8/9] pagemap: use page walker pte_hole() helper Dave Hansen
2007-08-21 22:01   ` Matt Mackall [this message]
2007-08-21 22:38     ` Dave Hansen
2007-08-21 20:42 ` [RFC][PATCH 9/9] pagemap: export swap ptes Dave Hansen
2007-08-21 21:49   ` Matt Mackall
2007-08-21 22:26     ` Dave Hansen
2007-08-21 21:23 ` [RFC][PATCH 1/9] /proc/pid/pagemap update Matt Mackall
2007-08-21 21:26   ` Dave Hansen
2007-08-21 22:07 ` Matt Mackall
