linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Dave Hansen <haveblue@us.ibm.com>
To: mpm@selenic.com
Cc: linux-mm@kvack.org, Dave Hansen <haveblue@us.ibm.com>
Subject: [RFC][PATCH 1/9] /proc/pid/pagemap update
Date: Tue, 21 Aug 2007 13:42:48 -0700	[thread overview]
Message-ID: <20070821204248.0F506A29@kernel> (raw)

This is a series of patches to update /proc/pid/pagemap,
to simplify the code, and fix bugs which caused userspace
memory corruption.

Since it is just in -mm, we should probably roll all of
these together and just update it all at once, or send
a simple drop-on replacement patch.  These patches are
all here mostly to help with review.

Matt, if you're OK with these, do you mind if I send
the update into -mm, or would you like to do it?

--
From: Matt Mackall <mpm@selenic.com>

On Mon, Aug 06, 2007 at 01:44:19AM -0500, Dave Boutcher wrote:
> 
> Matt, this patch set replaces the two patches I sent earlier and
> contains additional fixes.  I've done some reasonably rigorous testing
> on x86_64, but not on a 32 bit arch.  I'm pretty sure this isn't worse
> than what's in mm right now, which has some user-space corruption and
> a nasty infinite kernel loop. YMMV.

Dave, here's my current work-in-progress patch to deal with a couple
locking issues, primarily a possible deadlock on the mm semaphore that
can occur if someone unmaps the target buffer while we're walking the
tree. It currently hangs on my box and I haven't had any free cycles
to finish debugging it, but you might want to take a peek at it.

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---

 lxc-dave/fs/proc/task_mmu.c |  121 ++++++++++++++++++++------------------------
 1 file changed, 55 insertions(+), 66 deletions(-)

diff -puN fs/proc/task_mmu.c~Re-_PATCH_0_3_proc_pid_pagemap_fixes fs/proc/task_mmu.c
--- lxc/fs/proc/task_mmu.c~Re-_PATCH_0_3_proc_pid_pagemap_fixes	2007-08-21 13:30:50.000000000 -0700
+++ lxc-dave/fs/proc/task_mmu.c	2007-08-21 13:30:50.000000000 -0700
@@ -501,37 +501,21 @@ const struct file_operations proc_clear_
 };
 
 struct pagemapread {
-	struct mm_struct *mm;
 	unsigned long next;
-	unsigned long *buf;
-	pte_t *ptebuf;
 	unsigned long pos;
 	size_t count;
 	int index;
-	char __user *out;
+	unsigned long __user *out;
 };
 
-static int flush_pagemap(struct pagemapread *pm)
-{
-	int n = min(pm->count, pm->index * sizeof(unsigned long));
-	if (copy_to_user(pm->out, pm->buf, n))
-		return -EFAULT;
-	pm->out += n;
-	pm->pos += n;
-	pm->count -= n;
-	pm->index = 0;
-	cond_resched();
-	return 0;
-}
-
 static int add_to_pagemap(unsigned long addr, unsigned long pfn,
 			  struct pagemapread *pm)
 {
-	pm->buf[pm->index++] = pfn;
+	__put_user(pfn, pm->out);
+	pm->out++;
+	pm->pos += sizeof(unsigned long);
+	pm->count -= sizeof(unsigned long);
 	pm->next = addr + PAGE_SIZE;
-	if (pm->index * sizeof(unsigned long) >= PAGE_SIZE ||
-	    pm->index * sizeof(unsigned long) >= pm->count)
-		return flush_pagemap(pm);
 	return 0;
 }
 
@@ -543,14 +527,6 @@ static int pagemap_pte_range(pmd_t *pmd,
 	int err;
 
 	pte = pte_offset_map(pmd, addr);
-
-#ifdef CONFIG_HIGHPTE
-	/* copy PTE directory to temporary buffer and unmap it */
-	memcpy(pm->ptebuf, pte, PAGE_ALIGN((unsigned long)pte) - (unsigned long)pte);
-	pte_unmap(pte);
-	pte = pm->ptebuf;
-#endif
-
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		if (addr < pm->next)
 			continue;
@@ -560,11 +536,12 @@ static int pagemap_pte_range(pmd_t *pmd,
 			err = add_to_pagemap(addr, pte_pfn(*pte), pm);
 		if (err)
 			return err;
+		if (pm->count == 0)
+			break;
 	}
-
-#ifndef CONFIG_HIGHPTE
 	pte_unmap(pte - 1);
-#endif
+
+	cond_resched();
 
 	return 0;
 }
@@ -573,7 +550,7 @@ static int pagemap_fill(struct pagemapre
 {
 	int ret;
 
-	while (pm->next != end) {
+	while (pm->next != end && pm->count > 0) {
 		ret = add_to_pagemap(pm->next, -1UL, pm);
 		if (ret)
 			return ret;
@@ -608,15 +585,16 @@ static ssize_t pagemap_read(struct file 
 {
 	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 	unsigned long src = *ppos;
-	unsigned long *page;
-	unsigned long addr, end, vend, svpfn, evpfn;
+	struct page **pages, *page;
+	unsigned long addr, end, vend, svpfn, evpfn, uaddr, uend;
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
 	struct pagemapread pm;
+	int pagecount;
 	int ret = -ESRCH;
 
 	if (!task)
-		goto out_no_task;
+		goto out;
 
 	ret = -EACCES;
 	if (!ptrace_may_attach(task))
@@ -628,39 +606,43 @@ static ssize_t pagemap_read(struct file 
 	if ((svpfn + 1) * sizeof(unsigned long) != src)
 		goto out;
 	evpfn = min((src + count) / sizeof(unsigned long) - 1,
-		    ((~0UL) >> PAGE_SHIFT) + 1);
+		    ((~0UL) >> PAGE_SHIFT) + 1) - 1;
 	count = (evpfn - svpfn) * sizeof(unsigned long);
 	end = PAGE_SIZE * evpfn;
-
-	ret = -ENOMEM;
-	page = kzalloc(PAGE_SIZE, GFP_USER);
-	if (!page)
-		goto out;
-
-#ifdef CONFIG_HIGHPTE
-	pm.ptebuf = kzalloc(PAGE_SIZE, GFP_USER);
-	if (!pm.ptebuf)
-		goto out_free;
-#endif
+	//printk("src %ld svpfn %d evpfn %d count %d\n", src, svpfn, evpfn, count);
 
 	ret = 0;
 	mm = get_task_mm(task);
 	if (!mm)
-		goto out_freepte;
+		goto out;
+
+	ret = -ENOMEM;
+	uaddr = (unsigned long)buf & ~(PAGE_SIZE-1);
+	uend = (unsigned long)(buf + count);
+	pagecount = (uend - uaddr + PAGE_SIZE-1) / PAGE_SIZE;
+	pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out_task;
+
+	down_read(&current->mm->mmap_sem);
+	ret = get_user_pages(current, current->mm, uaddr, pagecount,
+			     1, 0, pages, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	//printk("%x(%x):%x %d@%ld (%d pages) -> %d\n", uaddr, buf, uend, count, src, pagecount, ret);
+	if (ret < 0)
+		goto out_free;
 
-	pm.mm = mm;
 	pm.next = addr;
-	pm.buf = page;
 	pm.pos = src;
 	pm.count = count;
-	pm.index = 0;
-	pm.out = buf;
+	pm.out = (unsigned long __user *)buf;
 
 	if (svpfn == -1) {
-		((char *)page)[0] = (ntohl(1) != 1);
-		((char *)page)[1] = PAGE_SHIFT;
-		((char *)page)[2] = sizeof(unsigned long);
-		((char *)page)[3] = sizeof(unsigned long);
+		put_user((char)(ntohl(1) != 1), buf);
+		put_user((char)PAGE_SHIFT, buf + 1);
+		put_user((char)sizeof(unsigned long), buf + 2);
+		put_user((char)sizeof(unsigned long), buf + 3);
 		add_to_pagemap(pm.next, page[0], &pm);
 	}
 
@@ -669,7 +651,8 @@ static ssize_t pagemap_read(struct file 
 	while (pm.count > 0 && vma) {
 		if (!ptrace_may_attach(task)) {
 			ret = -EIO;
-			goto out_mm;
+			up_read(&mm->mmap_sem);
+			goto out_release;
 		}
 		vend = min(vma->vm_end - 1, end - 1) + 1;
 		ret = pagemap_fill(&pm, vend);
@@ -682,23 +665,29 @@ static ssize_t pagemap_read(struct file 
 	}
 	up_read(&mm->mmap_sem);
 
+	//printk("before fill at %ld\n", pm.pos);
 	ret = pagemap_fill(&pm, end);
 
+	printk("after fill at %ld\n", pm.pos);
 	*ppos = pm.pos;
 	if (!ret)
 		ret = pm.pos - src;
 
-out_mm:
+out_release:
+	printk("releasing pages\n");
+	for (; pagecount; pagecount--) {
+		page = pages[pagecount-1];
+		if (!PageReserved(page))
+			SetPageDirty(page);
+		page_cache_release(page);
+	}
 	mmput(mm);
-out_freepte:
-#ifdef CONFIG_HIGHPTE
-	kfree(pm.ptebuf);
 out_free:
-#endif
-	kfree(page);
-out:
+	kfree(pages);
+out_task:
 	put_task_struct(task);
-out_no_task:
+out:
+	printk("returning\n");
 	return ret;
 }
 
_

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

             reply	other threads:[~2007-08-21 20:44 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-08-21 20:42 Dave Hansen [this message]
2007-08-21 20:42 ` [RFC][PATCH 2/9] pagemap: remove file header Dave Hansen
2007-08-21 21:24   ` Matt Mackall
2007-08-21 20:42 ` [RFC][PATCH 3/9] pagemap: use PAGE_MASK/PAGE_ALIGN() Dave Hansen
2007-08-21 21:25   ` Matt Mackall
2007-08-21 20:42 ` [RFC][PATCH 4/9] pagemap: remove open-coded sizeof(unsigned long) Dave Hansen
2007-08-21 21:26   ` Matt Mackall
2007-08-21 20:42 ` [RFC][PATCH 5/9] introduce TASK_SIZE_OF() for all arches Dave Hansen
2007-08-21 20:42 ` [RFC][PATCH 6/9] pagemap: give -1's a name Dave Hansen
2007-08-21 21:27   ` Matt Mackall
2007-08-21 20:42 ` [RFC][PATCH 7/9] pagewalk: add handler for empty ranges Dave Hansen
2007-08-30  7:28   ` Nick Piggin
2007-09-03 12:32     ` Matt Mackall
2007-08-21 20:42 ` [RFC][PATCH 8/9] pagemap: use page walker pte_hole() helper Dave Hansen
2007-08-21 22:01   ` Matt Mackall
2007-08-21 22:38     ` Dave Hansen
2007-08-21 20:42 ` [RFC][PATCH 9/9] pagemap: export swap ptes Dave Hansen
2007-08-21 21:49   ` Matt Mackall
2007-08-21 22:26     ` Dave Hansen
2007-08-21 21:23 ` [RFC][PATCH 1/9] /proc/pid/pagemap update Matt Mackall
2007-08-21 21:26   ` Dave Hansen
2007-08-21 22:07 ` Matt Mackall

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070821204248.0F506A29@kernel \
    --to=haveblue@us.ibm.com \
    --cc=linux-mm@kvack.org \
    --cc=mpm@selenic.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox