[PATCH 5/8] mm: follow_hugetlb_page flags

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Nick Piggin <npiggin@suse.de>, Rik van Riel <riel@redhat.com>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH 5/8] mm: follow_hugetlb_page flags
Date: Mon, 7 Sep 2009 22:37:14 +0100 (BST)	[thread overview]
Message-ID: <Pine.LNX.4.64.0909072235360.15430@sister.anvils> (raw)
In-Reply-To: <Pine.LNX.4.64.0909072222070.15424@sister.anvils>

follow_hugetlb_page() shouldn't be guessing about the coredump case
either: pass the foll_flags down to it, instead of just the write bit.

Remove that obscure huge_zeropage_ok() test.  The decision is easy,
though unlike the non-huge case - here vm_ops->fault is always set.
But we know that a fault would serve up zeroes, unless there's
already a hugetlbfs pagecache page to back the range.

(Alternatively, since hugetlb pages aren't swapped out under pressure,
you could save more dump space by arguing that a page not yet faulted
into this process cannot be relevant to the dump; but that would be
more surprising.)

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
---

 include/linux/hugetlb.h |    4 +-
 mm/hugetlb.c            |   62 ++++++++++++++++++++++----------------
 mm/memory.c             |   14 ++++----
 3 files changed, 48 insertions(+), 32 deletions(-)

--- mm4/include/linux/hugetlb.h	2009-09-05 14:40:16.000000000 +0100
+++ mm5/include/linux/hugetlb.h	2009-09-07 13:16:46.000000000 +0100
@@ -24,7 +24,9 @@ int hugetlb_sysctl_handler(struct ctl_ta
 int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
 int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
-int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, int);
+int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
+			struct page **, struct vm_area_struct **,
+			unsigned long *, int *, int, unsigned int flags);
 void unmap_hugepage_range(struct vm_area_struct *,
 			unsigned long, unsigned long, struct page *);
 void __unmap_hugepage_range(struct vm_area_struct *,
--- mm4/mm/hugetlb.c	2009-09-05 14:40:16.000000000 +0100
+++ mm5/mm/hugetlb.c	2009-09-07 13:16:46.000000000 +0100
@@ -2016,6 +2016,23 @@ static struct page *hugetlbfs_pagecache_
 	return find_lock_page(mapping, idx);
 }
 
+/* Return whether there is a pagecache page to back given address within VMA */
+static bool hugetlbfs_backed(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long address)
+{
+	struct address_space *mapping;
+	pgoff_t idx;
+	struct page *page;
+
+	mapping = vma->vm_file->f_mapping;
+	idx = vma_hugecache_offset(h, vma, address);
+
+	page = find_get_page(mapping, idx);
+	if (page)
+		put_page(page);
+	return page != NULL;
+}
+
 static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, unsigned int flags)
 {
@@ -2211,54 +2228,52 @@ follow_huge_pud(struct mm_struct *mm, un
 	return NULL;
 }
 
-static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
-{
-	if (!ptep || write || shared)
-		return 0;
-	else
-		return huge_pte_none(huge_ptep_get(ptep));
-}
-
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			struct page **pages, struct vm_area_struct **vmas,
 			unsigned long *position, int *length, int i,
-			int write)
+			unsigned int flags)
 {
 	unsigned long pfn_offset;
 	unsigned long vaddr = *position;
 	int remainder = *length;
 	struct hstate *h = hstate_vma(vma);
-	int zeropage_ok = 0;
-	int shared = vma->vm_flags & VM_SHARED;
 
 	spin_lock(&mm->page_table_lock);
 	while (vaddr < vma->vm_end && remainder) {
 		pte_t *pte;
+		int absent;
 		struct page *page;
 
 		/*
 		 * Some archs (sparc64, sh*) have multiple pte_ts to
-		 * each hugepage.  We have to make * sure we get the
+		 * each hugepage.  We have to make sure we get the
 		 * first, for the page indexing below to work.
 		 */
 		pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
-		if (huge_zeropage_ok(pte, write, shared))
-			zeropage_ok = 1;
+		absent = !pte || huge_pte_none(huge_ptep_get(pte));
+
+		/*
+		 * When coredumping, it suits get_dump_page if we just return
+		 * an error if there's a hole and no huge pagecache to back it.
+		 */
+		if (absent &&
+		    ((flags & FOLL_DUMP) && !hugetlbfs_backed(h, vma, vaddr))) {
+			remainder = 0;
+			break;
+		}
 
-		if (!pte ||
-		    (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) ||
-		    (write && !pte_write(huge_ptep_get(pte)))) {
+		if (absent ||
+		    ((flags & FOLL_WRITE) && !pte_write(huge_ptep_get(pte)))) {
 			int ret;
 
 			spin_unlock(&mm->page_table_lock);
-			ret = hugetlb_fault(mm, vma, vaddr, write);
+			ret = hugetlb_fault(mm, vma, vaddr,
+				(flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
 			spin_lock(&mm->page_table_lock);
 			if (!(ret & VM_FAULT_ERROR))
 				continue;
 
 			remainder = 0;
-			if (!i)
-				i = -EFAULT;
 			break;
 		}
 
@@ -2266,10 +2281,7 @@ int follow_hugetlb_page(struct mm_struct
 		page = pte_page(huge_ptep_get(pte));
 same_page:
 		if (pages) {
-			if (zeropage_ok)
-				pages[i] = ZERO_PAGE(0);
-			else
-				pages[i] = mem_map_offset(page, pfn_offset);
+			pages[i] = mem_map_offset(page, pfn_offset);
 			get_page(pages[i]);
 		}
 
@@ -2293,7 +2305,7 @@ same_page:
 	*length = remainder;
 	*position = vaddr;
 
-	return i;
+	return i ? i : -EFAULT;
 }
 
 void hugetlb_change_protection(struct vm_area_struct *vma,
--- mm4/mm/memory.c	2009-09-07 13:16:39.000000000 +0100
+++ mm5/mm/memory.c	2009-09-07 13:16:46.000000000 +0100
@@ -1260,17 +1260,19 @@ int __get_user_pages(struct task_struct
 		    !(vm_flags & vma->vm_flags))
 			return i ? : -EFAULT;
 
-		if (is_vm_hugetlb_page(vma)) {
-			i = follow_hugetlb_page(mm, vma, pages, vmas,
-						&start, &nr_pages, i, write);
-			continue;
-		}
-
 		foll_flags = FOLL_TOUCH;
 		if (pages)
 			foll_flags |= FOLL_GET;
 		if (flags & GUP_FLAGS_DUMP)
 			foll_flags |= FOLL_DUMP;
+		if (write)
+			foll_flags |= FOLL_WRITE;
+
+		if (is_vm_hugetlb_page(vma)) {
+			i = follow_hugetlb_page(mm, vma, pages, vmas,
+					&start, &nr_pages, i, foll_flags);
+			continue;
+		}
 
 		do {
 			struct page *page;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

next prev parent reply	other threads:[~2009-09-07 21:37 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-07 21:26 [PATCH 0/8] mm: around get_user_pages flags Hugh Dickins
2009-09-07 21:29 ` [PATCH 1/8] mm: munlock use follow_page Hugh Dickins
2009-09-08  2:58   ` KAMEZAWA Hiroyuki
2009-09-08 11:30     ` Hugh Dickins
2009-09-08 17:10   ` Rik van Riel
2009-09-09 15:59   ` Minchan Kim
2009-09-11 11:07   ` Hiroaki Wakabayashi
2009-09-07 21:31 ` [PATCH 2/8] mm: remove unused GUP flags Hugh Dickins
2009-09-08 17:27   ` Rik van Riel
2009-09-07 21:33 ` [PATCH 3/8] mm: add get_dump_page Hugh Dickins
2009-09-08 18:57   ` Rik van Riel
2009-09-07 21:35 ` [PATCH 4/8] mm: FOLL_DUMP replace FOLL_ANON Hugh Dickins
2009-09-08 18:59   ` Rik van Riel
2009-09-09 11:14   ` Mel Gorman
2009-09-09 16:16   ` Minchan Kim
2009-09-13 15:46     ` Hugh Dickins
2009-09-13 23:05       ` Minchan Kim
2009-09-07 21:37 ` Hugh Dickins [this message]
2009-09-08 22:21   ` [PATCH 5/8] mm: follow_hugetlb_page flags Rik van Riel
2009-09-09 11:31   ` Mel Gorman
2009-09-13 15:35     ` Hugh Dickins
2009-09-14 13:27       ` Mel Gorman
2009-09-15 20:26         ` Hugh Dickins
2009-09-07 21:38 ` [PATCH 6/8] mm: fix anonymous dirtying Hugh Dickins
2009-09-08 22:23   ` Rik van Riel
2009-09-07 21:39 ` [PATCH 7/8] mm: reinstate ZERO_PAGE Hugh Dickins
2009-09-08  2:37   ` KAMEZAWA Hiroyuki
2009-09-08 11:56     ` Hugh Dickins
2009-09-09  1:44       ` KAMEZAWA Hiroyuki
2009-09-15 20:15         ` Hugh Dickins
2009-09-08  7:31   ` Nick Piggin
2009-09-08 12:17     ` Hugh Dickins
2009-09-08 15:34       ` Nick Piggin
2009-09-08 16:40         ` Hugh Dickins
2009-09-08 14:13     ` Linus Torvalds
2009-09-08 23:35   ` Rik van Riel
2009-09-07 21:40 ` [PATCH 8/8] mm: FOLL flags for GUP flags Hugh Dickins
2009-09-07 23:51 ` [PATCH 0/8] mm: around get_user_pages flags Linus Torvalds
2009-09-07 23:52 ` KAMEZAWA Hiroyuki
2009-09-08  0:00 ` KOSAKI Motohiro
2009-09-10  0:33   ` KOSAKI Motohiro
2009-09-15 20:16     ` Hugh Dickins
2009-09-15 20:30 ` [PATCH 0/4] mm: mlock, hugetlb, zero followups Hugh Dickins
2009-09-15 20:31   ` [PATCH 1/4] mm: m(un)lock avoid ZERO_PAGE Hugh Dickins
2009-09-16  0:08     ` KOSAKI Motohiro
2009-09-16  9:35     ` Mel Gorman
2009-09-16 11:40       ` Hugh Dickins
2009-09-16 12:47         ` Mel Gorman
2009-09-15 20:33   ` [PATCH 2/4] mm: hugetlbfs_pagecache_present Hugh Dickins
2009-09-15 20:37   ` [PATCH 3/4] mm: ZERO_PAGE without PTE_SPECIAL Hugh Dickins
2009-09-16  6:20     ` KAMEZAWA Hiroyuki
2009-09-15 20:38   ` [PATCH 4/4] mm: move highest_memmap_pfn Hugh Dickins
2009-09-17  0:33   ` [PATCH 0/4] mm: mlock, hugetlb, zero followups KOSAKI Motohiro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Pine.LNX.4.64.0909072235360.15430@sister.anvils \
    --to=hugh.dickins@tiscali.co.uk \
    --cc=akpm@linux-foundation.org \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mel@csn.ul.ie \
    --cc=npiggin@suse.de \
    --cc=riel@redhat.com \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox