From: Hugh Dickins <hughd@google.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
Andrea Arcangeli <aarcange@redhat.com>,
Andres Lagar-Cavilla <andreslc@google.com>,
Yang Shi <yang.shi@linaro.org>, Ning Qu <quning@gmail.com>,
Stephen Rothwell <sfr@canb.auug.org.au>,
kernel test robot <xiaolong.ye@intel.com>,
Xiong Zhou <jencce.kernel@gmail.com>,
Matthew Wilcox <willy@linux.intel.com>,
Greg Thelen <gthelen@google.com>,
linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH mmotm 5/5] huge tmpfs: add shmem_pmd_fault()
Date: Sat, 16 Apr 2016 16:41:33 -0700 (PDT)
Message-ID: <alpine.LSU.2.11.1604161638230.1907@eggly.anvils>
In-Reply-To: <alpine.LSU.2.11.1604161621310.1907@eggly.anvils>
The pmd_fault() method gives the filesystem an opportunity to place
a trans huge pmd entry at *pmd, before any pagetable is exposed (and
an opportunity to split it on COW fault): now use it for huge tmpfs.
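(Not part of the patch, just for orientation: a minimal sketch of how a
filesystem plugs into the ->pmd_fault hook on a 4.6-era tree; the names
example_pmd_fault and example_vm_ops are invented for illustration, and the
real handler is shmem_pmd_fault() in the diff below.)

	static int example_pmd_fault(struct vm_area_struct *vma,
				     unsigned long address,
				     pmd_t *pmd, unsigned int flags)
	{
		/*
		 * Called under mmap_sem.  On first touch *pmd is none and
		 * the handler may install a trans huge pmd itself; on a
		 * write fault to an existing huge pmd it may split it.
		 * Returning VM_FAULT_FALLBACK tells __handle_mm_fault()
		 * to fall back to handling the fault pte by pte.
		 */
		return VM_FAULT_FALLBACK;
	}

	static const struct vm_operations_struct example_vm_ops = {
		.fault		= filemap_fault,
		.pmd_fault	= example_pmd_fault,
	};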
This patch is a little raw: with more time before LSF/MM, I would
probably want to dress it up better - the shmem_mapping() calls look
a bit ugly; it's odd to want FAULT_FLAG_MAY_HUGE and VM_FAULT_HUGE just
for a private conversation between shmem_fault() and shmem_pmd_fault();
and there might be a better distribution of work between those two, but
prising apart that series of huge tests is not to be done in a hurry.
It's good enough for now and presents the new way, but might be improved later.
This patch still leaves the huge tmpfs map_team_by_pmd() allocating a
pagetable while holding page lock, but other filesystems are no longer
doing so; and we've not yet settled whether huge tmpfs should (like anon
THP) or should not (like DAX) participate in deposit/withdraw protocol.
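(Again not part of the patch: a rough sketch, with an invented function name,
of the anon THP deposit/withdraw pairing being referred to, just to make the
alternative concrete; whether huge tmpfs should follow it is exactly what is
still unsettled.)

	static void example_deposit_withdraw(struct mm_struct *mm, pmd_t *pmd,
					     unsigned long haddr, pmd_t entry)
	{
		spinlock_t *ptl;
		pgtable_t pgtable;

		/* At huge pmd install time, anon THP parks a spare pagetable: */
		pgtable = pte_alloc_one(mm, haddr);
		if (!pgtable)
			return;		/* real code would return VM_FAULT_OOM */
		ptl = pmd_lock(mm, pmd);
		pgtable_trans_huge_deposit(mm, pmd, pgtable);
		atomic_long_inc(&mm->nr_ptes);
		set_pmd_at(mm, haddr, pmd, entry);
		spin_unlock(ptl);

		/* Later, at split or zap time, the same pagetable comes back: */
		ptl = pmd_lock(mm, pmd);
		pgtable = pgtable_trans_huge_withdraw(mm, pmd);
		atomic_long_dec(&mm->nr_ptes);
		spin_unlock(ptl);
		pte_free(mm, pgtable);
	}

DAX, by contrast, skips the deposit and simply clears the pmd on split,
refaulting pte by pte afterwards.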
Signed-off-by: Hugh Dickins <hughd@google.com>
---
I've been testing with this applied on top of mmotm plus 1-4/5,
but I suppose the right place for it is immediately after
huge-tmpfs-map-shmem-by-huge-page-pmd-or-by-page-team-ptes.patch
with a view to perhaps merging it into that in the future.
mm/huge_memory.c | 4 ++--
mm/memory.c | 13 +++++++++----
mm/shmem.c | 33 +++++++++++++++++++++++++++++++++
3 files changed, 44 insertions(+), 6 deletions(-)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3084,7 +3084,7 @@ void __split_huge_pmd(struct vm_area_str
struct mm_struct *mm = vma->vm_mm;
unsigned long haddr = address & HPAGE_PMD_MASK;
- if (!vma_is_anonymous(vma) && !vma->vm_ops->pmd_fault) {
+ if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
remap_team_by_ptes(vma, address, pmd);
return;
}
@@ -3622,7 +3622,7 @@ int map_team_by_pmd(struct vm_area_struc
pgtable_t pgtable;
spinlock_t *pml;
pmd_t pmdval;
- int ret = VM_FAULT_NOPAGE;
+ int ret = 0;
/*
* Another task may have mapped it in just ahead of us; but we
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3410,6 +3410,7 @@ static int __handle_mm_fault(struct mm_s
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
+ int ret = 0;
if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
flags & FAULT_FLAG_INSTRUCTION,
@@ -3426,13 +3427,16 @@ static int __handle_mm_fault(struct mm_s
pmd = pmd_alloc(mm, pud, address);
if (!pmd)
return VM_FAULT_OOM;
- if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) {
- int ret = create_huge_pmd(mm, vma, address, pmd, flags);
+
+ if (pmd_none(*pmd) &&
+ (transparent_hugepage_enabled(vma) ||
+ (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)))) {
+ ret = create_huge_pmd(mm, vma, address, pmd, flags);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
+ ret &= VM_FAULT_MAJOR;
} else {
pmd_t orig_pmd = *pmd;
- int ret;
barrier();
if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
@@ -3447,6 +3451,7 @@ static int __handle_mm_fault(struct mm_s
orig_pmd, flags);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
+ ret = 0;
} else {
huge_pmd_set_accessed(mm, vma, address, pmd,
orig_pmd, dirty);
@@ -3483,7 +3488,7 @@ static int __handle_mm_fault(struct mm_s
*/
pte = pte_offset_map(pmd, address);
- return handle_pte_fault(mm, vma, address, pte, pmd, flags);
+ return ret | handle_pte_fault(mm, vma, address, pte, pmd, flags);
}
/*
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3223,6 +3223,36 @@ single:
return ret | VM_FAULT_LOCKED | VM_FAULT_HUGE;
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int shmem_pmd_fault(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmd, unsigned int flags)
+{
+ struct vm_fault vmf;
+ int ret;
+
+ if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
+ /* Copy On Write: don't insert huge pmd; or split if already */
+ if (pmd_trans_huge(*pmd))
+ remap_team_by_ptes(vma, address, pmd);
+ return VM_FAULT_FALLBACK;
+ }
+
+ vmf.virtual_address = (void __user *)(address & PAGE_MASK);
+ vmf.pgoff = linear_page_index(vma, address);
+ vmf.flags = flags | FAULT_FLAG_MAY_HUGE;
+
+ ret = shmem_fault(vma, &vmf);
+ if (ret & VM_FAULT_HUGE)
+ return ret | map_team_by_pmd(vma, address, pmd, vmf.page);
+ if (ret & VM_FAULT_ERROR)
+ return ret;
+
+ unlock_page(vmf.page);
+ put_page(vmf.page);
+ return ret | VM_FAULT_FALLBACK;
+}
+#endif
+
unsigned long shmem_get_unmapped_area(struct file *file,
unsigned long uaddr, unsigned long len,
unsigned long pgoff, unsigned long flags)
@@ -5129,6 +5159,9 @@ static const struct super_operations shm
static const struct vm_operations_struct shmem_vm_ops = {
.fault = shmem_fault,
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ .pmd_fault = shmem_pmd_fault,
+#endif
.map_pages = filemap_map_pages,
#ifdef CONFIG_NUMA
.set_policy = shmem_set_policy,
--