linux-mm.kvack.org archive mirror
* Re: [PATCHv9-rebased2 11/37] mm: introduce do_set_pmd()
       [not found] <050201d1c7ae$9dbf9370$d93eba50$@alibaba-inc.com>
@ 2016-06-16  9:15 ` Hillf Danton
  2016-06-16 10:17   ` Kirill A. Shutemov
  0 siblings, 1 reply; 3+ messages in thread
From: Hillf Danton @ 2016-06-16  9:15 UTC
  To: Kirill A. Shutemov; +Cc: linux-kernel, linux-mm

> +
> +static int do_set_pmd(struct fault_env *fe, struct page *page)
> +{
> +	struct vm_area_struct *vma = fe->vma;
> +	bool write = fe->flags & FAULT_FLAG_WRITE;
> +	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
> +	pmd_t entry;
> +	int i, ret;
> +
> +	if (!transhuge_vma_suitable(vma, haddr))
> +		return VM_FAULT_FALLBACK;
> +
> +	ret = VM_FAULT_FALLBACK;
> +	page = compound_head(page);
> +
> +	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
> +	if (unlikely(!pmd_none(*fe->pmd)))
> +		goto out;

Can we report to the caller that the fault was handled correctly
(by resetting ret to zero before the jump)?
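
For illustration, roughly what I have in mind (a hypothetical change, not
part of the patch as posted):

	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
	if (unlikely(!pmd_none(*fe->pmd))) {
		/* someone else already installed an entry; report the fault as handled */
		ret = 0;
		goto out;
	}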

> +
> +	for (i = 0; i < HPAGE_PMD_NR; i++)
> +		flush_icache_page(vma, page + i);
> +
> +	entry = mk_huge_pmd(page, vma->vm_page_prot);
> +	if (write)
> +		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
> +
> +	add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR);
> +	page_add_file_rmap(page, true);
> +
> +	set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
> +
> +	update_mmu_cache_pmd(vma, haddr, fe->pmd);
> +
> +	/* fault is handled */
> +	ret = 0;
> +out:
> +	spin_unlock(fe->ptl);
> +	return ret;
> +}
> +#else
> +static int do_set_pmd(struct fault_env *fe, struct page *page)
> +{
> +	BUILD_BUG();
> +	return 0;
> +}
> +#endif
> +
>  /**
>   * alloc_set_pte - setup new PTE entry for given page and add reverse page
>   * mapping. If needed, the fucntion allocates page table or use pre-allocated.
> @@ -2940,9 +3000,19 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
>  	struct vm_area_struct *vma = fe->vma;
>  	bool write = fe->flags & FAULT_FLAG_WRITE;
>  	pte_t entry;
> +	int ret;
> +
> +	if (pmd_none(*fe->pmd) && PageTransCompound(page)) {
> +		/* THP on COW? */
> +		VM_BUG_ON_PAGE(memcg, page);
> +
> +		ret = do_set_pmd(fe, page);
> +		if (ret != VM_FAULT_FALLBACK)
> +			return ret;
> +	}
> 
>  	if (!fe->pte) {
> -		int ret = pte_alloc_one_map(fe);
> +		ret = pte_alloc_one_map(fe);
>  		if (ret)
>  			return ret;
>  	}
> diff --git a/mm/migrate.c b/mm/migrate.c


* Re: [PATCHv9-rebased2 11/37] mm: introduce do_set_pmd()
  2016-06-16  9:15 ` [PATCHv9-rebased2 11/37] mm: introduce do_set_pmd() Hillf Danton
@ 2016-06-16 10:17   ` Kirill A. Shutemov
  0 siblings, 0 replies; 3+ messages in thread
From: Kirill A. Shutemov @ 2016-06-16 10:17 UTC
  To: Hillf Danton; +Cc: Kirill A. Shutemov, linux-kernel, linux-mm

On Thu, Jun 16, 2016 at 05:15:22PM +0800, Hillf Danton wrote:
> > +
> > +static int do_set_pmd(struct fault_env *fe, struct page *page)
> > +{
> > +	struct vm_area_struct *vma = fe->vma;
> > +	bool write = fe->flags & FAULT_FLAG_WRITE;
> > +	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
> > +	pmd_t entry;
> > +	int i, ret;
> > +
> > +	if (!transhuge_vma_suitable(vma, haddr))
> > +		return VM_FAULT_FALLBACK;
> > +
> > +	ret = VM_FAULT_FALLBACK;
> > +	page = compound_head(page);
> > +
> > +	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
> > +	if (unlikely(!pmd_none(*fe->pmd)))
> > +		goto out;
> 
> Can we report to the caller that the fault was handled correctly
> (by resetting ret to zero before the jump)?

It's not necessarily handled. It's handled only if the pmd is huge. If it
points to a pte table, we still need to check the relevant pte entry.

If the pmd is huge, it will be caught by pte_alloc_one_map() later.
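
For reference, a rough sketch of the check in pte_alloc_one_map() that
catches that case (paraphrased, not the exact upstream code):

	static int pte_alloc_one_map(struct fault_env *fe)
	{
		...
		/*
		 * If a huge pmd materialized under us, retry the fault later
		 * instead of trying to walk it as a pte table.
		 */
		if (unlikely(pmd_trans_unstable(fe->pmd)))
			return VM_FAULT_NOPAGE;
		...
	}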

-- 
 Kirill A. Shutemov


* [PATCHv9-rebased2 11/37] mm: introduce do_set_pmd()
  2016-06-15 20:06 ` [PATCHv9-rebased2 00/37] " Kirill A. Shutemov
@ 2016-06-15 20:06   ` Kirill A. Shutemov
  0 siblings, 0 replies; 3+ messages in thread
From: Kirill A. Shutemov @ 2016-06-15 20:06 UTC
  To: Hugh Dickins, Andrea Arcangeli, Andrew Morton
  Cc: Dave Hansen, Vlastimil Babka, Christoph Lameter, Naoya Horiguchi,
	Jerome Marchand, Yang Shi, Sasha Levin, Andres Lagar-Cavilla,
	Ning Qu, linux-kernel, linux-mm, linux-fsdevel, Ebru Akagunduz,
	Kirill A. Shutemov

With postponed page table allocation we have a chance to set up huge pages.
do_set_pte() calls do_set_pmd() if the following criteria are met:

 - the page is compound;
 - the pmd entry is pmd_none();
 - the vma has suitable size and alignment.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 include/linux/huge_mm.h |  2 ++
 mm/huge_memory.c        |  5 ----
 mm/memory.c             | 72 ++++++++++++++++++++++++++++++++++++++++++++++++-
 mm/migrate.c            |  3 +--
 4 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 670ea0e3d138..3ef07cd7730c 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -143,6 +143,8 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
 struct page *get_huge_zero_page(void);
 void put_huge_zero_page(void);
 
+#define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot))
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 05088abe7576..b24b7993c369 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -795,11 +795,6 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 	return pmd;
 }
 
-static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
-{
-	return pmd_mkhuge(mk_pmd(page, prot));
-}
-
 static inline struct list_head *page_deferred_list(struct page *page)
 {
 	/*
diff --git a/mm/memory.c b/mm/memory.c
index 02a5491f0f17..6c0ebbc680d4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2921,6 +2921,66 @@ map_pte:
 	return 0;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+#define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1)
+static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
+		unsigned long haddr)
+{
+	if (((vma->vm_start >> PAGE_SHIFT) & HPAGE_CACHE_INDEX_MASK) !=
+			(vma->vm_pgoff & HPAGE_CACHE_INDEX_MASK))
+		return false;
+	if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
+		return false;
+	return true;
+}
+
+static int do_set_pmd(struct fault_env *fe, struct page *page)
+{
+	struct vm_area_struct *vma = fe->vma;
+	bool write = fe->flags & FAULT_FLAG_WRITE;
+	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
+	pmd_t entry;
+	int i, ret;
+
+	if (!transhuge_vma_suitable(vma, haddr))
+		return VM_FAULT_FALLBACK;
+
+	ret = VM_FAULT_FALLBACK;
+	page = compound_head(page);
+
+	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
+	if (unlikely(!pmd_none(*fe->pmd)))
+		goto out;
+
+	for (i = 0; i < HPAGE_PMD_NR; i++)
+		flush_icache_page(vma, page + i);
+
+	entry = mk_huge_pmd(page, vma->vm_page_prot);
+	if (write)
+		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+
+	add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR);
+	page_add_file_rmap(page, true);
+
+	set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
+
+	update_mmu_cache_pmd(vma, haddr, fe->pmd);
+
+	/* fault is handled */
+	ret = 0;
+out:
+	spin_unlock(fe->ptl);
+	return ret;
+}
+#else
+static int do_set_pmd(struct fault_env *fe, struct page *page)
+{
+	BUILD_BUG();
+	return 0;
+}
+#endif
+
 /**
  * alloc_set_pte - setup new PTE entry for given page and add reverse page
  * mapping. If needed, the fucntion allocates page table or use pre-allocated.
@@ -2940,9 +3000,19 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
 	struct vm_area_struct *vma = fe->vma;
 	bool write = fe->flags & FAULT_FLAG_WRITE;
 	pte_t entry;
+	int ret;
+
+	if (pmd_none(*fe->pmd) && PageTransCompound(page)) {
+		/* THP on COW? */
+		VM_BUG_ON_PAGE(memcg, page);
+
+		ret = do_set_pmd(fe, page);
+		if (ret != VM_FAULT_FALLBACK)
+			return ret;
+	}
 
 	if (!fe->pte) {
-		int ret = pte_alloc_one_map(fe);
+		ret = pte_alloc_one_map(fe);
 		if (ret)
 			return ret;
 	}
diff --git a/mm/migrate.c b/mm/migrate.c
index 7e6e9375d654..c7531ccf65f4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1984,8 +1984,7 @@ fail_putback:
 	}
 
 	orig_entry = *pmd;
-	entry = mk_pmd(new_page, vma->vm_page_prot);
-	entry = pmd_mkhuge(entry);
+	entry = mk_huge_pmd(new_page, vma->vm_page_prot);
 	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
 	/*
-- 
2.8.1

