From: zi.yan@sent.com
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: benh@kernel.crashing.org, mgorman@techsingularity.net,
kirill.shutemov@linux.intel.com, akpm@linux-foundation.org,
dave.hansen@linux.intel.com, n-horiguchi@ah.jp.nec.com,
Zi Yan <zi.yan@cs.rutgers.edu>
Subject: [PATCH v1 05/12] mm: thp: check pmd migration entry in common path
Date: Mon, 26 Sep 2016 11:22:27 -0400
Message-ID: <20160926152234.14809-6-zi.yan@sent.com>
In-Reply-To: <20160926152234.14809-1-zi.yan@sent.com>
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Once any of the page migration callers starts handling thp, memory management
code will start seeing pmd migration entries, so we need to prepare for that
before enabling the feature. This patch changes the various code points that
check the status of a given pmd, in order to prevent races between thp
migration and other pmd-related work.
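
For readers skimming the diff below, the recurring pattern is roughly the
following minimal sketch (not part of the patch itself; is_pmd_migration_entry()
and pmd_to_swp_entry() are the helpers introduced by patch 03/12 of this
series, migration_entry_to_page() is the existing swapops helper):

    #include <linux/mm.h>
    #include <linux/swapops.h>

    /*
     * A pmd migration entry is a non-present pmd, so code that is about
     * to dereference pmd_page() must first check for it and either bail
     * out or decode the swap entry, as pagemap_pmd_range() does below.
     */
    static struct page *thp_page_of_pmd(pmd_t pmd)
    {
            if (is_pmd_migration_entry(pmd)) {
                    swp_entry_t entry = pmd_to_swp_entry(pmd);

                    return migration_entry_to_page(entry);
            }
            if (pmd_present(pmd))
                    return pmd_page(pmd);
            return NULL;
    }
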
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Zi Yan <zi.yan@cs.rutgers.edu>
---
arch/x86/mm/gup.c | 3 +++
fs/proc/task_mmu.c | 20 +++++++-------
mm/gup.c | 8 ++++++
mm/huge_memory.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++-------
mm/memcontrol.c | 2 ++
mm/memory.c | 5 ++++
6 files changed, 95 insertions(+), 19 deletions(-)
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index b8b6a60..72d0bef 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -10,6 +10,7 @@
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/memremap.h>
+#include <linux/swapops.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
@@ -225,6 +226,8 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
if (pmd_none(pmd))
return 0;
if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
+ if (unlikely(is_pmd_migration_entry(pmd)))
+ return 0;
/*
* NUMA hinting faults need to be handled in the GUP
* slowpath for accounting purposes and so that they
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f6fa99e..60f6ce3 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -931,6 +931,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
+ if (unlikely(is_pmd_migration_entry(*pmd)))
+ goto out;
+
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
clear_soft_dirty_pmd(vma, addr, pmd);
goto out;
@@ -1215,19 +1218,18 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
if (ptl) {
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;
+ struct page *page;
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
- /*
- * Currently pmd for thp is always present because thp
- * can not be swapped-out, migrated, or HWPOISONed
- * (split in such cases instead.)
- * This if-check is just to prepare for future implementation.
- */
- if (pmd_present(pmd)) {
- struct page *page = pmd_page(pmd);
-
+ if (is_pmd_migration_entry(pmd)) {
+ swp_entry_t entry = pmd_to_swp_entry(pmd);
+ frame = swp_type(entry) |
+ (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
+ page = migration_entry_to_page(entry);
+ } else if (pmd_present(pmd)) {
+ page = pmd_page(pmd);
if (page_mapcount(page) == 1)
flags |= PM_MMAP_EXCLUSIVE;
diff --git a/mm/gup.c b/mm/gup.c
index 96b2b2f..ef56be2 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -272,6 +272,11 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
spin_unlock(ptl);
return follow_page_pte(vma, address, pmd, flags);
}
+ if (is_pmd_migration_entry(*pmd)) {
+ spin_unlock(ptl);
+ return no_page_table(vma, flags);
+ }
+
if (flags & FOLL_SPLIT) {
int ret;
page = pmd_page(*pmd);
@@ -1362,6 +1367,9 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
return 0;
if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {
+ if (unlikely(is_pmd_migration_entry(pmd)))
+ return 0;
+
/*
* NUMA hinting faults need to be handled in the GUP
* slowpath for accounting purposes and so that they
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0cd39ef..f4fcfc7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -787,6 +787,19 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
goto out_unlock;
}
+ if (unlikely(is_pmd_migration_entry(pmd))) {
+ swp_entry_t entry = pmd_to_swp_entry(pmd);
+
+ if (is_write_migration_entry(entry)) {
+ make_migration_entry_read(&entry);
+ pmd = swp_entry_to_pmd(entry);
+ set_pmd_at(src_mm, addr, src_pmd, pmd);
+ }
+ set_pmd_at(dst_mm, addr, dst_pmd, pmd);
+ ret = 0;
+ goto out_unlock;
+ }
+
src_page = pmd_page(pmd);
VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
get_page(src_page);
@@ -952,6 +965,9 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
goto out_unlock;
+ if (unlikely(is_pmd_migration_entry(*fe->pmd)))
+ goto out_unlock;
+
page = pmd_page(orig_pmd);
VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
/*
@@ -1077,7 +1093,15 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
goto out;
- page = pmd_page(*pmd);
+ if (is_pmd_migration_entry(*pmd)) {
+ swp_entry_t entry;
+ entry = pmd_to_swp_entry(*pmd);
+ if (!is_migration_entry(entry))
+ goto out;
+ page = pfn_to_page(swp_offset(entry));
+ } else
+ page = pmd_page(*pmd);
+
VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
if (flags & FOLL_TOUCH)
touch_pmd(vma, addr, pmd);
@@ -1273,6 +1297,9 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
if (is_huge_zero_pmd(orig_pmd))
goto out;
+ if (unlikely(is_pmd_migration_entry(orig_pmd)))
+ goto out;
+
page = pmd_page(orig_pmd);
/*
* If other processes are mapping this page, we couldn't discard
@@ -1348,21 +1375,40 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
spin_unlock(ptl);
tlb_remove_page(tlb, pmd_page(orig_pmd));
} else {
- struct page *page = pmd_page(orig_pmd);
- page_remove_rmap(page, true);
- VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
- VM_BUG_ON_PAGE(!PageHead(page), page);
- if (PageAnon(page)) {
+ struct page *page;
+ int migration = 0;
+
+ if (!is_pmd_migration_entry(orig_pmd)) {
+ page = pmd_page(orig_pmd);
+ page_remove_rmap(page, true);
+ VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
+ VM_BUG_ON_PAGE(!PageHead(page), page);
+ if (PageAnon(page)) {
+ pgtable_t pgtable;
+ pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
+ pte_free(tlb->mm, pgtable);
+ atomic_long_dec(&tlb->mm->nr_ptes);
+ add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
+ } else {
+ add_mm_counter(tlb->mm, MM_FILEPAGES, -HPAGE_PMD_NR);
+ }
+ } else {
+ swp_entry_t entry;
pgtable_t pgtable;
+
+ entry = pmd_to_swp_entry(orig_pmd);
+ free_swap_and_cache(entry); /* warning on failure? */
+
+ add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
pte_free(tlb->mm, pgtable);
atomic_long_dec(&tlb->mm->nr_ptes);
- add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
- } else {
- add_mm_counter(tlb->mm, MM_FILEPAGES, -HPAGE_PMD_NR);
+
+ migration = 1;
}
spin_unlock(ptl);
- tlb_remove_page_size(tlb, page, HPAGE_PMD_SIZE);
+ if (!migration)
+ tlb_remove_page_size(tlb, page, HPAGE_PMD_SIZE);
}
return 1;
}
@@ -1445,6 +1491,11 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
return ret;
}
+ if (is_pmd_migration_entry(*pmd)) {
+ spin_unlock(ptl);
+ return ret;
+ }
+
if (!prot_numa || !pmd_protnone(*pmd)) {
entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
entry = pmd_modify(entry, newprot);
@@ -1656,6 +1707,11 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
if (pmd_trans_huge(*pmd)) {
page = pmd_page(*pmd);
+
+ if (is_pmd_migration_entry(*pmd)) {
+ goto out;
+ }
+
if (PageMlocked(page))
clear_page_mlock(page);
} else if (!pmd_devmap(*pmd))
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4be518d..421ac4ff 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4649,6 +4649,8 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
struct page *page = NULL;
enum mc_target_type ret = MC_TARGET_NONE;
+ if (unlikely(is_pmd_migration_entry(pmd)))
+ return ret;
page = pmd_page(pmd);
VM_BUG_ON_PAGE(!page || !PageHead(page), page);
if (!(mc.flags & MOVE_ANON))
diff --git a/mm/memory.c b/mm/memory.c
index 83be99d..3ad3bb2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3590,6 +3590,11 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
barrier();
if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
+ if (unlikely(is_pmd_migration_entry(orig_pmd))) {
+ pmd_migration_entry_wait(mm, fe.pmd);
+ return 0;
+ }
+
if (pmd_protnone(orig_pmd))
return do_huge_pmd_numa_page(&fe, orig_pmd);
--
2.9.3