From: "Zach O'Keefe" <zokeefe@google.com>
To: Alex Shi <alex.shi@linux.alibaba.com>,
	David Hildenbrand <david@redhat.com>,
	David Rientjes <rientjes@google.com>,
	Michal Hocko <mhocko@suse.com>,
	Pasha Tatashin <pasha.tatashin@soleen.com>,
	SeongJae Park <sj@kernel.org>,
	Song Liu <songliubraving@fb.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	Zi Yan <ziy@nvidia.com>,
	linux-mm@kvack.org
Cc: Andrea Arcangeli <aarcange@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Arnd Bergmann <arnd@arndb.de>,
	Axel Rasmussen <axelrasmussen@google.com>,
	Chris Kennelly <ckennelly@google.com>,
	Chris Zankel <chris@zankel.net>,
	Helge Deller <deller@gmx.de>,
	Hugh Dickins <hughd@google.com>,
	Ivan Kokshaysky <ink@jurassic.park.msu.ru>,
	"James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>,
	Jens Axboe <axboe@kernel.dk>,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Matthew Wilcox <willy@infradead.org>,
	Matt Turner <mattst88@gmail.com>,
	Max Filippov <jcmvbkbc@gmail.com>,
	Miaohe Lin <linmiaohe@huawei.com>,
	Minchan Kim <minchan@kernel.org>,
	Patrick Xia <patrickx@google.com>,
	Pavel Begunkov <asml.silence@gmail.com>,
	Peter Xu <peterx@redhat.com>,
	Richard Henderson <rth@twiddle.net>,
	Thomas Bogendoerfer <tsbogend@alpha.franken.de>,
	Yang Shi <shy828301@gmail.com>,
	"Zach O'Keefe" <zokeefe@google.com>
Subject: [RFC PATCH 03/14] mm/khugepaged: add __do_collapse_huge_page() helper
Date: Tue,  8 Mar 2022 13:34:06 -0800
Message-ID: <20220308213417.1407042-4-zokeefe@google.com>
In-Reply-To: <20220308213417.1407042-1-zokeefe@google.com>

collapse_huge_page() currently does the following: (1) possibly
allocates a hugepage, (2) charges the owning memcg, (3) swaps in
swapped-out pages, (4) performs the actual collapse (copying of pages
and installation of the huge pmd), and (5) some final memcg accounting
on the error path.

Separate out step (4) into a new helper, __do_collapse_huge_page(), so
that it can be reused on its own later in the series.
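
For orientation, the resulting structure is sketched below. This is a
simplified outline of the diff that follows (locking, error handling,
and most of the function bodies are elided), not additional code:

	/* Step (4) only: caller holds mmap_lock in write mode. */
	static int __do_collapse_huge_page(struct mm_struct *mm,
					   struct vm_area_struct *vma,
					   unsigned long address, pmd_t *pmd,
					   struct page *new_page,
					   int enforce_pte_scan_limits,
					   int *isolated_out)
	{
		/*
		 * Isolate and copy the PTE-mapped pages, install the huge
		 * pmd mapping new_page, and return a SCAN_* result.
		 */
	}

	static void collapse_huge_page(struct mm_struct *mm, ...)
	{
		/*
		 * Steps (1)-(3) and (5): allocate the hugepage, charge the
		 * memcg, swap in pages, revalidate the vma and pmd under
		 * the mmap write lock, then hand off to the new helper:
		 */
		result = __do_collapse_huge_page(mm, vma, address, pmd,
						 new_page,
						 enforce_pte_scan_limits,
						 &isolated);
		if (result == SCAN_SUCCEED) {
			*hpage = NULL;
			khugepaged_pages_collapsed++;
		}
	}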

Signed-off-by: Zach O'Keefe <zokeefe@google.com>
---
 mm/khugepaged.c | 178 +++++++++++++++++++++++++++---------------------
 1 file changed, 100 insertions(+), 78 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 36fc0099c445..e3399a451662 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1058,85 +1058,23 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 	return true;
 }
 
-static void collapse_huge_page(struct mm_struct *mm,
-				   unsigned long address,
-				   struct page **hpage,
-				   int node, int referenced, int unmapped,
-				   int enforce_pte_scan_limits)
-{
-	LIST_HEAD(compound_pagelist);
-	pmd_t *pmd, _pmd;
+static int __do_collapse_huge_page(struct mm_struct *mm,
+				   struct vm_area_struct *vma,
+				   unsigned long address, pmd_t *pmd,
+				   struct page *new_page,
+				   int enforce_pte_scan_limits,
+				   int *isolated_out)
+{
+	pmd_t _pmd;
 	pte_t *pte;
 	pgtable_t pgtable;
-	struct page *new_page;
 	spinlock_t *pmd_ptl, *pte_ptl;
-	int isolated = 0, result = 0;
-	struct vm_area_struct *vma;
+	int isolated = 0, result = SCAN_SUCCEED;
 	struct mmu_notifier_range range;
-	gfp_t gfp;
-
-	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-
-	/* Only allocate from the target node */
-	gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
-
-	/*
-	 * Before allocating the hugepage, release the mmap_lock read lock.
-	 * The allocation can take potentially a long time if it involves
-	 * sync compaction, and we do not need to hold the mmap_lock during
-	 * that. We will recheck the vma after taking it again in write mode.
-	 */
-	mmap_read_unlock(mm);
-	new_page = khugepaged_alloc_page(hpage, gfp, node);
-	if (!new_page) {
-		result = SCAN_ALLOC_HUGE_PAGE_FAIL;
-		goto out_nolock;
-	}
-
-	if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) {
-		result = SCAN_CGROUP_CHARGE_FAIL;
-		goto out_nolock;
-	}
-	count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC);
-
-	mmap_read_lock(mm);
-	result = hugepage_vma_revalidate(mm, address, &vma);
-	if (result) {
-		mmap_read_unlock(mm);
-		goto out_nolock;
-	}
-
-	pmd = mm_find_pmd(mm, address);
-	if (!pmd) {
-		result = SCAN_PMD_NULL;
-		mmap_read_unlock(mm);
-		goto out_nolock;
-	}
-
-	/*
-	 * __collapse_huge_page_swapin always returns with mmap_lock locked.
-	 * If it fails, we release mmap_lock and jump out_nolock.
-	 * Continuing to collapse causes inconsistency.
-	 */
-	if (unmapped && !__collapse_huge_page_swapin(mm, vma, address,
-						     pmd, referenced)) {
-		mmap_read_unlock(mm);
-		goto out_nolock;
-	}
+	LIST_HEAD(compound_pagelist);
 
-	mmap_read_unlock(mm);
-	/*
-	 * Prevent all access to pagetables with the exception of
-	 * gup_fast later handled by the ptep_clear_flush and the VM
-	 * handled by the anon_vma lock + PG_lock.
-	 */
-	mmap_write_lock(mm);
-	result = hugepage_vma_revalidate(mm, address, &vma);
-	if (result)
-		goto out_up_write;
-	/* check if the pmd is still valid */
-	if (mm_find_pmd(mm, address) != pmd)
-		goto out_up_write;
+	VM_BUG_ON(!new_page);
+	mmap_assert_write_locked(mm);
 
 	anon_vma_lock_write(vma->anon_vma);
 
@@ -1176,7 +1114,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		spin_unlock(pmd_ptl);
 		anon_vma_unlock_write(vma->anon_vma);
 		result = SCAN_FAIL;
-		goto out_up_write;
+		goto out;
 	}
 
 	/*
@@ -1208,11 +1146,95 @@ static void collapse_huge_page(struct mm_struct *mm,
 	set_pmd_at(mm, address, pmd, _pmd);
 	update_mmu_cache_pmd(vma, address, pmd);
 	spin_unlock(pmd_ptl);
+out:
+	if (isolated_out)
+		*isolated_out = isolated;
+	return result;
+}
 
-	*hpage = NULL;
 
-	khugepaged_pages_collapsed++;
-	result = SCAN_SUCCEED;
+static void collapse_huge_page(struct mm_struct *mm,
+			       unsigned long address,
+			       struct page **hpage,
+			       int node, int referenced, int unmapped,
+			       int enforce_pte_scan_limits)
+{
+	pmd_t *pmd;
+	struct page *new_page;
+	int isolated = 0, result = 0;
+	struct vm_area_struct *vma;
+	gfp_t gfp;
+
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+	/* Only allocate from the target node */
+	gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
+
+	/*
+	 * Before allocating the hugepage, release the mmap_lock read lock.
+	 * The allocation can take potentially a long time if it involves
+	 * sync compaction, and we do not need to hold the mmap_lock during
+	 * that. We will recheck the vma after taking it again in write mode.
+	 */
+	mmap_read_unlock(mm);
+	new_page = khugepaged_alloc_page(hpage, gfp, node);
+	if (!new_page) {
+		result = SCAN_ALLOC_HUGE_PAGE_FAIL;
+		goto out_nolock;
+	}
+
+	if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) {
+		result = SCAN_CGROUP_CHARGE_FAIL;
+		goto out_nolock;
+	}
+	count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC);
+
+	mmap_read_lock(mm);
+	result = hugepage_vma_revalidate(mm, address, &vma);
+	if (result) {
+		mmap_read_unlock(mm);
+		goto out_nolock;
+	}
+
+	pmd = mm_find_pmd(mm, address);
+	if (!pmd) {
+		result = SCAN_PMD_NULL;
+		mmap_read_unlock(mm);
+		goto out_nolock;
+	}
+
+	/*
+	 * __collapse_huge_page_swapin always returns with mmap_lock locked.
+	 * If it fails, we release mmap_lock and jump out_nolock.
+	 * Continuing to collapse causes inconsistency.
+	 */
+	if (unmapped && !__collapse_huge_page_swapin(mm, vma, address,
+						     pmd, referenced)) {
+		mmap_read_unlock(mm);
+		goto out_nolock;
+	}
+
+	mmap_read_unlock(mm);
+	/*
+	 * Prevent all access to pagetables with the exception of
+	 * gup_fast later handled by the ptep_clear_flush and the VM
+	 * handled by the anon_vma lock + PG_lock.
+	 */
+	mmap_write_lock(mm);
+
+	result = hugepage_vma_revalidate(mm, address, &vma);
+	if (result)
+		goto out_up_write;
+	/* check if the pmd is still valid */
+	if (mm_find_pmd(mm, address) != pmd)
+		goto out_up_write;
+
+	result = __do_collapse_huge_page(mm, vma, address, pmd, new_page,
+					 enforce_pte_scan_limits, &isolated);
+	if (result == SCAN_SUCCEED) {
+		*hpage = NULL;
+		khugepaged_pages_collapsed++;
+	}
 out_up_write:
 	mmap_write_unlock(mm);
 out_nolock:
-- 
2.35.1.616.g0bdcbb4464-goog



Thread overview: 57+ messages
2022-03-08 21:34 [RFC PATCH 00/14] mm: userspace hugepage collapse Zach O'Keefe
2022-03-08 21:34 ` [RFC PATCH 01/14] mm/rmap: add mm_find_pmd_raw helper Zach O'Keefe
2022-03-09 22:48   ` Yang Shi
2022-03-08 21:34 ` [RFC PATCH 02/14] mm/khugepaged: add struct collapse_control Zach O'Keefe
2022-03-09 22:53   ` Yang Shi
2022-03-08 21:34 ` Zach O'Keefe [this message]
2022-03-08 21:34 ` [RFC PATCH 04/14] mm/khugepaged: separate khugepaged_scan_pmd() scan and collapse Zach O'Keefe
2022-03-08 21:34 ` [RFC PATCH 05/14] mm/khugepaged: add mmap_assert_locked() checks to scan_pmd() Zach O'Keefe
2022-03-08 21:34 ` [RFC PATCH 06/14] mm/khugepaged: add hugepage_vma_revalidate_pmd_count() Zach O'Keefe
2022-03-09 23:15   ` Yang Shi
2022-03-08 21:34 ` [RFC PATCH 07/14] mm/khugepaged: add vm_flags_ignore to hugepage_vma_revalidate_pmd_count() Zach O'Keefe
2022-03-09 23:17   ` Yang Shi
2022-03-10  0:00     ` Zach O'Keefe
2022-03-10  0:41       ` Yang Shi
2022-03-10  1:09         ` Zach O'Keefe
2022-03-10  2:16           ` Yang Shi
2022-03-10 15:50             ` Zach O'Keefe
2022-03-10 18:17               ` Yang Shi
2022-03-10 18:46                 ` David Rientjes
2022-03-10 18:58                   ` Zach O'Keefe
2022-03-10 19:54                   ` Yang Shi
2022-03-10 20:24                     ` Zach O'Keefe
2022-03-10 18:53                 ` Zach O'Keefe
2022-03-10 15:56   ` David Hildenbrand
2022-03-10 18:39     ` Zach O'Keefe
2022-03-10 18:54     ` David Rientjes
2022-03-21 14:27       ` Michal Hocko
2022-03-08 21:34 ` [RFC PATCH 08/14] mm/thp: add madv_thp_vm_flags to __transparent_hugepage_enabled() Zach O'Keefe
2022-03-08 21:34 ` [RFC PATCH 09/14] mm/khugepaged: record SCAN_PAGE_COMPOUND when scan_pmd() finds THP Zach O'Keefe
2022-03-09 23:40   ` Yang Shi
2022-03-10  0:46     ` Zach O'Keefe
2022-03-10  2:05       ` Yang Shi
2022-03-10  8:37         ` Zach O'Keefe
2022-03-08 21:34 ` [RFC PATCH 10/14] mm/khugepaged: rename khugepaged-specific/not functions Zach O'Keefe
2022-03-08 21:34 ` [RFC PATCH 11/14] mm/madvise: introduce MADV_COLLAPSE sync hugepage collapse Zach O'Keefe
2022-03-09 23:43   ` Yang Shi
2022-03-10  1:11     ` Zach O'Keefe
2022-03-08 21:34 ` [RFC PATCH 12/14] mm/madvise: introduce batched madvise(MADV_COLLPASE) collapse Zach O'Keefe
2022-03-10  0:06   ` Yang Shi
2022-03-10 19:26     ` David Rientjes
2022-03-10 20:16       ` Matthew Wilcox
2022-03-11  0:06         ` Zach O'Keefe
2022-03-25 16:51           ` Zach O'Keefe
2022-03-25 19:54             ` Yang Shi
2022-03-08 21:34 ` [RFC PATCH 13/14] mm/madvise: add __madvise_collapse_*_batch() actions Zach O'Keefe
2022-03-08 21:34 ` [RFC PATCH 14/14] mm/madvise: add process_madvise(MADV_COLLAPSE) Zach O'Keefe
2022-03-21 14:32 ` [RFC PATCH 00/14] mm: userspace hugepage collapse Zi Yan
2022-03-21 14:51   ` Zach O'Keefe
2022-03-21 14:37 ` Michal Hocko
2022-03-21 15:46   ` Zach O'Keefe
2022-03-22 12:11     ` Michal Hocko
2022-03-22 15:53       ` Zach O'Keefe
2022-03-29 12:24         ` Michal Hocko
2022-03-30  0:36           ` Zach O'Keefe
2022-03-22  6:40 ` Zach O'Keefe
2022-03-22 12:05   ` Michal Hocko
2022-03-23 13:30     ` Zach O'Keefe
