From: "Zach O'Keefe" <zokeefe@google.com>
To: Alex Shi <alex.shi@linux.alibaba.com>,
David Hildenbrand <david@redhat.com>,
David Rientjes <rientjes@google.com>,
Michal Hocko <mhocko@suse.com>,
Pasha Tatashin <pasha.tatashin@soleen.com>,
SeongJae Park <sj@kernel.org>, Song Liu <songliubraving@fb.com>,
Vlastimil Babka <vbabka@suse.cz>, Zi Yan <ziy@nvidia.com>,
linux-mm@kvack.org
Cc: Andrea Arcangeli <aarcange@redhat.com>,
Andrew Morton <akpm@linux-foundation.org>,
Arnd Bergmann <arnd@arndb.de>,
Axel Rasmussen <axelrasmussen@google.com>,
Chris Kennelly <ckennelly@google.com>,
Chris Zankel <chris@zankel.net>, Helge Deller <deller@gmx.de>,
Hugh Dickins <hughd@google.com>,
Ivan Kokshaysky <ink@jurassic.park.msu.ru>,
"James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>,
Jens Axboe <axboe@kernel.dk>,
"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
Matthew Wilcox <willy@infradead.org>,
Matt Turner <mattst88@gmail.com>,
Max Filippov <jcmvbkbc@gmail.com>,
Miaohe Lin <linmiaohe@huawei.com>,
Minchan Kim <minchan@kernel.org>,
Patrick Xia <patrickx@google.com>,
Pavel Begunkov <asml.silence@gmail.com>,
Peter Xu <peterx@redhat.com>,
Richard Henderson <rth@twiddle.net>,
Thomas Bogendoerfer <tsbogend@alpha.franken.de>,
Yang Shi <shy828301@gmail.com>,
"Zach O'Keefe" <zokeefe@google.com>
Subject: [RFC PATCH 04/14] mm/khugepaged: separate khugepaged_scan_pmd() scan and collapse
Date: Tue, 8 Mar 2022 13:34:07 -0800 [thread overview]
Message-ID: <20220308213417.1407042-5-zokeefe@google.com> (raw)
In-Reply-To: <20220308213417.1407042-1-zokeefe@google.com>
khugepaged_scan_pmd() currently does two things: (1) it scans the pmd to
see if it is suitable for collapse, and then (2) it performs the collapse
if the scan succeeds. Separate out step (1) into a new scan_pmd() helper
so that it can be reused on its own later in the series, and introduce
struct scan_pmd_result to gather data about the scan.
Signed-off-by: Zach O'Keefe <zokeefe@google.com>
---
mm/khugepaged.c | 107 ++++++++++++++++++++++++++++++------------------
1 file changed, 67 insertions(+), 40 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index e3399a451662..b204bc1eefa7 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1244,27 +1244,34 @@ static void collapse_huge_page(struct mm_struct *mm,
return;
}
-static int khugepaged_scan_pmd(struct mm_struct *mm,
- struct vm_area_struct *vma,
- unsigned long address,
- struct page **hpage,
- struct collapse_control *cc)
+struct scan_pmd_result {
+ int result;
+ bool writable;
+ int referenced;
+ int unmapped;
+ int none_or_zero;
+ struct page *head;
+};
+
+static void scan_pmd(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long address,
+ struct collapse_control *cc,
+ struct scan_pmd_result *scan_result)
{
pmd_t *pmd;
pte_t *pte, *_pte;
- int ret = 0, result = 0, referenced = 0;
- int none_or_zero = 0, shared = 0;
+ int shared = 0;
struct page *page = NULL;
unsigned long _address;
spinlock_t *ptl;
- int node = NUMA_NO_NODE, unmapped = 0;
- bool writable = false;
+ int node = NUMA_NO_NODE;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
pmd = mm_find_pmd(mm, address);
if (!pmd) {
- result = SCAN_PMD_NULL;
+ scan_result->result = SCAN_PMD_NULL;
goto out;
}
@@ -1274,7 +1281,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
_pte++, _address += PAGE_SIZE) {
pte_t pteval = *_pte;
if (is_swap_pte(pteval)) {
- if (++unmapped <= khugepaged_max_ptes_swap ||
+ if (++scan_result->unmapped <=
+ khugepaged_max_ptes_swap ||
!cc->enforce_pte_scan_limits) {
/*
* Always be strict with uffd-wp
@@ -1282,23 +1290,24 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
* comment below for pte_uffd_wp().
*/
if (pte_swp_uffd_wp(pteval)) {
- result = SCAN_PTE_UFFD_WP;
+ scan_result->result = SCAN_PTE_UFFD_WP;
goto out_unmap;
}
continue;
} else {
- result = SCAN_EXCEED_SWAP_PTE;
+ scan_result->result = SCAN_EXCEED_SWAP_PTE;
count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
goto out_unmap;
}
}
if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
if (!userfaultfd_armed(vma) &&
- (++none_or_zero <= khugepaged_max_ptes_none ||
+ (++scan_result->none_or_zero <=
+ khugepaged_max_ptes_none ||
!cc->enforce_pte_scan_limits)) {
continue;
} else {
- result = SCAN_EXCEED_NONE_PTE;
+ scan_result->result = SCAN_EXCEED_NONE_PTE;
count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
goto out_unmap;
}
@@ -1313,22 +1322,22 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
* userfault messages that falls outside of
* the registered range. So, just be simple.
*/
- result = SCAN_PTE_UFFD_WP;
+ scan_result->result = SCAN_PTE_UFFD_WP;
goto out_unmap;
}
if (pte_write(pteval))
- writable = true;
+ scan_result->writable = true;
page = vm_normal_page(vma, _address, pteval);
if (unlikely(!page)) {
- result = SCAN_PAGE_NULL;
+ scan_result->result = SCAN_PAGE_NULL;
goto out_unmap;
}
if (page_mapcount(page) > 1 &&
++shared > khugepaged_max_ptes_shared &&
cc->enforce_pte_scan_limits) {
- result = SCAN_EXCEED_SHARED_PTE;
+ scan_result->result = SCAN_EXCEED_SHARED_PTE;
count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
goto out_unmap;
}
@@ -1338,25 +1347,25 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
/*
* Record which node the original page is from and save this
* information to cc->node_load[].
- * Khugepaged will allocate hugepage from the node has the max
+ * Caller should allocate hugepage from the node that has the max
* hit record.
*/
node = page_to_nid(page);
if (khugepaged_scan_abort(node, cc)) {
- result = SCAN_SCAN_ABORT;
+ scan_result->result = SCAN_SCAN_ABORT;
goto out_unmap;
}
cc->node_load[node]++;
if (!PageLRU(page)) {
- result = SCAN_PAGE_LRU;
+ scan_result->result = SCAN_PAGE_LRU;
goto out_unmap;
}
if (PageLocked(page)) {
- result = SCAN_PAGE_LOCK;
+ scan_result->result = SCAN_PAGE_LOCK;
goto out_unmap;
}
if (!PageAnon(page)) {
- result = SCAN_PAGE_ANON;
+ scan_result->result = SCAN_PAGE_ANON;
goto out_unmap;
}
@@ -1378,35 +1387,53 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
* will be done again later the risk seems low.
*/
if (!is_refcount_suitable(page)) {
- result = SCAN_PAGE_COUNT;
+ scan_result->result = SCAN_PAGE_COUNT;
goto out_unmap;
}
if (pte_young(pteval) ||
page_is_young(page) || PageReferenced(page) ||
mmu_notifier_test_young(vma->vm_mm, address))
- referenced++;
+ scan_result->referenced++;
}
- if (!writable) {
- result = SCAN_PAGE_RO;
- } else if (!referenced || (unmapped && referenced < HPAGE_PMD_NR/2)) {
- result = SCAN_LACK_REFERENCED_PAGE;
+ if (!scan_result->writable) {
+ scan_result->result = SCAN_PAGE_RO;
+ } else if (!scan_result->referenced ||
+ (scan_result->unmapped &&
+ scan_result->referenced < HPAGE_PMD_NR / 2)) {
+ scan_result->result = SCAN_LACK_REFERENCED_PAGE;
} else {
- result = SCAN_SUCCEED;
- ret = 1;
+ scan_result->result = SCAN_SUCCEED;
}
out_unmap:
pte_unmap_unlock(pte, ptl);
- if (ret) {
+out:
+ scan_result->head = page;
+}
+
+static int khugepaged_scan_pmd(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long address,
+ struct page **hpage,
+ struct collapse_control *cc)
+{
+ int node;
+ struct scan_pmd_result scan_result = {};
+
+ scan_pmd(mm, vma, address, cc, &scan_result);
+ if (scan_result.result == SCAN_SUCCEED) {
node = khugepaged_find_target_node(cc);
/* collapse_huge_page will return with the mmap_lock released */
- collapse_huge_page(mm, address, hpage, node,
- referenced, unmapped,
- cc->enforce_pte_scan_limits);
+ collapse_huge_page(mm, khugepaged_scan.address, hpage, node,
+ scan_result.referenced, scan_result.unmapped,
+ cc->enforce_pte_scan_limits);
}
-out:
- trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
- none_or_zero, result, unmapped);
- return ret;
+
+ trace_mm_khugepaged_scan_pmd(mm, scan_result.head, scan_result.writable,
+ scan_result.referenced,
+ scan_result.none_or_zero,
+ scan_result.result, scan_result.unmapped);
+
+ return scan_result.result == SCAN_SUCCEED;
}
static void collect_mm_slot(struct mm_slot *mm_slot)
--
2.35.1.616.g0bdcbb4464-goog
next prev parent reply other threads:[~2022-03-08 21:34 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-03-08 21:34 [RFC PATCH 00/14] mm: userspace hugepage collapse Zach O'Keefe
2022-03-08 21:34 ` [RFC PATCH 01/14] mm/rmap: add mm_find_pmd_raw helper Zach O'Keefe
2022-03-09 22:48 ` Yang Shi
2022-03-08 21:34 ` [RFC PATCH 02/14] mm/khugepaged: add struct collapse_control Zach O'Keefe
2022-03-09 22:53 ` Yang Shi
2022-03-08 21:34 ` [RFC PATCH 03/14] mm/khugepaged: add __do_collapse_huge_page() helper Zach O'Keefe
2022-03-08 21:34 ` Zach O'Keefe [this message]
2022-03-08 21:34 ` [RFC PATCH 05/14] mm/khugepaged: add mmap_assert_locked() checks to scan_pmd() Zach O'Keefe
2022-03-08 21:34 ` [RFC PATCH 06/14] mm/khugepaged: add hugepage_vma_revalidate_pmd_count() Zach O'Keefe
2022-03-09 23:15 ` Yang Shi
2022-03-08 21:34 ` [RFC PATCH 07/14] mm/khugepaged: add vm_flags_ignore to hugepage_vma_revalidate_pmd_count() Zach O'Keefe
2022-03-09 23:17 ` Yang Shi
2022-03-10 0:00 ` Zach O'Keefe
2022-03-10 0:41 ` Yang Shi
2022-03-10 1:09 ` Zach O'Keefe
2022-03-10 2:16 ` Yang Shi
2022-03-10 15:50 ` Zach O'Keefe
2022-03-10 18:17 ` Yang Shi
2022-03-10 18:46 ` David Rientjes
2022-03-10 18:58 ` Zach O'Keefe
2022-03-10 19:54 ` Yang Shi
2022-03-10 20:24 ` Zach O'Keefe
2022-03-10 18:53 ` Zach O'Keefe
2022-03-10 15:56 ` David Hildenbrand
2022-03-10 18:39 ` Zach O'Keefe
2022-03-10 18:54 ` David Rientjes
2022-03-21 14:27 ` Michal Hocko
2022-03-08 21:34 ` [RFC PATCH 08/14] mm/thp: add madv_thp_vm_flags to __transparent_hugepage_enabled() Zach O'Keefe
2022-03-08 21:34 ` [RFC PATCH 09/14] mm/khugepaged: record SCAN_PAGE_COMPOUND when scan_pmd() finds THP Zach O'Keefe
2022-03-09 23:40 ` Yang Shi
2022-03-10 0:46 ` Zach O'Keefe
2022-03-10 2:05 ` Yang Shi
2022-03-10 8:37 ` Zach O'Keefe
2022-03-08 21:34 ` [RFC PATCH 10/14] mm/khugepaged: rename khugepaged-specific/not functions Zach O'Keefe
2022-03-08 21:34 ` [RFC PATCH 11/14] mm/madvise: introduce MADV_COLLAPSE sync hugepage collapse Zach O'Keefe
2022-03-09 23:43 ` Yang Shi
2022-03-10 1:11 ` Zach O'Keefe
2022-03-08 21:34 ` [RFC PATCH 12/14] mm/madvise: introduce batched madvise(MADV_COLLPASE) collapse Zach O'Keefe
2022-03-10 0:06 ` Yang Shi
2022-03-10 19:26 ` David Rientjes
2022-03-10 20:16 ` Matthew Wilcox
2022-03-11 0:06 ` Zach O'Keefe
2022-03-25 16:51 ` Zach O'Keefe
2022-03-25 19:54 ` Yang Shi
2022-03-08 21:34 ` [RFC PATCH 13/14] mm/madvise: add __madvise_collapse_*_batch() actions Zach O'Keefe
2022-03-08 21:34 ` [RFC PATCH 14/14] mm/madvise: add process_madvise(MADV_COLLAPSE) Zach O'Keefe
2022-03-21 14:32 ` [RFC PATCH 00/14] mm: userspace hugepage collapse Zi Yan
2022-03-21 14:51 ` Zach O'Keefe
2022-03-21 14:37 ` Michal Hocko
2022-03-21 15:46 ` Zach O'Keefe
2022-03-22 12:11 ` Michal Hocko
2022-03-22 15:53 ` Zach O'Keefe
2022-03-29 12:24 ` Michal Hocko
2022-03-30 0:36 ` Zach O'Keefe
2022-03-22 6:40 ` Zach O'Keefe
2022-03-22 12:05 ` Michal Hocko
2022-03-23 13:30 ` Zach O'Keefe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220308213417.1407042-5-zokeefe@google.com \
--to=zokeefe@google.com \
--cc=James.Bottomley@HansenPartnership.com \
--cc=aarcange@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=alex.shi@linux.alibaba.com \
--cc=arnd@arndb.de \
--cc=asml.silence@gmail.com \
--cc=axboe@kernel.dk \
--cc=axelrasmussen@google.com \
--cc=chris@zankel.net \
--cc=ckennelly@google.com \
--cc=david@redhat.com \
--cc=deller@gmx.de \
--cc=hughd@google.com \
--cc=ink@jurassic.park.msu.ru \
--cc=jcmvbkbc@gmail.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=linmiaohe@huawei.com \
--cc=linux-mm@kvack.org \
--cc=mattst88@gmail.com \
--cc=mhocko@suse.com \
--cc=minchan@kernel.org \
--cc=pasha.tatashin@soleen.com \
--cc=patrickx@google.com \
--cc=peterx@redhat.com \
--cc=rientjes@google.com \
--cc=rth@twiddle.net \
--cc=shy828301@gmail.com \
--cc=sj@kernel.org \
--cc=songliubraving@fb.com \
--cc=tsbogend@alpha.franken.de \
--cc=vbabka@suse.cz \
--cc=willy@infradead.org \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.