From: "Zach O'Keefe" <zokeefe@google.com>
To: Alex Shi <alex.shi@linux.alibaba.com>,
David Hildenbrand <david@redhat.com>,
David Rientjes <rientjes@google.com>,
Matthew Wilcox <willy@infradead.org>,
Michal Hocko <mhocko@suse.com>,
Pasha Tatashin <pasha.tatashin@soleen.com>,
Peter Xu <peterx@redhat.com>, SeongJae Park <sj@kernel.org>,
Song Liu <songliubraving@fb.com>,
Vlastimil Babka <vbabka@suse.cz>, Yang Shi <shy828301@gmail.com>,
Zi Yan <ziy@nvidia.com>,
linux-mm@kvack.org
Cc: Andrea Arcangeli <aarcange@redhat.com>,
Andrew Morton <akpm@linux-foundation.org>,
Arnd Bergmann <arnd@arndb.de>,
Axel Rasmussen <axelrasmussen@google.com>,
Chris Kennelly <ckennelly@google.com>,
Chris Zankel <chris@zankel.net>, Helge Deller <deller@gmx.de>,
Hugh Dickins <hughd@google.com>,
Ivan Kokshaysky <ink@jurassic.park.msu.ru>,
"James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>,
Jens Axboe <axboe@kernel.dk>,
"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
Matt Turner <mattst88@gmail.com>,
Max Filippov <jcmvbkbc@gmail.com>,
Miaohe Lin <linmiaohe@huawei.com>,
Minchan Kim <minchan@kernel.org>,
Patrick Xia <patrickx@google.com>,
Pavel Begunkov <asml.silence@gmail.com>,
Thomas Bogendoerfer <tsbogend@alpha.franken.de>,
"Zach O'Keefe" <zokeefe@google.com>
Subject: [PATCH v5 06/13] mm/khugepaged: add flag to ignore khugepaged_max_ptes_*
Date: Wed, 4 May 2022 14:44:30 -0700 [thread overview]
Message-ID: <20220504214437.2850685-7-zokeefe@google.com> (raw)
In-Reply-To: <20220504214437.2850685-1-zokeefe@google.com>
Add an enforce_pte_scan_limits flag to struct collapse_control that lets a
collapse context ignore the sysfs-controlled knobs
khugepaged_max_ptes_[none|swap|shared]. Set this flag in the khugepaged
collapse context to preserve existing khugepaged behavior.
This flag will be left unset when the madvise collapse context is
introduced: there, the user presumably has reason to believe the
collapse will be beneficial, and khugepaged heuristics shouldn't tell
the user they are wrong.
Signed-off-by: Zach O'Keefe <zokeefe@google.com>
---
mm/khugepaged.c | 31 +++++++++++++++++++++----------
1 file changed, 21 insertions(+), 10 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 1314caed65b0..ca730aec0e3e 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -87,6 +87,9 @@ static struct kmem_cache *mm_slot_cache __read_mostly;
#define MAX_PTE_MAPPED_THP 8
struct collapse_control {
+ /* Respect khugepaged_max_ptes_[none|swap|shared] */
+ bool enforce_pte_scan_limits;
+
/* Num pages scanned per node */
int node_load[MAX_NUMNODES];
@@ -614,6 +617,7 @@ static bool is_refcount_suitable(struct page *page)
static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
unsigned long address,
pte_t *pte,
+ struct collapse_control *cc,
struct list_head *compound_pagelist)
{
struct page *page = NULL;
@@ -627,7 +631,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
if (pte_none(pteval) || (pte_present(pteval) &&
is_zero_pfn(pte_pfn(pteval)))) {
if (!userfaultfd_armed(vma) &&
- ++none_or_zero <= khugepaged_max_ptes_none) {
+ (++none_or_zero <= khugepaged_max_ptes_none ||
+ !cc->enforce_pte_scan_limits)) {
continue;
} else {
result = SCAN_EXCEED_NONE_PTE;
@@ -647,8 +652,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
VM_BUG_ON_PAGE(!PageAnon(page), page);
- if (page_mapcount(page) > 1 &&
- ++shared > khugepaged_max_ptes_shared) {
+ if (cc->enforce_pte_scan_limits && page_mapcount(page) > 1 &&
+ ++shared > khugepaged_max_ptes_shared) {
result = SCAN_EXCEED_SHARED_PTE;
count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
goto out;
@@ -1187,7 +1192,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
mmu_notifier_invalidate_range_end(&range);
spin_lock(pte_ptl);
- result = __collapse_huge_page_isolate(vma, address, pte,
+ result = __collapse_huge_page_isolate(vma, address, pte, cc,
&compound_pagelist);
spin_unlock(pte_ptl);
@@ -1275,7 +1280,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
_pte++, _address += PAGE_SIZE) {
pte_t pteval = *_pte;
if (is_swap_pte(pteval)) {
- if (++unmapped <= khugepaged_max_ptes_swap) {
+ if (++unmapped <= khugepaged_max_ptes_swap ||
+ !cc->enforce_pte_scan_limits) {
/*
* Always be strict with uffd-wp
* enabled swap entries. Please see
@@ -1294,7 +1300,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
}
if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
if (!userfaultfd_armed(vma) &&
- ++none_or_zero <= khugepaged_max_ptes_none) {
+ (++none_or_zero <= khugepaged_max_ptes_none ||
+ !cc->enforce_pte_scan_limits)) {
continue;
} else {
result = SCAN_EXCEED_NONE_PTE;
@@ -1324,8 +1331,9 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_unmap;
}
- if (page_mapcount(page) > 1 &&
- ++shared > khugepaged_max_ptes_shared) {
+ if (cc->enforce_pte_scan_limits &&
+ page_mapcount(page) > 1 &&
+ ++shared > khugepaged_max_ptes_shared) {
result = SCAN_EXCEED_SHARED_PTE;
count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
goto out_unmap;
@@ -2051,7 +2059,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
continue;
if (xa_is_value(page)) {
- if (++swap > khugepaged_max_ptes_swap) {
+ if (cc->enforce_pte_scan_limits &&
+ ++swap > khugepaged_max_ptes_swap) {
result = SCAN_EXCEED_SWAP_PTE;
count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
break;
@@ -2102,7 +2111,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
rcu_read_unlock();
if (result == SCAN_SUCCEED) {
- if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
+ if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none &&
+ cc->enforce_pte_scan_limits) {
result = SCAN_EXCEED_NONE_PTE;
count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
} else {
@@ -2332,6 +2342,7 @@ static int khugepaged(void *none)
{
struct mm_slot *mm_slot;
struct collapse_control cc = {
+ .enforce_pte_scan_limits = true,
.last_target_node = NUMA_NO_NODE,
.alloc_charge_hpage = &alloc_charge_hpage,
};
--
2.36.0.464.gb9c8b46e94-goog
next prev parent reply other threads:[~2022-05-04 21:45 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-05-04 21:44 [PATCH v5 00/12] mm: userspace hugepage collapse Zach O'Keefe
2022-05-04 21:44 ` [PATCH v5 01/13] mm/khugepaged: record SCAN_PMD_MAPPED when scan_pmd() finds THP Zach O'Keefe
2022-05-18 18:41 ` Peter Xu
2022-05-19 21:06 ` Zach O'Keefe
2022-05-20 1:12 ` Peter Xu
2022-05-04 21:44 ` [PATCH v5 02/13] mm/khugepaged: add struct collapse_control Zach O'Keefe
2022-05-12 20:02 ` David Rientjes
2022-05-18 20:03 ` Peter Xu
2022-05-18 20:11 ` Zach O'Keefe
2022-05-04 21:44 ` [PATCH v5 03/13] mm/khugepaged: dedup and simplify hugepage alloc and charging Zach O'Keefe
2022-05-12 20:02 ` David Rientjes
2022-05-13 18:26 ` Zach O'Keefe
2022-05-04 21:44 ` [PATCH v5 04/13] mm/khugepaged: make hugepage allocation context-specific Zach O'Keefe
2022-05-12 20:02 ` David Rientjes
2022-05-13 23:04 ` Zach O'Keefe
2022-05-13 23:17 ` Yang Shi
2022-05-13 23:55 ` Zach O'Keefe
2022-05-17 17:18 ` Yang Shi
2022-05-17 22:35 ` Zach O'Keefe
2022-05-25 17:58 ` Yang Shi
2022-05-25 18:27 ` Zach O'Keefe
2022-05-04 21:44 ` [PATCH v5 05/13] mm/khugepaged: pipe enum scan_result codes back to callers Zach O'Keefe
2022-05-12 20:02 ` David Rientjes
2022-05-04 21:44 ` Zach O'Keefe [this message]
2022-05-12 20:03 ` [PATCH v5 06/13] mm/khugepaged: add flag to ignore khugepaged_max_ptes_* David Rientjes
2022-05-04 21:44 ` [PATCH v5 07/13] mm/khugepaged: add flag to ignore page young/referenced requirement Zach O'Keefe
2022-05-12 20:03 ` David Rientjes
2022-05-13 18:17 ` Zach O'Keefe
2022-05-04 21:44 ` [PATCH v5 08/13] mm/madvise: introduce MADV_COLLAPSE sync hugepage collapse Zach O'Keefe
2022-05-05 18:50 ` Zach O'Keefe
2022-05-05 18:58 ` Zach O'Keefe
2022-05-04 21:44 ` [PATCH v5 09/13] mm/khugepaged: rename prefix of shared collapse functions Zach O'Keefe
2022-05-12 20:03 ` David Rientjes
2022-05-04 21:44 ` [PATCH v5 10/13] mm/madvise: add MADV_COLLAPSE to process_madvise() Zach O'Keefe
2022-05-11 0:49 ` Rongwei Wang
2022-05-11 15:34 ` Zach O'Keefe
2022-05-12 15:53 ` Rongwei Wang
2022-05-12 20:03 ` David Rientjes
2022-05-13 21:06 ` Zach O'Keefe
2022-05-16 3:56 ` Rongwei Wang
2022-05-12 20:03 ` David Rientjes
2022-05-04 21:44 ` [PATCH v5 11/13] selftests/vm: modularize collapse selftests Zach O'Keefe
2022-05-04 21:44 ` [PATCH v5 12/13] selftests/vm: add MADV_COLLAPSE collapse context to selftests Zach O'Keefe
2022-05-04 21:44 ` [PATCH v5 13/13] selftests/vm: add test to verify recollapse of THPs Zach O'Keefe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220504214437.2850685-7-zokeefe@google.com \
--to=zokeefe@google.com \
--cc=James.Bottomley@HansenPartnership.com \
--cc=aarcange@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=alex.shi@linux.alibaba.com \
--cc=arnd@arndb.de \
--cc=asml.silence@gmail.com \
--cc=axboe@kernel.dk \
--cc=axelrasmussen@google.com \
--cc=chris@zankel.net \
--cc=ckennelly@google.com \
--cc=david@redhat.com \
--cc=deller@gmx.de \
--cc=hughd@google.com \
--cc=ink@jurassic.park.msu.ru \
--cc=jcmvbkbc@gmail.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=linmiaohe@huawei.com \
--cc=linux-mm@kvack.org \
--cc=mattst88@gmail.com \
--cc=mhocko@suse.com \
--cc=minchan@kernel.org \
--cc=pasha.tatashin@soleen.com \
--cc=patrickx@google.com \
--cc=peterx@redhat.com \
--cc=rientjes@google.com \
--cc=shy828301@gmail.com \
--cc=sj@kernel.org \
--cc=songliubraving@fb.com \
--cc=tsbogend@alpha.franken.de \
--cc=vbabka@suse.cz \
--cc=willy@infradead.org \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.