From: "Zach O'Keefe" <zokeefe@google.com>
To: Alex Shi <alex.shi@linux.alibaba.com>,
David Hildenbrand <david@redhat.com>,
David Rientjes <rientjes@google.com>,
Matthew Wilcox <willy@infradead.org>,
Michal Hocko <mhocko@suse.com>,
Pasha Tatashin <pasha.tatashin@soleen.com>,
Peter Xu <peterx@redhat.com>, SeongJae Park <sj@kernel.org>,
Song Liu <songliubraving@fb.com>,
Vlastimil Babka <vbabka@suse.cz>, Yang Shi <shy828301@gmail.com>,
Zi Yan <ziy@nvidia.com>,
linux-mm@kvack.org
Cc: Andrea Arcangeli <aarcange@redhat.com>,
Andrew Morton <akpm@linux-foundation.org>,
Arnd Bergmann <arnd@arndb.de>,
Axel Rasmussen <axelrasmussen@google.com>,
Chris Kennelly <ckennelly@google.com>,
Chris Zankel <chris@zankel.net>, Helge Deller <deller@gmx.de>,
Hugh Dickins <hughd@google.com>,
Ivan Kokshaysky <ink@jurassic.park.msu.ru>,
"James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>,
Jens Axboe <axboe@kernel.dk>,
"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
Matt Turner <mattst88@gmail.com>,
Max Filippov <jcmvbkbc@gmail.com>,
Miaohe Lin <linmiaohe@huawei.com>,
Minchan Kim <minchan@kernel.org>,
Patrick Xia <patrickx@google.com>,
Pavel Begunkov <asml.silence@gmail.com>,
Thomas Bogendoerfer <tsbogend@alpha.franken.de>,
"Zach O'Keefe" <zokeefe@google.com>
Subject: [PATCH v4 06/13] mm/khugepaged: add flag to ignore khugepaged_max_ptes_*
Date: Mon, 2 May 2022 11:17:07 -0700 [thread overview]
Message-ID: <20220502181714.3483177-7-zokeefe@google.com> (raw)
In-Reply-To: <20220502181714.3483177-1-zokeefe@google.com>
Add an enforce_pte_scan_limits flag to struct collapse_control that allows
a collapse context to ignore the sysfs-controlled knobs
khugepaged_max_ptes_[none|swap|shared], and set this flag in the khugepaged
collapse context to preserve existing khugepaged behavior.
This flag will be left unset by the upcoming madvise collapse context,
since in that context the user presumably has reason to believe the
collapse will be beneficial, and khugepaged's heuristics shouldn't tell
the user they are wrong.
Signed-off-by: Zach O'Keefe <zokeefe@google.com>
---
mm/khugepaged.c | 31 +++++++++++++++++++++----------
1 file changed, 21 insertions(+), 10 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 986344a04165..94f18be83835 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -87,6 +87,9 @@ static struct kmem_cache *mm_slot_cache __read_mostly;
#define MAX_PTE_MAPPED_THP 8
struct collapse_control {
+ /* Respect khugepaged_max_ptes_[none|swap|shared] */
+ bool enforce_pte_scan_limits;
+
/* Num pages scanned per node */
int node_load[MAX_NUMNODES];
@@ -614,6 +617,7 @@ static bool is_refcount_suitable(struct page *page)
static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
unsigned long address,
pte_t *pte,
+ struct collapse_control *cc,
struct list_head *compound_pagelist)
{
struct page *page = NULL;
@@ -627,7 +631,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
if (pte_none(pteval) || (pte_present(pteval) &&
is_zero_pfn(pte_pfn(pteval)))) {
if (!userfaultfd_armed(vma) &&
- ++none_or_zero <= khugepaged_max_ptes_none) {
+ (++none_or_zero <= khugepaged_max_ptes_none ||
+ !cc->enforce_pte_scan_limits)) {
continue;
} else {
result = SCAN_EXCEED_NONE_PTE;
@@ -647,8 +652,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
VM_BUG_ON_PAGE(!PageAnon(page), page);
- if (page_mapcount(page) > 1 &&
- ++shared > khugepaged_max_ptes_shared) {
+ if (cc->enforce_pte_scan_limits && page_mapcount(page) > 1 &&
+ ++shared > khugepaged_max_ptes_shared) {
result = SCAN_EXCEED_SHARED_PTE;
count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
goto out;
@@ -1186,7 +1191,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
mmu_notifier_invalidate_range_end(&range);
spin_lock(pte_ptl);
- result = __collapse_huge_page_isolate(vma, address, pte,
+ result = __collapse_huge_page_isolate(vma, address, pte, cc,
&compound_pagelist);
spin_unlock(pte_ptl);
@@ -1276,7 +1281,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
_pte++, _address += PAGE_SIZE) {
pte_t pteval = *_pte;
if (is_swap_pte(pteval)) {
- if (++unmapped <= khugepaged_max_ptes_swap) {
+ if (++unmapped <= khugepaged_max_ptes_swap ||
+ !cc->enforce_pte_scan_limits) {
/*
* Always be strict with uffd-wp
* enabled swap entries. Please see
@@ -1295,7 +1301,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
}
if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
if (!userfaultfd_armed(vma) &&
- ++none_or_zero <= khugepaged_max_ptes_none) {
+ (++none_or_zero <= khugepaged_max_ptes_none ||
+ !cc->enforce_pte_scan_limits)) {
continue;
} else {
result = SCAN_EXCEED_NONE_PTE;
@@ -1325,8 +1332,9 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_unmap;
}
- if (page_mapcount(page) > 1 &&
- ++shared > khugepaged_max_ptes_shared) {
+ if (cc->enforce_pte_scan_limits &&
+ page_mapcount(page) > 1 &&
+ ++shared > khugepaged_max_ptes_shared) {
result = SCAN_EXCEED_SHARED_PTE;
count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
goto out_unmap;
@@ -2056,7 +2064,8 @@ static int khugepaged_scan_file(struct mm_struct *mm,
continue;
if (xa_is_value(page)) {
- if (++swap > khugepaged_max_ptes_swap) {
+ if (cc->enforce_pte_scan_limits &&
+ ++swap > khugepaged_max_ptes_swap) {
result = SCAN_EXCEED_SWAP_PTE;
count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
break;
@@ -2107,7 +2116,8 @@ static int khugepaged_scan_file(struct mm_struct *mm,
rcu_read_unlock();
if (result == SCAN_SUCCEED) {
- if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
+ if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none &&
+ cc->enforce_pte_scan_limits) {
result = SCAN_EXCEED_NONE_PTE;
count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
} else {
@@ -2337,6 +2347,7 @@ static int khugepaged(void *none)
{
struct mm_slot *mm_slot;
struct collapse_control cc = {
+ .enforce_pte_scan_limits = true,
.last_target_node = NUMA_NO_NODE,
.alloc_charge_hpage = &alloc_charge_hpage,
};
--
2.36.0.464.gb9c8b46e94-goog
next prev parent reply other threads:[~2022-05-02 18:17 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-05-02 18:17 [PATCH v4 00/12] mm: userspace hugepage collapse Zach O'Keefe
2022-05-02 18:17 ` [PATCH v4 01/13] mm/khugepaged: record SCAN_PMD_MAPPED when scan_pmd() finds THP Zach O'Keefe
2022-05-02 18:17 ` [PATCH v4 02/13] mm/khugepaged: add struct collapse_control Zach O'Keefe
2022-05-02 18:17 ` [PATCH v4 03/13] mm/khugepaged: dedup and simplify hugepage alloc and charging Zach O'Keefe
2022-05-02 18:17 ` [PATCH v4 04/13] mm/khugepaged: make hugepage allocation context-specific Zach O'Keefe
2022-05-03 3:38 ` kernel test robot
2022-05-03 6:30 ` kernel test robot
2022-05-04 21:45 ` Zach O'Keefe
2022-05-04 2:25 ` [mm/khugepaged] 0d006aeaf9: BUG:unable_to_handle_page_fault_for_address kernel test robot
2022-05-04 21:46 ` Zach O'Keefe
2022-05-02 18:17 ` [PATCH v4 05/13] mm/khugepaged: pipe enum scan_result codes back to callers Zach O'Keefe
2022-05-02 18:17 ` Zach O'Keefe [this message]
2022-05-02 18:17 ` [PATCH v4 07/13] mm/khugepaged: add flag to ignore page young/referenced requirement Zach O'Keefe
2022-05-02 18:17 ` [PATCH v4 08/13] mm/madvise: introduce MADV_COLLAPSE sync hugepage collapse Zach O'Keefe
2022-05-03 7:21 ` kernel test robot
2022-05-04 21:46 ` Zach O'Keefe
2022-05-02 18:17 ` [PATCH v4 09/13] mm/khugepaged: rename prefix of shared collapse functions Zach O'Keefe
2022-05-02 18:17 ` [PATCH v4 10/13] mm/madvise: add MADV_COLLAPSE to process_madvise() Zach O'Keefe
2022-05-02 18:17 ` [PATCH v4 11/13] selftests/vm: modularize collapse selftests Zach O'Keefe
2022-05-02 18:17 ` [PATCH v4 12/13] selftests/vm: add MADV_COLLAPSE collapse context to selftests Zach O'Keefe
2022-05-02 18:17 ` [PATCH v4 13/13] selftests/vm: add test to verify recollapse of THPs Zach O'Keefe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220502181714.3483177-7-zokeefe@google.com \
--to=zokeefe@google.com \
--cc=James.Bottomley@HansenPartnership.com \
--cc=aarcange@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=alex.shi@linux.alibaba.com \
--cc=arnd@arndb.de \
--cc=asml.silence@gmail.com \
--cc=axboe@kernel.dk \
--cc=axelrasmussen@google.com \
--cc=chris@zankel.net \
--cc=ckennelly@google.com \
--cc=david@redhat.com \
--cc=deller@gmx.de \
--cc=hughd@google.com \
--cc=ink@jurassic.park.msu.ru \
--cc=jcmvbkbc@gmail.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=linmiaohe@huawei.com \
--cc=linux-mm@kvack.org \
--cc=mattst88@gmail.com \
--cc=mhocko@suse.com \
--cc=minchan@kernel.org \
--cc=pasha.tatashin@soleen.com \
--cc=patrickx@google.com \
--cc=peterx@redhat.com \
--cc=rientjes@google.com \
--cc=shy828301@gmail.com \
--cc=sj@kernel.org \
--cc=songliubraving@fb.com \
--cc=tsbogend@alpha.franken.de \
--cc=vbabka@suse.cz \
--cc=willy@infradead.org \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox