From: "David Hildenbrand (Arm)" <david@kernel.org>
To: Vernon Yang <vernon2gm@gmail.com>, akpm@linux-foundation.org
Cc: lorenzo.stoakes@oracle.com, ziy@nvidia.com, dev.jain@arm.com,
baohua@kernel.org, lance.yang@linux.dev,
richard.weiyang@gmail.com, linux-mm@kvack.org,
linux-kernel@vger.kernel.org,
Vernon Yang <yanglincheng@kylinos.cn>
Subject: Re: [PATCH] mm: khugepaged: simplify scanning progress in pmd
Date: Thu, 26 Feb 2026 09:42:41 +0100 [thread overview]
Message-ID: <06ff4738-e81a-49ab-9204-74a6829d8d8b@kernel.org> (raw)
In-Reply-To: <20260226081313.53518-1-vernon2gm@gmail.com>
On 2/26/26 09:13, Vernon Yang wrote:
> From: Vernon Yang <yanglincheng@kylinos.cn>
>
> Placing "cur_progress" inside "struct collapse_control" makes the
> overall code simpler; the struct also coincidentally has a 4-byte
> hole that the new field can fill, as shown below:
>
> struct collapse_control {
> bool is_khugepaged; /* 0 1 */
> /* XXX 3 bytes hole, try to pack */
> u32 node_load[64]; /* 4 256 */
> /* XXX 4 bytes hole, try to pack */
> /* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */
> nodemask_t alloc_nmask; /* 264 8 */
>
> /* size: 272, cachelines: 5, members: 3 */
> /* sum members: 265, holes: 2, sum holes: 7 */
> /* last cacheline: 16 bytes */
> };
>
> Also rename "cur_progress" to "progress_in_pmd" to make it clearer.
>
> No functional changes.
>
> Signed-off-by: Vernon Yang <yanglincheng@kylinos.cn>
> ---
We should really squash that into the original patch and revisit it,
because I think we might be able to do even better.
Could we just switch to "cc->progress" like so (modified original patch):
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 4d7baf220ad9..5d67e5e60ece 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -68,7 +68,10 @@ enum scan_result {
static struct task_struct *khugepaged_thread __read_mostly;
static DEFINE_MUTEX(khugepaged_mutex);
-/* default scan 8*HPAGE_PMD_NR ptes (or vmas) every 10 second */
+/*
+ * default scan 8*HPAGE_PMD_NR ptes, pte_mapped_hugepage, pmd_mapped,
+ * no_pte_table or vmas every 10 second.
+ */
static unsigned int khugepaged_pages_to_scan __read_mostly;
static unsigned int khugepaged_pages_collapsed;
static unsigned int khugepaged_full_scans;
@@ -100,6 +103,9 @@ struct collapse_control {
/* Num pages scanned per node */
u32 node_load[MAX_NUMNODES];
+ /* Num pages scanned (see khugepaged_pages_to_scan). */
+ unsigned int progress;
+
/* nodemask for allocation fallback */
nodemask_t alloc_nmask;
};
@@ -1247,19 +1253,24 @@ static enum scan_result hpage_collapse_scan_pmd(struct mm_struct *mm,
VM_BUG_ON(start_addr & ~HPAGE_PMD_MASK);
result = find_pmd_or_thp_or_none(mm, start_addr, &pmd);
- if (result != SCAN_SUCCEED)
+ if (result != SCAN_SUCCEED) {
+ cc->progress++;
goto out;
+ }
memset(cc->node_load, 0, sizeof(cc->node_load));
nodes_clear(cc->alloc_nmask);
pte = pte_offset_map_lock(mm, pmd, start_addr, &ptl);
if (!pte) {
+ cc->progress++;
result = SCAN_NO_PTE_TABLE;
goto out;
}
for (addr = start_addr, _pte = pte; _pte < pte + HPAGE_PMD_NR;
_pte++, addr += PAGE_SIZE) {
+ cc->progress++;
+
pte_t pteval = ptep_get(_pte);
if (pte_none_or_zero(pteval)) {
++none_or_zero;
@@ -2370,6 +2381,10 @@ static enum scan_result hpage_collapse_scan_file(struct mm_struct *mm, unsigned
}
}
rcu_read_unlock();
+ if (result == SCAN_PTE_MAPPED_HUGEPAGE)
+ cc->progress++;
+ else
+ cc->progress += HPAGE_PMD_NR;
if (result == SCAN_SUCCEED) {
if (cc->is_khugepaged &&
@@ -2385,8 +2400,8 @@ static enum scan_result hpage_collapse_scan_file(struct mm_struct *mm, unsigned
return result;
}
-static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result *result,
- struct collapse_control *cc)
+static void khugepaged_scan_mm_slot(unsigned int progress_max,
+ enum scan_result *result, struct collapse_control *cc)
__releases(&khugepaged_mm_lock)
__acquires(&khugepaged_mm_lock)
{
@@ -2394,9 +2409,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
struct mm_slot *slot;
struct mm_struct *mm;
struct vm_area_struct *vma;
- int progress = 0;
- VM_BUG_ON(!pages);
lockdep_assert_held(&khugepaged_mm_lock);
*result = SCAN_FAIL;
@@ -2419,7 +2432,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
if (unlikely(!mmap_read_trylock(mm)))
goto breakouterloop_mmap_lock;
- progress++;
+ cc->progress++;
if (unlikely(hpage_collapse_test_exit_or_disable(mm)))
goto breakouterloop;
@@ -2429,17 +2442,17 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
cond_resched();
if (unlikely(hpage_collapse_test_exit_or_disable(mm))) {
- progress++;
+ cc->progress++;
break;
}
if (!thp_vma_allowable_order(vma, vma->vm_flags, TVA_KHUGEPAGED, PMD_ORDER)) {
- progress++;
+ cc->progress++;
continue;
}
hstart = round_up(vma->vm_start, HPAGE_PMD_SIZE);
hend = round_down(vma->vm_end, HPAGE_PMD_SIZE);
if (khugepaged_scan.address > hend) {
- progress++;
+ cc->progress++;
continue;
}
if (khugepaged_scan.address < hstart)
@@ -2486,7 +2499,6 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
/* move to next address */
khugepaged_scan.address += HPAGE_PMD_SIZE;
- progress += HPAGE_PMD_NR;
if (!mmap_locked)
/*
* We released mmap_lock so break loop. Note
@@ -2496,7 +2508,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
* correct result back to caller.
*/
goto breakouterloop_mmap_lock;
- if (progress >= pages)
+ if (cc->progress >= progress_max)
goto breakouterloop;
}
}
@@ -2527,9 +2539,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
collect_mm_slot(slot);
}
- trace_mm_khugepaged_scan(mm, progress, khugepaged_scan.mm_slot == NULL);
-
- return progress;
+ trace_mm_khugepaged_scan(mm, cc->progress, khugepaged_scan.mm_slot == NULL);
}
static int khugepaged_has_work(void)
@@ -2545,13 +2555,14 @@ static int khugepaged_wait_event(void)
static void khugepaged_do_scan(struct collapse_control *cc)
{
- unsigned int progress = 0, pass_through_head = 0;
- unsigned int pages = READ_ONCE(khugepaged_pages_to_scan);
+ const unsigned int progress_max = READ_ONCE(khugepaged_pages_to_scan);
+ unsigned int pass_through_head = 0;
bool wait = true;
enum scan_result result = SCAN_SUCCEED;
lru_add_drain_all();
+ cc->progress = 0;
while (true) {
cond_resched();
@@ -2563,13 +2574,12 @@ static void khugepaged_do_scan(struct collapse_control *cc)
pass_through_head++;
if (khugepaged_has_work() &&
pass_through_head < 2)
- progress += khugepaged_scan_mm_slot(pages - progress,
- &result, cc);
+ khugepaged_scan_mm_slot(progress_max, &result, cc);
else
- progress = pages;
+ cc->progress = progress_max;
spin_unlock(&khugepaged_mm_lock);
- if (progress >= pages)
+ if (cc->progress >= progress_max)
break;
if (result == SCAN_ALLOC_HUGE_PAGE_FAIL) {
--
2.43.0
--
Cheers,
David
next prev parent reply other threads:[~2026-02-26 8:42 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-26 8:13 Vernon Yang
2026-02-26 8:42 ` David Hildenbrand (Arm) [this message]
2026-02-26 9:57 ` Vernon Yang
2026-02-26 10:01 ` David Hildenbrand (Arm)
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=06ff4738-e81a-49ab-9204-74a6829d8d8b@kernel.org \
--to=david@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=baohua@kernel.org \
--cc=dev.jain@arm.com \
--cc=lance.yang@linux.dev \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=richard.weiyang@gmail.com \
--cc=vernon2gm@gmail.com \
--cc=yanglincheng@kylinos.cn \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox