From: Vernon Yang <vernon2gm@gmail.com>
To: "David Hildenbrand (Arm)" <david@kernel.org>
Cc: akpm@linux-foundation.org, lorenzo.stoakes@oracle.com,
ziy@nvidia.com, dev.jain@arm.com, baohua@kernel.org,
lance.yang@linux.dev, richard.weiyang@gmail.com,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
Vernon Yang <yanglincheng@kylinos.cn>
Subject: Re: [PATCH] mm: khugepaged: simplify scanning progress in pmd
Date: Thu, 26 Feb 2026 17:57:56 +0800 [thread overview]
Message-ID: <zdvzmoop5xswqcyiwmvvrdfianm4ccs3gryfecwbm4bhuh7ebo@7an4huwgbuwo> (raw)
In-Reply-To: <06ff4738-e81a-49ab-9204-74a6829d8d8b@kernel.org>
On Thu, Feb 26, 2026 at 09:42:41AM +0100, David Hildenbrand (Arm) wrote:
> On 2/26/26 09:13, Vernon Yang wrote:
> > From: Vernon Yang <yanglincheng@kylinos.cn>
> >
> > Placing "cur_progress" inside "struct collapse_control" makes the
> > overall code simpler, there also coincidentally has a 4-bytes hole,
> > as shown below:
> >
> > struct collapse_control {
> > bool is_khugepaged; /* 0 1 */
> > /* XXX 3 bytes hole, try to pack */
> > u32 node_load[64]; /* 4 256 */
> > /* XXX 4 bytes hole, try to pack */
> > /* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */
> > nodemask_t alloc_nmask; /* 264 8 */
> >
> > /* size: 272, cachelines: 5, members: 3 */
> > /* sum members: 265, holes: 2, sum holes: 7 */
> > /* last cacheline: 16 bytes */
> > };
> >
> > Also rename "cur_progress" to "progress_in_pmd", make it clearer.
> >
> > No function changes.
> >
> > Signed-off-by: Vernon Yang <yanglincheng@kylinos.cn>
> > ---
>
> We should really squash that into the original patch and revisit it.
Yes, squashing this patch into the original patch makes the git commit
log clearer.
> Because I think we might be able to do even better.
>
> Could we just switch to "cc->progress" like so (modified original patch):
>
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 4d7baf220ad9..5d67e5e60ece 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -68,7 +68,10 @@ enum scan_result {
> static struct task_struct *khugepaged_thread __read_mostly;
> static DEFINE_MUTEX(khugepaged_mutex);
>
> -/* default scan 8*HPAGE_PMD_NR ptes (or vmas) every 10 second */
> +/*
> + * default scan 8*HPAGE_PMD_NR ptes, pte_mapped_hugepage, pmd_mapped,
> + * no_pte_table or vmas every 10 second.
> + */
> static unsigned int khugepaged_pages_to_scan __read_mostly;
> static unsigned int khugepaged_pages_collapsed;
> static unsigned int khugepaged_full_scans;
> @@ -100,6 +103,9 @@ struct collapse_control {
> /* Num pages scanned per node */
> u32 node_load[MAX_NUMNODES];
>
> + /* Num pages scanned (see khugepaged_pages_to_scan). */
> + unsigned int progress;
> +
> /* nodemask for allocation fallback */
> nodemask_t alloc_nmask;
> };
> @@ -1247,19 +1253,24 @@ static enum scan_result hpage_collapse_scan_pmd(struct mm_struct *mm,
> VM_BUG_ON(start_addr & ~HPAGE_PMD_MASK);
>
> result = find_pmd_or_thp_or_none(mm, start_addr, &pmd);
> - if (result != SCAN_SUCCEED)
> + if (result != SCAN_SUCCEED) {
> + cc->progress++;
> goto out;
> + }
>
> memset(cc->node_load, 0, sizeof(cc->node_load));
> nodes_clear(cc->alloc_nmask);
> pte = pte_offset_map_lock(mm, pmd, start_addr, &ptl);
> if (!pte) {
> + cc->progress++;
> result = SCAN_NO_PTE_TABLE;
> goto out;
> }
>
> for (addr = start_addr, _pte = pte; _pte < pte + HPAGE_PMD_NR;
> _pte++, addr += PAGE_SIZE) {
> + cc->progress++;
> +
> pte_t pteval = ptep_get(_pte);
> if (pte_none_or_zero(pteval)) {
> ++none_or_zero;
> @@ -2370,6 +2381,10 @@ static enum scan_result hpage_collapse_scan_file(struct mm_struct *mm, unsigned
> }
> }
> rcu_read_unlock();
> + if (result == SCAN_PTE_MAPPED_HUGEPAGE)
> + cc->progress++;
> + else
> + cc->progress += HPAGE_PMD_NR;
>
> if (result == SCAN_SUCCEED) {
> if (cc->is_khugepaged &&
> @@ -2385,8 +2400,8 @@ static enum scan_result hpage_collapse_scan_file(struct mm_struct *mm, unsigned
> return result;
> }
>
> -static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result *result,
> - struct collapse_control *cc)
> +static void khugepaged_scan_mm_slot(unsigned int progress_max,
> + enum scan_result *result, struct collapse_control *cc)
> __releases(&khugepaged_mm_lock)
> __acquires(&khugepaged_mm_lock)
> {
> @@ -2394,9 +2409,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
> struct mm_slot *slot;
> struct mm_struct *mm;
> struct vm_area_struct *vma;
> - int progress = 0;
>
> - VM_BUG_ON(!pages);
> lockdep_assert_held(&khugepaged_mm_lock);
> *result = SCAN_FAIL;
>
> @@ -2419,7 +2432,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
> if (unlikely(!mmap_read_trylock(mm)))
> goto breakouterloop_mmap_lock;
>
> - progress++;
> + cc->progress++;
> if (unlikely(hpage_collapse_test_exit_or_disable(mm)))
> goto breakouterloop;
>
> @@ -2429,17 +2442,17 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
>
> cond_resched();
> if (unlikely(hpage_collapse_test_exit_or_disable(mm))) {
> - progress++;
> + cc->progress++;
> break;
> }
> if (!thp_vma_allowable_order(vma, vma->vm_flags, TVA_KHUGEPAGED, PMD_ORDER)) {
> - progress++;
> + cc->progress++;
> continue;
> }
> hstart = round_up(vma->vm_start, HPAGE_PMD_SIZE);
> hend = round_down(vma->vm_end, HPAGE_PMD_SIZE);
> if (khugepaged_scan.address > hend) {
> - progress++;
> + cc->progress++;
> continue;
> }
> if (khugepaged_scan.address < hstart)
> @@ -2486,7 +2499,6 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
>
> /* move to next address */
> khugepaged_scan.address += HPAGE_PMD_SIZE;
> - progress += HPAGE_PMD_NR;
> if (!mmap_locked)
> /*
> * We released mmap_lock so break loop. Note
> @@ -2496,7 +2508,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
> * correct result back to caller.
> */
> goto breakouterloop_mmap_lock;
> - if (progress >= pages)
> + if (cc->progress >= progress_max)
> goto breakouterloop;
> }
> }
> @@ -2527,9 +2539,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
> collect_mm_slot(slot);
> }
>
> - trace_mm_khugepaged_scan(mm, progress, khugepaged_scan.mm_slot == NULL);
> -
> - return progress;
> + trace_mm_khugepaged_scan(mm, cc->progress, khugepaged_scan.mm_slot == NULL);
> }
>
> static int khugepaged_has_work(void)
> @@ -2545,13 +2555,14 @@ static int khugepaged_wait_event(void)
>
> static void khugepaged_do_scan(struct collapse_control *cc)
> {
> - unsigned int progress = 0, pass_through_head = 0;
> - unsigned int pages = READ_ONCE(khugepaged_pages_to_scan);
> + const unsigned int progress_max = READ_ONCE(khugepaged_pages_to_scan);
> + unsigned int pass_through_head = 0;
> bool wait = true;
> enum scan_result result = SCAN_SUCCEED;
>
> lru_add_drain_all();
>
> + cc->progress = 0;
The madvise(MADV_COLLAPSE) path never initializes "cc->progress", because
its collapse_control is allocated with "cc = kmalloc_obj(*cc)", which does
not zero the memory. We need to initialize it explicitly in
madvise_collapse().
LGTM, I will send v2 with this change squashed into the original
patch.
--
Cheers,
Vernon
next prev parent reply other threads:[~2026-02-26 9:58 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-26 8:13 Vernon Yang
2026-02-26 8:42 ` David Hildenbrand (Arm)
2026-02-26 9:57 ` Vernon Yang [this message]
2026-02-26 10:01 ` David Hildenbrand (Arm)
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=zdvzmoop5xswqcyiwmvvrdfianm4ccs3gryfecwbm4bhuh7ebo@7an4huwgbuwo \
--to=vernon2gm@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=baohua@kernel.org \
--cc=david@kernel.org \
--cc=dev.jain@arm.com \
--cc=lance.yang@linux.dev \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=richard.weiyang@gmail.com \
--cc=yanglincheng@kylinos.cn \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox