linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Vernon Yang <vernon2gm@gmail.com>
To: "David Hildenbrand (Arm)" <david@kernel.org>
Cc: akpm@linux-foundation.org, lorenzo.stoakes@oracle.com,
	ziy@nvidia.com,  dev.jain@arm.com, baohua@kernel.org,
	lance.yang@linux.dev,  richard.weiyang@gmail.com,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	 Vernon Yang <yanglincheng@kylinos.cn>
Subject: Re: [PATCH] mm: khugepaged: simplify scanning progress in pmd
Date: Thu, 26 Feb 2026 17:57:56 +0800	[thread overview]
Message-ID: <zdvzmoop5xswqcyiwmvvrdfianm4ccs3gryfecwbm4bhuh7ebo@7an4huwgbuwo> (raw)
In-Reply-To: <06ff4738-e81a-49ab-9204-74a6829d8d8b@kernel.org>

On Thu, Feb 26, 2026 at 09:42:41AM +0100, David Hildenbrand (Arm) wrote:
> On 2/26/26 09:13, Vernon Yang wrote:
> > From: Vernon Yang <yanglincheng@kylinos.cn>
> >
> > Placing "cur_progress" inside "struct collapse_control" makes the
> > overall code simpler; coincidentally, the struct also has a 4-byte
> > hole where the new field fits, as shown below:
> >
> > struct collapse_control {
> >         bool                       is_khugepaged;        /*     0     1 */
> >         /* XXX 3 bytes hole, try to pack */
> >         u32                        node_load[64];        /*     4   256 */
> >         /* XXX 4 bytes hole, try to pack */
> >         /* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */
> >         nodemask_t                 alloc_nmask;          /*   264     8 */
> >
> >         /* size: 272, cachelines: 5, members: 3 */
> >         /* sum members: 265, holes: 2, sum holes: 7 */
> >         /* last cacheline: 16 bytes */
> > };
> >
> > Also rename "cur_progress" to "progress_in_pmd" to make it clearer.
> >
> > No functional changes.
> >
> > Signed-off-by: Vernon Yang <yanglincheng@kylinos.cn>
> > ---
>
> We should really squash that into the original patch and revisit it.

Yes, squashing this patch into the original patch makes the git commit
log clearer.

> Because I think we might be able to do even better.
>
> Could we just switch to "cc->progress" like so (modified original patch):
>
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 4d7baf220ad9..5d67e5e60ece 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -68,7 +68,10 @@ enum scan_result {
>  static struct task_struct *khugepaged_thread __read_mostly;
>  static DEFINE_MUTEX(khugepaged_mutex);
>
> -/* default scan 8*HPAGE_PMD_NR ptes (or vmas) every 10 second */
> +/*
> + * default scan 8*HPAGE_PMD_NR ptes, pte_mapped_hugepage, pmd_mapped,
> + * no_pte_table or vmas every 10 second.
> + */
>  static unsigned int khugepaged_pages_to_scan __read_mostly;
>  static unsigned int khugepaged_pages_collapsed;
>  static unsigned int khugepaged_full_scans;
> @@ -100,6 +103,9 @@ struct collapse_control {
>  	/* Num pages scanned per node */
>  	u32 node_load[MAX_NUMNODES];
>
> +	/* Num pages scanned (see khugepaged_pages_to_scan).  */
> +	unsigned int progress;
> +
>  	/* nodemask for allocation fallback */
>  	nodemask_t alloc_nmask;
>  };
> @@ -1247,19 +1253,24 @@ static enum scan_result hpage_collapse_scan_pmd(struct mm_struct *mm,
>  	VM_BUG_ON(start_addr & ~HPAGE_PMD_MASK);
>
>  	result = find_pmd_or_thp_or_none(mm, start_addr, &pmd);
> -	if (result != SCAN_SUCCEED)
> +	if (result != SCAN_SUCCEED) {
> +		cc->progress++;
>  		goto out;
> +	}
>
>  	memset(cc->node_load, 0, sizeof(cc->node_load));
>  	nodes_clear(cc->alloc_nmask);
>  	pte = pte_offset_map_lock(mm, pmd, start_addr, &ptl);
>  	if (!pte) {
> +		cc->progress++;
>  		result = SCAN_NO_PTE_TABLE;
>  		goto out;
>  	}
>
>  	for (addr = start_addr, _pte = pte; _pte < pte + HPAGE_PMD_NR;
>  	     _pte++, addr += PAGE_SIZE) {
> +		cc->progress++;
> +
>  		pte_t pteval = ptep_get(_pte);
>  		if (pte_none_or_zero(pteval)) {
>  			++none_or_zero;
> @@ -2370,6 +2381,10 @@ static enum scan_result hpage_collapse_scan_file(struct mm_struct *mm, unsigned
>  		}
>  	}
>  	rcu_read_unlock();
> +	if (result == SCAN_PTE_MAPPED_HUGEPAGE)
> +		cc->progress++;
> +	else
> +		cc->progress += HPAGE_PMD_NR;
>
>  	if (result == SCAN_SUCCEED) {
>  		if (cc->is_khugepaged &&
> @@ -2385,8 +2400,8 @@ static enum scan_result hpage_collapse_scan_file(struct mm_struct *mm, unsigned
>  	return result;
>  }
>
> -static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result *result,
> -					    struct collapse_control *cc)
> +static void khugepaged_scan_mm_slot(unsigned int progress_max,
> +		enum scan_result *result, struct collapse_control *cc)
>  	__releases(&khugepaged_mm_lock)
>  	__acquires(&khugepaged_mm_lock)
>  {
> @@ -2394,9 +2409,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
>  	struct mm_slot *slot;
>  	struct mm_struct *mm;
>  	struct vm_area_struct *vma;
> -	int progress = 0;
>
> -	VM_BUG_ON(!pages);
>  	lockdep_assert_held(&khugepaged_mm_lock);
>  	*result = SCAN_FAIL;
>
> @@ -2419,7 +2432,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
>  	if (unlikely(!mmap_read_trylock(mm)))
>  		goto breakouterloop_mmap_lock;
>
> -	progress++;
> +	cc->progress++;
>  	if (unlikely(hpage_collapse_test_exit_or_disable(mm)))
>  		goto breakouterloop;
>
> @@ -2429,17 +2442,17 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
>
>  		cond_resched();
>  		if (unlikely(hpage_collapse_test_exit_or_disable(mm))) {
> -			progress++;
> +			cc->progress++;
>  			break;
>  		}
>  		if (!thp_vma_allowable_order(vma, vma->vm_flags, TVA_KHUGEPAGED, PMD_ORDER)) {
> -			progress++;
> +			cc->progress++;
>  			continue;
>  		}
>  		hstart = round_up(vma->vm_start, HPAGE_PMD_SIZE);
>  		hend = round_down(vma->vm_end, HPAGE_PMD_SIZE);
>  		if (khugepaged_scan.address > hend) {
> -			progress++;
> +			cc->progress++;
>  			continue;
>  		}
>  		if (khugepaged_scan.address < hstart)
> @@ -2486,7 +2499,6 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
>
>  			/* move to next address */
>  			khugepaged_scan.address += HPAGE_PMD_SIZE;
> -			progress += HPAGE_PMD_NR;
>  			if (!mmap_locked)
>  				/*
>  				 * We released mmap_lock so break loop.  Note
> @@ -2496,7 +2508,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
>  				 * correct result back to caller.
>  				 */
>  				goto breakouterloop_mmap_lock;
> -			if (progress >= pages)
> +			if (cc->progress >= progress_max)
>  				goto breakouterloop;
>  		}
>  	}
> @@ -2527,9 +2539,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
>  		collect_mm_slot(slot);
>  	}
>
> -	trace_mm_khugepaged_scan(mm, progress, khugepaged_scan.mm_slot == NULL);
> -
> -	return progress;
> +	trace_mm_khugepaged_scan(mm, cc->progress, khugepaged_scan.mm_slot == NULL);
>  }
>
>  static int khugepaged_has_work(void)
> @@ -2545,13 +2555,14 @@ static int khugepaged_wait_event(void)
>
>  static void khugepaged_do_scan(struct collapse_control *cc)
>  {
> -	unsigned int progress = 0, pass_through_head = 0;
> -	unsigned int pages = READ_ONCE(khugepaged_pages_to_scan);
> +	const unsigned int progress_max = READ_ONCE(khugepaged_pages_to_scan);
> +	unsigned int pass_through_head = 0;
>  	bool wait = true;
>  	enum scan_result result = SCAN_SUCCEED;
>
>  	lru_add_drain_all();
>
> +	cc->progress = 0;

The madvise(MADV_COLLAPSE) path is missing the initialization of
"cc->progress": there "cc" is allocated with "cc = kmalloc_obj(*cc)",
which does not zero the memory. We need to initialize it manually in
madvise_collapse().

LGTM, I will submit v2 with this change squashed into the original
patch.

--
Cheers,
Vernon


  reply	other threads:[~2026-02-26  9:58 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-26  8:13 Vernon Yang
2026-02-26  8:42 ` David Hildenbrand (Arm)
2026-02-26  9:57   ` Vernon Yang [this message]
2026-02-26 10:01     ` David Hildenbrand (Arm)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=zdvzmoop5xswqcyiwmvvrdfianm4ccs3gryfecwbm4bhuh7ebo@7an4huwgbuwo \
    --to=vernon2gm@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=baohua@kernel.org \
    --cc=david@kernel.org \
    --cc=dev.jain@arm.com \
    --cc=lance.yang@linux.dev \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=richard.weiyang@gmail.com \
    --cc=yanglincheng@kylinos.cn \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox