From: "David Hildenbrand (Arm)" <david@kernel.org>
To: Mike Rapoport <rppt@kernel.org>
Cc: Tianyou Li <tianyou.li@intel.com>,
Oscar Salvador <osalvador@suse.de>,
Wei Yang <richard.weiyang@gmail.com>,
Michal Hocko <mhocko@suse.com>,
linux-mm@kvack.org, Yong Hu <yong.hu@intel.com>,
Nanhai Zou <nanhai.zou@intel.com>, Yuan Liu <yuan1.liu@intel.com>,
Tim Chen <tim.c.chen@linux.intel.com>,
Qiuxu Zhuo <qiuxu.zhuo@intel.com>,
Yu C Chen <yu.c.chen@intel.com>, Pan Deng <pan.deng@intel.com>,
Chen Zhang <zhangchen.kidd@jd.com>,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH v9 2/2] mm/memory hotplug/unplug: Optimize zone->contiguous update when changes pfn range
Date: Wed, 11 Feb 2026 13:19:56 +0100 [thread overview]
Message-ID: <b3777d0e-0b25-45bc-b387-e67fe5a0d328@kernel.org> (raw)
In-Reply-To: <aYsaBkMecDG595Xg@kernel.org>
>> *
>> + * online_pages is the number of pages within the zone that have an
>> + * online memmap. online_pages includes present pages and memory holes
>> + * that have a memmap. When spanned_pages == online_pages, pfn_to_page()
>> + * can be performed on any pfn within the zone span without further
>> + * checks.
>
> Maybe pages_with_memmap? It would stand off from managed, spanned and
> present, but it's clearer than online IMHO.
Offline pages also have a memmap, but that memmap should not be touched as it
might contain garbage. So it's a bit more tricky :)
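
To illustrate the distinction (just a sketch of the usual access pattern,
nothing from this patch): pfn_valid() only tells you that a memmap exists,
while pfn_to_online_page() additionally guarantees it was initialized:

	struct page *page;

	if (!pfn_valid(pfn))
		return NULL;	/* no memmap at all */

	page = pfn_to_online_page(pfn);
	if (!page)
		return NULL;	/* memmap exists, but offline: may be garbage */

	/* page is online and safe to dereference here */

So a name like "pages_with_memmap" would also cover pages we must not look at.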
>
>> + *
>> * So present_pages may be used by memory hotplug or memory power
>> * management logic to figure out unmanaged pages by checking
>> * (present_pages - managed_pages). And managed_pages should be used
>> @@ -967,6 +972,7 @@ struct zone {
>> atomic_long_t managed_pages;
>> unsigned long spanned_pages;
>> unsigned long present_pages;
>> + unsigned long online_pages;
>> #if defined(CONFIG_MEMORY_HOTPLUG)
>> unsigned long present_early_pages;
>> #endif
>> @@ -1051,8 +1057,6 @@ struct zone {
>> bool compact_blockskip_flush;
>> #endif
>> - bool contiguous;
>> -
>> CACHELINE_PADDING(_pad3_);
>> /* Zone statistics */
>> atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
>> @@ -1124,6 +1128,23 @@ static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
>> return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone);
>> }
>> +/**
>> + * zone_is_contiguous - test whether a zone is contiguous
>> + * @zone: the zone to test.
>> + *
>> + * In a contiguous zone, it is valid to call pfn_to_page() on any pfn in the
>> + * spanned zone without requiring pfn_valid() or pfn_to_online_page() checks.
>> + *
>> + * Returns: true if contiguous, otherwise false.
>> + */
>> +static inline bool zone_is_contiguous(const struct zone *zone)
>> +{
>> + return READ_ONCE(zone->spanned_pages) == READ_ONCE(zone->online_pages);
>> +}
>> +
>> static inline bool zone_is_initialized(const struct zone *zone)
>> {
>> return zone->initialized;
>> diff --git a/mm/internal.h b/mm/internal.h
>> index f35dbcf99a86..6062f9b8ee62 100644
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -716,21 +716,15 @@ extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
>> static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
>> unsigned long end_pfn, struct zone *zone)
>> {
>> - if (zone->contiguous)
>> + if (zone_is_contiguous(zone))
>> return pfn_to_page(start_pfn);
>> return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
>> }
>> -void set_zone_contiguous(struct zone *zone);
>> bool pfn_range_intersects_zones(int nid, unsigned long start_pfn,
>> unsigned long nr_pages);
>> -static inline void clear_zone_contiguous(struct zone *zone)
>> -{
>> - zone->contiguous = false;
>> -}
>> -
>> extern int __isolate_free_page(struct page *page, unsigned int order);
>> extern void __putback_isolated_page(struct page *page, unsigned int order,
>> int mt);
>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>> index a63ec679d861..76496c1039a9 100644
>> --- a/mm/memory_hotplug.c
>> +++ b/mm/memory_hotplug.c
>> @@ -492,11 +492,11 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
>> pfn = find_smallest_section_pfn(nid, zone, end_pfn,
>> zone_end_pfn(zone));
>> if (pfn) {
>> - zone->spanned_pages = zone_end_pfn(zone) - pfn;
>> + WRITE_ONCE(zone->spanned_pages, zone_end_pfn(zone) - pfn);
>> zone->zone_start_pfn = pfn;
>> } else {
>> zone->zone_start_pfn = 0;
>> - zone->spanned_pages = 0;
>> + WRITE_ONCE(zone->spanned_pages, 0);
>> }
>> } else if (zone_end_pfn(zone) == end_pfn) {
>> /*
>> @@ -508,10 +508,10 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
>> pfn = find_biggest_section_pfn(nid, zone, zone->zone_start_pfn,
>> start_pfn);
>> if (pfn)
>> - zone->spanned_pages = pfn - zone->zone_start_pfn + 1;
>> + WRITE_ONCE(zone->spanned_pages, pfn - zone->zone_start_pfn + 1);
>> else {
>> zone->zone_start_pfn = 0;
>> - zone->spanned_pages = 0;
>> + WRITE_ONCE(zone->spanned_pages, 0);
>> }
>> }
>> }
>> @@ -565,18 +565,13 @@ void remove_pfn_range_from_zone(struct zone *zone,
>> /*
>> * Zone shrinking code cannot properly deal with ZONE_DEVICE. So
>> - * we will not try to shrink the zones - which is okay as
>> - * set_zone_contiguous() cannot deal with ZONE_DEVICE either way.
>> + * we will not try to shrink the zones.
>> */
>> if (zone_is_zone_device(zone))
>> return;
>> - clear_zone_contiguous(zone);
>> -
>> shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
>> update_pgdat_span(pgdat);
>> -
>> - set_zone_contiguous(zone);
>> }
>> /**
>> @@ -753,8 +748,6 @@ void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
>> struct pglist_data *pgdat = zone->zone_pgdat;
>> int nid = pgdat->node_id;
>> - clear_zone_contiguous(zone);
>> -
>> if (zone_is_empty(zone))
>> init_currently_empty_zone(zone, start_pfn, nr_pages);
>> resize_zone_range(zone, start_pfn, nr_pages);
>> @@ -782,8 +775,6 @@ void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
>> memmap_init_range(nr_pages, nid, zone_idx(zone), start_pfn, 0,
>> MEMINIT_HOTPLUG, altmap, migratetype,
>> isolate_pageblock);
>> -
>> - set_zone_contiguous(zone);
>> }
>> struct auto_movable_stats {
>> @@ -1079,6 +1070,7 @@ void adjust_present_page_count(struct page *page, struct memory_group *group,
>> if (early_section(__pfn_to_section(page_to_pfn(page))))
>> zone->present_early_pages += nr_pages;
>> zone->present_pages += nr_pages;
>> + WRITE_ONCE(zone->online_pages, zone->online_pages + nr_pages);
>> zone->zone_pgdat->node_present_pages += nr_pages;
>> if (group && movable)
>> diff --git a/mm/mm_init.c b/mm/mm_init.c
>> index 2a809cd8e7fa..e33caa6fb6fc 100644
>> --- a/mm/mm_init.c
>> +++ b/mm/mm_init.c
>> @@ -2263,9 +2263,10 @@ void __init init_cma_pageblock(struct page *page)
>> }
>> #endif
>> -void set_zone_contiguous(struct zone *zone)
>> +static void calc_online_pages(struct zone *zone)
>> {
>> unsigned long block_start_pfn = zone->zone_start_pfn;
>> + unsigned long online_pages = 0;
>> unsigned long block_end_pfn;
>> block_end_pfn = pageblock_end_pfn(block_start_pfn);
>> @@ -2277,12 +2278,11 @@ void set_zone_contiguous(struct zone *zone)
>> if (!__pageblock_pfn_to_page(block_start_pfn,
>> block_end_pfn, zone))
>> - return;
>> + continue;
>> cond_resched();
>> + online_pages += block_end_pfn - block_start_pfn;
>
> I think we can completely get rid of this with something like this untested
> patch to calculate zone->online_pages for coldplug:
>
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index e33caa6fb6fc..ff2f75e7b49f 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -845,9 +845,9 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
> * zone/node above the hole except for the trailing pages in the last
> * section that will be appended to the zone/node below.
> */
> -static void __init init_unavailable_range(unsigned long spfn,
> - unsigned long epfn,
> - int zone, int node)
> +static u64 __init init_unavailable_range(unsigned long spfn,
> + unsigned long epfn,
> + int zone, int node)
> {
> unsigned long pfn;
> u64 pgcnt = 0;
> @@ -861,6 +861,8 @@ static void __init init_unavailable_range(unsigned long spfn,
> if (pgcnt)
> pr_info("On node %d, zone %s: %lld pages in unavailable ranges\n",
> node, zone_names[zone], pgcnt);
> +
> + return pgcnt;
> }
>
> /*
> @@ -959,9 +961,10 @@ static void __init memmap_init_zone_range(struct zone *zone,
> memmap_init_range(end_pfn - start_pfn, nid, zone_id, start_pfn,
> zone_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE,
> false);
> + zone->online_pages += (end_pfn - start_pfn);
>
> if (*hole_pfn < start_pfn)
> - init_unavailable_range(*hole_pfn, start_pfn, zone_id, nid);
> + zone->online_pages += init_unavailable_range(*hole_pfn, start_pfn, zone_id, nid);
>
> *hole_pfn = end_pfn;
> }
>
Looking at set_zone_contiguous(), __pageblock_pfn_to_page() takes care
of a weird case where the end of a zone falls into the middle of a
pageblock.
I am not even sure if that is possible, but we could handle that easily
in pageblock_pfn_to_page() by checking the requested range against the
zone spanned range.
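Something like the following (completely untested, just to illustrate the
idea; it reuses zone_spans_pfn() from the hunk above) is what I have in mind:

	static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
							 unsigned long end_pfn,
							 struct zone *zone)
	{
		/* Fast path only if the whole requested range lies within the zone span. */
		if (zone_is_contiguous(zone) && zone_spans_pfn(zone, start_pfn) &&
		    zone_spans_pfn(zone, end_pfn - 1))
			return pfn_to_page(start_pfn);

		return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
	}

That way a pageblock straddling the zone end would still take the slow path,
and the zone->online_pages accounting would not have to care about it.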
Then the semantics of "zone->online_pages" would be less weird and would more
closely resemble "pages with an online memmap".
init_unavailable_range() might indeed do the trick!
@Tianyou, can you explore that direction? I know, your PTO is coming up.
--
Cheers,
David
Thread overview: 13+ messages
2026-01-30 16:37 [PATCH v9 0/2] Optimize zone->contiguous update Tianyou Li
2026-01-30 16:37 ` [PATCH v9 1/2] mm/memory hotplug/unplug: Add online_memory_block_pages() and offline_memory_block_pages() Tianyou Li
2026-01-30 16:37 ` [PATCH v9 2/2] mm/memory hotplug/unplug: Optimize zone->contiguous update when changes pfn range Tianyou Li
2026-02-07 11:00 ` David Hildenbrand (Arm)
2026-02-08 19:39 ` Mike Rapoport
2026-02-09 10:52 ` David Hildenbrand (Arm)
2026-02-09 12:44 ` David Hildenbrand (Arm)
2026-02-10 11:44 ` Mike Rapoport
2026-02-10 15:28 ` Li, Tianyou
2026-02-11 12:19 ` David Hildenbrand (Arm) [this message]
2026-02-12 8:32 ` Mike Rapoport
2026-02-12 8:45 ` David Hildenbrand (Arm)
2026-02-09 11:38 ` David Hildenbrand (Arm)