From: "Li, Tianyou" <tianyou.li@intel.com>
To: "David Hildenbrand (Arm)" <david@kernel.org>,
Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>,
Wei Yang <richard.weiyang@gmail.com>,
Michal Hocko <mhocko@suse.com>, <linux-mm@kvack.org>,
Yong Hu <yong.hu@intel.com>, Nanhai Zou <nanhai.zou@intel.com>,
Yuan Liu <yuan1.liu@intel.com>,
Tim Chen <tim.c.chen@linux.intel.com>,
Qiuxu Zhuo <qiuxu.zhuo@intel.com>,
Yu C Chen <yu.c.chen@intel.com>, Pan Deng <pan.deng@intel.com>,
Chen Zhang <zhangchen.kidd@jd.com>,
<linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v9 2/2] mm/memory hotplug/unplug: Optimize zone->contiguous update when changes pfn range
Date: Tue, 24 Feb 2026 15:15:02 +0800
Message-ID: <de501b65-6d02-4eeb-b50e-bfe5ab9b8d00@intel.com>
In-Reply-To: <b3777d0e-0b25-45bc-b387-e67fe5a0d328@kernel.org>
On 2/11/2026 8:19 PM, David Hildenbrand (Arm) wrote:
>>> *
>>> + * online_pages is pages within the zone that have an online memmap.
>>> + * online_pages include present pages and memory holes that have a
>>> + * memmap. When spanned_pages == online_pages, pfn_to_page() can be
>>> + * performed without further checks on any pfn within the zone span.
>>
>> Maybe pages_with_memmap? It would stand off from managed, spanned and
>> present, but it's clearer than online IMHO.
>
> offline pages also have a memmap, but that should not be touched as it
> might contain garbage. So it's a bit more tricky :)
>
>>
>>> + *
>>> * So present_pages may be used by memory hotplug or memory power
>>> * management logic to figure out unmanaged pages by checking
>>> * (present_pages - managed_pages). And managed_pages should be used
>>> @@ -967,6 +972,7 @@ struct zone {
>>> atomic_long_t managed_pages;
>>> unsigned long spanned_pages;
>>> unsigned long present_pages;
>>> + unsigned long online_pages;
>>> #if defined(CONFIG_MEMORY_HOTPLUG)
>>> unsigned long present_early_pages;
>>> #endif
>>> @@ -1051,8 +1057,6 @@ struct zone {
>>> bool compact_blockskip_flush;
>>> #endif
>>> - bool contiguous;
>>> -
>>> CACHELINE_PADDING(_pad3_);
>>> /* Zone statistics */
>>> atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
>>> @@ -1124,6 +1128,23 @@ static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
>>> return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone);
>>> }
>>> +/**
>>> + * zone_is_contiguous - test whether a zone is contiguous
>>> + * @zone: the zone to test.
>>> + *
>>> + * In a contiguous zone, it is valid to call pfn_to_page() on any pfn in the
>>> + * spanned zone without requiring pfn_valid() or pfn_to_online_page() checks.
>>> + *
>>> + * Returns: true if contiguous, otherwise false.
>>> + */
>>> +static inline bool zone_is_contiguous(const struct zone *zone)
>>> +{
>>> + return READ_ONCE(zone->spanned_pages) == READ_ONCE(zone->online_pages);
>>> +}
>>> +
>>> static inline bool zone_is_initialized(const struct zone *zone)
>>> {
>>> return zone->initialized;
>>> diff --git a/mm/internal.h b/mm/internal.h
>>> index f35dbcf99a86..6062f9b8ee62 100644
>>> --- a/mm/internal.h
>>> +++ b/mm/internal.h
>>> @@ -716,21 +716,15 @@ extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
>>> static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
>>> unsigned long end_pfn, struct zone *zone)
>>> {
>>> - if (zone->contiguous)
>>> + if (zone_is_contiguous(zone))
>>> return pfn_to_page(start_pfn);
>>> return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
>>> }
>>> -void set_zone_contiguous(struct zone *zone);
>>> bool pfn_range_intersects_zones(int nid, unsigned long start_pfn,
>>> unsigned long nr_pages);
>>> -static inline void clear_zone_contiguous(struct zone *zone)
>>> -{
>>> - zone->contiguous = false;
>>> -}
>>> -
>>> extern int __isolate_free_page(struct page *page, unsigned int order);
>>> extern void __putback_isolated_page(struct page *page, unsigned int order,
>>> int mt);
>>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>>> index a63ec679d861..76496c1039a9 100644
>>> --- a/mm/memory_hotplug.c
>>> +++ b/mm/memory_hotplug.c
>>> @@ -492,11 +492,11 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
>>> pfn = find_smallest_section_pfn(nid, zone, end_pfn,
>>> zone_end_pfn(zone));
>>> if (pfn) {
>>> - zone->spanned_pages = zone_end_pfn(zone) - pfn;
>>> + WRITE_ONCE(zone->spanned_pages, zone_end_pfn(zone) - pfn);
>>> zone->zone_start_pfn = pfn;
>>> } else {
>>> zone->zone_start_pfn = 0;
>>> - zone->spanned_pages = 0;
>>> + WRITE_ONCE(zone->spanned_pages, 0);
>>> }
>>> } else if (zone_end_pfn(zone) == end_pfn) {
>>> /*
>>> @@ -508,10 +508,10 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
>>> pfn = find_biggest_section_pfn(nid, zone, zone->zone_start_pfn,
>>> start_pfn);
>>> if (pfn)
>>> - zone->spanned_pages = pfn - zone->zone_start_pfn + 1;
>>> + WRITE_ONCE(zone->spanned_pages, pfn - zone->zone_start_pfn + 1);
>>> else {
>>> zone->zone_start_pfn = 0;
>>> - zone->spanned_pages = 0;
>>> + WRITE_ONCE(zone->spanned_pages, 0);
>>> }
>>> }
>>> }
>>> @@ -565,18 +565,13 @@ void remove_pfn_range_from_zone(struct zone *zone,
>>> /*
>>> * Zone shrinking code cannot properly deal with ZONE_DEVICE. So
>>> - * we will not try to shrink the zones - which is okay as
>>> - * set_zone_contiguous() cannot deal with ZONE_DEVICE either way.
>>> + * we will not try to shrink the zones.
>>> */
>>> if (zone_is_zone_device(zone))
>>> return;
>>> - clear_zone_contiguous(zone);
>>> -
>>> shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
>>> update_pgdat_span(pgdat);
>>> -
>>> - set_zone_contiguous(zone);
>>> }
>>> /**
>>> @@ -753,8 +748,6 @@ void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
>>> struct pglist_data *pgdat = zone->zone_pgdat;
>>> int nid = pgdat->node_id;
>>> - clear_zone_contiguous(zone);
>>> -
>>> if (zone_is_empty(zone))
>>> init_currently_empty_zone(zone, start_pfn, nr_pages);
>>> resize_zone_range(zone, start_pfn, nr_pages);
>>> @@ -782,8 +775,6 @@ void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
>>> memmap_init_range(nr_pages, nid, zone_idx(zone), start_pfn, 0,
>>> MEMINIT_HOTPLUG, altmap, migratetype,
>>> isolate_pageblock);
>>> -
>>> - set_zone_contiguous(zone);
>>> }
>>> struct auto_movable_stats {
>>> @@ -1079,6 +1070,7 @@ void adjust_present_page_count(struct page *page, struct memory_group *group,
>>> if (early_section(__pfn_to_section(page_to_pfn(page))))
>>> zone->present_early_pages += nr_pages;
>>> zone->present_pages += nr_pages;
>>> + WRITE_ONCE(zone->online_pages, zone->online_pages + nr_pages);
>>> zone->zone_pgdat->node_present_pages += nr_pages;
>>> if (group && movable)
>>> diff --git a/mm/mm_init.c b/mm/mm_init.c
>>> index 2a809cd8e7fa..e33caa6fb6fc 100644
>>> --- a/mm/mm_init.c
>>> +++ b/mm/mm_init.c
>>> @@ -2263,9 +2263,10 @@ void __init init_cma_pageblock(struct page *page)
>>> }
>>> #endif
>>> -void set_zone_contiguous(struct zone *zone)
>>> +static void calc_online_pages(struct zone *zone)
>>> {
>>> unsigned long block_start_pfn = zone->zone_start_pfn;
>>> + unsigned long online_pages = 0;
>>> unsigned long block_end_pfn;
>>> block_end_pfn = pageblock_end_pfn(block_start_pfn);
>>> @@ -2277,12 +2278,11 @@ void set_zone_contiguous(struct zone *zone)
>>> if (!__pageblock_pfn_to_page(block_start_pfn,
>>> block_end_pfn, zone))
>>> - return;
>>> + continue;
>>> cond_resched();
>>> + online_pages += block_end_pfn - block_start_pfn;
>>
>> I think we can completely get rid of this with something like this
>> untested patch to calculate zone->online_pages for coldplug:
>>
>> diff --git a/mm/mm_init.c b/mm/mm_init.c
>> index e33caa6fb6fc..ff2f75e7b49f 100644
>> --- a/mm/mm_init.c
>> +++ b/mm/mm_init.c
>> @@ -845,9 +845,9 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
>> * zone/node above the hole except for the trailing pages in the last
>> * section that will be appended to the zone/node below.
>> */
>> -static void __init init_unavailable_range(unsigned long spfn,
>> - unsigned long epfn,
>> - int zone, int node)
>> +static u64 __init init_unavailable_range(unsigned long spfn,
>> + unsigned long epfn,
>> + int zone, int node)
>> {
>> unsigned long pfn;
>> u64 pgcnt = 0;
>> @@ -861,6 +861,8 @@ static void __init init_unavailable_range(unsigned long spfn,
>> if (pgcnt)
>> pr_info("On node %d, zone %s: %lld pages in unavailable ranges\n",
>> node, zone_names[zone], pgcnt);
>> +
>> + return pgcnt;
>> }
>> /*
>> @@ -959,9 +961,10 @@ static void __init memmap_init_zone_range(struct zone *zone,
>> memmap_init_range(end_pfn - start_pfn, nid, zone_id, start_pfn,
>> zone_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE,
>> false);
>> + zone->online_pages += (end_pfn - start_pfn);
>> if (*hole_pfn < start_pfn)
>> - init_unavailable_range(*hole_pfn, start_pfn, zone_id, nid);
>> + zone->online_pages += init_unavailable_range(*hole_pfn, start_pfn, zone_id, nid);
>> *hole_pfn = end_pfn;
>> }
>>
>
> Looking at set_zone_contiguous(), __pageblock_pfn_to_page() takes care
> of a weird case where the end of a zone falls into the middle of a
> pageblock.
>
> I am not even sure if that is possible, but we could handle that
> easily in pageblock_pfn_to_page() by checking the requested range
> against the zone spanned range.
>
> Then the semantics of "zone->online_pages" would be less weird and more
> closely resemble "pages with online memmap".
>
> init_unavailable_range() might indeed do the trick!
>
> @Tianyou, can you explore that direction? I know, your PTO is coming up.
>
Hi David,
Sorry for the late response; I just came back from the CNY holiday. I am
happy to explore the new direction and will sync with Yuan in a day or
two to confirm the overall implementation and testing plan. Thanks in
advance for your time reviewing the new patch set once it is ready.
Regards,
Tianyou
Thread overview: 14+ messages
2026-01-30 16:37 [PATCH v9 0/2] Optimize zone->contiguous update Tianyou Li
2026-01-30 16:37 ` [PATCH v9 1/2] mm/memory hotplug/unplug: Add online_memory_block_pages() and offline_memory_block_pages() Tianyou Li
2026-01-30 16:37 ` [PATCH v9 2/2] mm/memory hotplug/unplug: Optimize zone->contiguous update when changes pfn range Tianyou Li
2026-02-07 11:00 ` David Hildenbrand (Arm)
2026-02-08 19:39 ` Mike Rapoport
2026-02-09 10:52 ` David Hildenbrand (Arm)
2026-02-09 12:44 ` David Hildenbrand (Arm)
2026-02-10 11:44 ` Mike Rapoport
2026-02-10 15:28 ` Li, Tianyou
2026-02-11 12:19 ` David Hildenbrand (Arm)
2026-02-12 8:32 ` Mike Rapoport
2026-02-12 8:45 ` David Hildenbrand (Arm)
2026-02-24 7:15 ` Li, Tianyou [this message]
2026-02-09 11:38 ` David Hildenbrand (Arm)