From: Zi Yan <zi.yan@sent.com>
To: David Hildenbrand <david@redhat.com>, linux-mm@kvack.org
Cc: Matthew Wilcox <willy@infradead.org>,
Vlastimil Babka <vbabka@suse.cz>,
"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
Mike Kravetz <mike.kravetz@oracle.com>,
Michal Hocko <mhocko@kernel.org>,
John Hubbard <jhubbard@nvidia.com>,
linux-kernel@vger.kernel.org, Zi Yan <ziy@nvidia.com>
Subject: [RFC PATCH 03/15] mm: check pfn validity when buddy allocator can merge pages across mem sections.
Date: Thu, 5 Aug 2021 15:02:41 -0400 [thread overview]
Message-ID: <20210805190253.2795604-4-zi.yan@sent.com> (raw)
In-Reply-To: <20210805190253.2795604-1-zi.yan@sent.com>
From: Zi Yan <ziy@nvidia.com>
When MAX_ORDER - 1 + PAGE_SHIFT > SECTION_SIZE_BITS, it is possible to
have holes in memory zones. Use pfn_valid to check holes during buddy
page merging and physical frame scanning.
Signed-off-by: Zi Yan <ziy@nvidia.com>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
---
include/linux/mmzone.h | 13 +++++++++++++
mm/compaction.c | 20 +++++++++++++-------
mm/memory_hotplug.c | 7 +++++++
mm/page_alloc.c | 26 ++++++++++++++++++++++++--
mm/page_isolation.c | 7 ++++++-
mm/page_owner.c | 14 +++++++++++++-
6 files changed, 76 insertions(+), 11 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 98e3297b9e09..04f790ed81b7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1535,6 +1535,19 @@ void sparse_init(void);
#define subsection_map_init(_pfn, _nr_pages) do {} while (0)
#endif /* CONFIG_SPARSEMEM */
+/*
+ * If it is possible to have holes within a MAX_ORDER_NR_PAGES when
+ * MAX_ORDER_NR_PAGES crosses multiple memory sections, then we
+ * need to check pfn validity within each MAX_ORDER_NR_PAGES block.
+ * pfn_valid_within() should be used in this case; we optimise this away
+ * when we have no holes within a MAX_ORDER_NR_PAGES block.
+ */
+#if ((MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS)
+#define pfn_valid_within(pfn) pfn_valid(pfn)
+#else
+#define pfn_valid_within(pfn) (1)
+#endif
+
#endif /* !__GENERATING_BOUNDS.H */
#endif /* !__ASSEMBLY__ */
#endif /* _LINUX_MMZONE_H */
diff --git a/mm/compaction.c b/mm/compaction.c
index fbc60f964c38..dda640d51b70 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -306,14 +306,16 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
* is necessary for the block to be a migration source/target.
*/
do {
- if (check_source && PageLRU(page)) {
- clear_pageblock_skip(page);
- return true;
- }
+ if (pfn_valid_within(pfn)) {
+ if (check_source && PageLRU(page)) {
+ clear_pageblock_skip(page);
+ return true;
+ }
- if (check_target && PageBuddy(page)) {
- clear_pageblock_skip(page);
- return true;
+ if (check_target && PageBuddy(page)) {
+ clear_pageblock_skip(page);
+ return true;
+ }
}
page += (1 << PAGE_ALLOC_COSTLY_ORDER);
@@ -583,6 +585,8 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
break;
nr_scanned++;
+ if (!pfn_valid_within(blockpfn))
+ goto isolate_fail;
/*
* For compound pages such as THP and hugetlbfs, we can save
@@ -881,6 +885,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
cond_resched();
}
+ if (!pfn_valid_within(low_pfn))
+ goto isolate_fail;
nr_scanned++;
page = pfn_to_page(low_pfn);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 632cd832aef6..85029994a494 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1617,6 +1617,13 @@ struct zone *test_pages_in_a_zone(unsigned long start_pfn,
continue;
for (; pfn < sec_end_pfn && pfn < end_pfn;
pfn += MAX_ORDER_NR_PAGES) {
+ int i = 0;
+
+ while ((i < MAX_ORDER_NR_PAGES) &&
+ !pfn_valid_within(pfn + i))
+ i++;
+ if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
+ continue;
/* Check if we got outside of the zone */
if (zone && !zone_spans_pfn(zone, pfn))
return NULL;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 416859e94f86..e4657009fd4f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -594,6 +594,8 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
static int page_is_consistent(struct zone *zone, struct page *page)
{
+ if (!pfn_valid_within(page_to_pfn(page)))
+ return 0;
if (zone != page_zone(page))
return 0;
@@ -1023,12 +1025,16 @@ buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn,
if (order >= MAX_ORDER - 2)
return false;
+ if (!pfn_valid_within(buddy_pfn))
+ return false;
+
combined_pfn = buddy_pfn & pfn;
higher_page = page + (combined_pfn - pfn);
buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1);
higher_buddy = higher_page + (buddy_pfn - combined_pfn);
- return page_is_buddy(higher_page, higher_buddy, order + 1);
+ return pfn_valid_within(buddy_pfn) &&
+ page_is_buddy(higher_page, higher_buddy, order + 1);
}
/*
@@ -1089,6 +1095,8 @@ static inline void __free_one_page(struct page *page,
buddy_pfn = __find_buddy_pfn(pfn, order);
buddy = page + (buddy_pfn - pfn);
+ if (!pfn_valid_within(buddy_pfn))
+ goto done_merging;
if (!page_is_buddy(page, buddy, order))
goto done_merging;
/*
@@ -1118,6 +1126,9 @@ static inline void __free_one_page(struct page *page,
buddy_pfn = __find_buddy_pfn(pfn, order);
buddy = page + (buddy_pfn - pfn);
+
+ if (!pfn_valid_within(buddy_pfn))
+ goto done_merging;
buddy_mt = get_pageblock_migratetype(buddy);
if (migratetype != buddy_mt
@@ -1746,7 +1757,8 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
/*
* Check that the whole (or subset of) a pageblock given by the interval of
* [start_pfn, end_pfn) is valid and within the same zone, before scanning it
- * with the migration of free compaction scanner.
+ * with the migration of free compaction scanner. The scanners then need to use
+ * only pfn_valid_within() check for holes within pageblocks.
*
* Return struct page pointer of start_pfn, or NULL if checks were not passed.
*
@@ -1862,6 +1874,8 @@ static inline void __init pgdat_init_report_one_done(void)
*/
static inline bool __init deferred_pfn_valid(unsigned long pfn)
{
+ if (!pfn_valid_within(pfn))
+ return false;
if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn))
return false;
return true;
@@ -2508,6 +2522,11 @@ static int move_freepages(struct zone *zone,
int pages_moved = 0;
for (pfn = start_pfn; pfn <= end_pfn;) {
+ if (!pfn_valid_within(pfn)) {
+ pfn++;
+ continue;
+ }
+
page = pfn_to_page(pfn);
if (!PageBuddy(page)) {
/*
@@ -8825,6 +8844,9 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
}
for (; iter < pageblock_nr_pages - offset; iter++) {
+ if (!pfn_valid_within(pfn + iter))
+ continue;
+
page = pfn_to_page(pfn + iter);
/*
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 471e3a13b541..bddf788f45bf 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -93,7 +93,8 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
buddy_pfn = __find_buddy_pfn(pfn, order);
buddy = page + (buddy_pfn - pfn);
- if (!is_migrate_isolate_page(buddy)) {
+ if (pfn_valid_within(buddy_pfn) &&
+ !is_migrate_isolate_page(buddy)) {
__isolate_free_page(page, order);
isolated_page = true;
}
@@ -249,6 +250,10 @@ __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
struct page *page;
while (pfn < end_pfn) {
+ if (!pfn_valid_within(pfn)) {
+ pfn++;
+ continue;
+ }
page = pfn_to_page(pfn);
if (PageBuddy(page))
/*
diff --git a/mm/page_owner.c b/mm/page_owner.c
index d24ed221357c..23bfb074ca3f 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -276,6 +276,9 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
pageblock_mt = get_pageblock_migratetype(page);
for (; pfn < block_end_pfn; pfn++) {
+ if (!pfn_valid_within(pfn))
+ continue;
+
/* The pageblock is online, no need to recheck. */
page = pfn_to_page(pfn);
@@ -476,6 +479,10 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
continue;
}
+ /* Check for holes within a MAX_ORDER area */
+ if (!pfn_valid_within(pfn))
+ continue;
+
page = pfn_to_page(pfn);
if (PageBuddy(page)) {
unsigned long freepage_order = buddy_order_unsafe(page);
@@ -553,9 +560,14 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
block_end_pfn = min(block_end_pfn, end_pfn);
for (; pfn < block_end_pfn; pfn++) {
- struct page *page = pfn_to_page(pfn);
+ struct page *page;
struct page_ext *page_ext;
+ if (!pfn_valid_within(pfn))
+ continue;
+
+ page = pfn_to_page(pfn);
+
if (page_zone(page) != zone)
continue;
--
2.30.2
next prev parent reply other threads:[~2021-08-05 19:03 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-05 19:02 [RFC PATCH 00/15] Make MAX_ORDER adjustable as a kernel boot time parameter Zi Yan
2021-08-05 19:02 ` [RFC PATCH 01/15] arch: x86: remove MAX_ORDER exceeding SECTION_SIZE check for 32bit vdso Zi Yan
2021-08-05 19:02 ` [RFC PATCH 02/15] arch: mm: rename FORCE_MAX_ZONEORDER to ARCH_FORCE_MAX_ORDER Zi Yan
2021-08-05 19:02 ` Zi Yan [this message]
2021-08-05 19:02 ` [RFC PATCH 04/15] mm: prevent pageblock size being larger than section size Zi Yan
2021-08-05 19:02 ` [RFC PATCH 05/15] mm/memory_hotplug: online pages at " Zi Yan
2021-08-05 19:02 ` [RFC PATCH 06/15] mm: use PAGES_PER_SECTION instead for mem_map_offset/next() Zi Yan
2021-08-05 19:02 ` [RFC PATCH 07/15] mm: hugetlb: use PAGES_PER_SECTION to check mem_map discontiguity Zi Yan
2021-08-05 19:02 ` [RFC PATCH 08/15] fs: proc: use PAGES_PER_SECTION for page offline checking period Zi Yan
2021-08-07 10:32 ` Mike Rapoport
2021-08-09 15:45 ` [RFC PATCH 08/15] " Zi Yan
2021-08-05 19:02 ` [RFC PATCH 09/15] virtio: virtio_mem: use PAGES_PER_SECTION instead of MAX_ORDER_NR_PAGES Zi Yan
2021-08-09 7:35 ` David Hildenbrand
2021-08-05 19:02 ` [RFC PATCH 10/15] virtio: virtio_balloon: " Zi Yan
2021-08-09 7:42 ` David Hildenbrand
2021-08-05 19:02 ` [RFC PATCH 11/15] mm/page_reporting: report pages at section size instead of MAX_ORDER Zi Yan
2021-08-09 7:25 ` David Hildenbrand
2021-08-09 14:12 ` Alexander Duyck
2021-08-09 15:08 ` Zi Yan
2021-08-09 16:51 ` Alexander Duyck
2021-08-09 14:08 ` Alexander Duyck
2021-08-05 19:02 ` [RFC PATCH 12/15] mm: Make MAX_ORDER of buddy allocator configurable via Kconfig SET_MAX_ORDER Zi Yan
2021-08-06 15:16 ` Vlastimil Babka
2021-08-06 15:23 ` Zi Yan
2021-08-05 19:02 ` [RFC PATCH 13/15] mm: convert MAX_ORDER sized static arrays to dynamic ones Zi Yan
2021-08-05 19:16 ` Christian König
2021-08-05 19:58 ` Zi Yan
2021-08-06 9:37 ` Christian König
2021-08-06 14:00 ` Zi Yan
2021-08-05 19:02 ` [RFC PATCH 14/15] mm: introduce MIN_MAX_ORDER to replace MAX_ORDER as compile time constant Zi Yan
2021-08-08 8:23 ` Mike Rapoport
2021-08-09 15:35 ` Zi Yan
2021-08-05 19:02 ` [RFC PATCH 15/15] mm: make MAX_ORDER a kernel boot time parameter Zi Yan
2021-08-06 15:36 ` [RFC PATCH 00/15] Make MAX_ORDER adjustable as " Vlastimil Babka
2021-08-06 16:16 ` David Hildenbrand
2021-08-06 16:54 ` Vlastimil Babka
2021-08-06 17:08 ` David Hildenbrand
2021-08-06 18:24 ` Zi Yan
2021-08-09 7:20 ` David Hildenbrand
2021-08-08 7:41 ` Mike Rapoport
2021-08-06 16:32 ` Vlastimil Babka
2021-08-06 17:19 ` Zi Yan
2021-08-06 20:27 ` Hugh Dickins
2021-08-06 21:26 ` Zi Yan
2021-08-09 4:04 ` Hugh Dickins
2021-08-07 1:10 ` Matthew Wilcox
2021-08-07 21:23 ` Matthew Wilcox
2021-08-09 4:29 ` Hugh Dickins
2021-08-09 11:22 ` Matthew Wilcox
2021-08-09 7:41 ` David Hildenbrand
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210805190253.2795604-4-zi.yan@sent.com \
--to=zi.yan@sent.com \
--cc=david@redhat.com \
--cc=jhubbard@nvidia.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=mike.kravetz@oracle.com \
--cc=vbabka@suse.cz \
--cc=willy@infradead.org \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox