From: Zi Yan <zi.yan@sent.com>
To: David Hildenbrand <david@redhat.com>, linux-mm@kvack.org
Cc: Matthew Wilcox <willy@infradead.org>,
	Vlastimil Babka <vbabka@suse.cz>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	Mike Kravetz <mike.kravetz@oracle.com>,
	Michal Hocko <mhocko@kernel.org>,
	John Hubbard <jhubbard@nvidia.com>,
	linux-kernel@vger.kernel.org, Zi Yan <ziy@nvidia.com>
Subject: [RFC PATCH 03/15] mm: check pfn validity when buddy allocator can merge pages across mem sections.
Date: Thu,  5 Aug 2021 15:02:41 -0400
Message-ID: <20210805190253.2795604-4-zi.yan@sent.com>
In-Reply-To: <20210805190253.2795604-1-zi.yan@sent.com>

From: Zi Yan <ziy@nvidia.com>

When MAX_ORDER - 1 + PAGE_SHIFT > SECTION_SIZE_BITS, a MAX_ORDER page
block can span multiple memory sections, so memory zones may contain
holes within such a block. Use pfn_valid_within() (which expands to
pfn_valid() in this configuration) to check for holes during buddy page
merging and physical page frame scanning.
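
As a worked example (illustrative numbers, not taken from this series):
with 4KB pages (PAGE_SHIFT = 12) and 128MB sections (SECTION_SIZE_BITS =
27, the x86_64 default), the stock MAX_ORDER of 11 gives a largest buddy
block of 2^(10 + 12) bytes = 4MB, which always fits inside one section.
Raising MAX_ORDER to 20 gives 2^(19 + 12) bytes = 2GB blocks that span
16 sections, so a single buddy block can cover unpopulated PFNs. The
hole-aware walk used throughout this patch then follows this minimal
sketch (loop bounds and names are mine):

	unsigned long pfn;
	struct page *page;

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		/* Skip holes inside a MAX_ORDER block; this expands to
		 * a real pfn_valid() check only when holes are possible. */
		if (!pfn_valid_within(pfn))
			continue;
		page = pfn_to_page(pfn);
		/* ... inspect or isolate the page ... */
	}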

Signed-off-by: Zi Yan <ziy@nvidia.com>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
---
 include/linux/mmzone.h | 13 +++++++++++++
 mm/compaction.c        | 20 +++++++++++++-------
 mm/memory_hotplug.c    |  7 +++++++
 mm/page_alloc.c        | 26 ++++++++++++++++++++++++--
 mm/page_isolation.c    |  7 ++++++-
 mm/page_owner.c        | 14 +++++++++++++-
 6 files changed, 76 insertions(+), 11 deletions(-)
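
Note for reviewers (my summary, not part of the changelog): the guards
below are written so they cost nothing unless holes are actually
possible. With the mmzone.h definition added in this patch, a kernel
where MAX_ORDER - 1 + PAGE_SHIFT <= SECTION_SIZE_BITS sees

	if (!pfn_valid_within(pfn))	/* expands to "if (!1)" ...    */
		continue;		/* ... dead code, compiled out */

so the scanner loops keep exactly their previous cost, and only
large-MAX_ORDER configurations pay for real pfn_valid() calls.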

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 98e3297b9e09..04f790ed81b7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1535,6 +1535,19 @@ void sparse_init(void);
 #define subsection_map_init(_pfn, _nr_pages) do {} while (0)
 #endif /* CONFIG_SPARSEMEM */
 
+/*
+ * If MAX_ORDER_NR_PAGES crosses multiple memory sections, it is
+ * possible to have holes within a MAX_ORDER_NR_PAGES block, so we
+ * need to check pfn validity within each such block.
+ * pfn_valid_within() should be used in this case; we optimise this away
+ * when we have no holes within a MAX_ORDER_NR_PAGES block.
+ */
+#if ((MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS)
+#define pfn_valid_within(pfn) pfn_valid(pfn)
+#else
+#define pfn_valid_within(pfn) (1)
+#endif
+
 #endif /* !__GENERATING_BOUNDS.H */
 #endif /* !__ASSEMBLY__ */
 #endif /* _LINUX_MMZONE_H */
diff --git a/mm/compaction.c b/mm/compaction.c
index fbc60f964c38..dda640d51b70 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -306,14 +306,16 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
 	 * is necessary for the block to be a migration source/target.
 	 */
 	do {
-		if (check_source && PageLRU(page)) {
-			clear_pageblock_skip(page);
-			return true;
-		}
+		if (pfn_valid_within(pfn)) {
+			if (check_source && PageLRU(page)) {
+				clear_pageblock_skip(page);
+				return true;
+			}
 
-		if (check_target && PageBuddy(page)) {
-			clear_pageblock_skip(page);
-			return true;
+			if (check_target && PageBuddy(page)) {
+				clear_pageblock_skip(page);
+				return true;
+			}
 		}
 
 		page += (1 << PAGE_ALLOC_COSTLY_ORDER);
@@ -583,6 +585,8 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 			break;
 
 		nr_scanned++;
+		if (!pfn_valid_within(blockpfn))
+			goto isolate_fail;
 
 		/*
 		 * For compound pages such as THP and hugetlbfs, we can save
@@ -881,6 +885,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 			cond_resched();
 		}
 
+		if (!pfn_valid_within(low_pfn))
+			goto isolate_fail;
 		nr_scanned++;
 
 		page = pfn_to_page(low_pfn);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 632cd832aef6..85029994a494 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1617,6 +1617,13 @@ struct zone *test_pages_in_a_zone(unsigned long start_pfn,
 			continue;
 		for (; pfn < sec_end_pfn && pfn < end_pfn;
 		     pfn += MAX_ORDER_NR_PAGES) {
+			int i = 0;
+
+			while ((i < MAX_ORDER_NR_PAGES) &&
+				!pfn_valid_within(pfn + i))
+				i++;
+			if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
+				continue;
 			/* Check if we got outside of the zone */
 			if (zone && !zone_spans_pfn(zone, pfn))
 				return NULL;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 416859e94f86..e4657009fd4f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -594,6 +594,8 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 
 static int page_is_consistent(struct zone *zone, struct page *page)
 {
+	if (!pfn_valid_within(page_to_pfn(page)))
+		return 0;
 	if (zone != page_zone(page))
 		return 0;
 
@@ -1023,12 +1025,16 @@ buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn,
 	if (order >= MAX_ORDER - 2)
 		return false;
 
+	if (!pfn_valid_within(buddy_pfn))
+		return false;
+
 	combined_pfn = buddy_pfn & pfn;
 	higher_page = page + (combined_pfn - pfn);
 	buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1);
 	higher_buddy = higher_page + (buddy_pfn - combined_pfn);
 
-	return page_is_buddy(higher_page, higher_buddy, order + 1);
+	return pfn_valid_within(buddy_pfn) &&
+	       page_is_buddy(higher_page, higher_buddy, order + 1);
 }
 
 /*
@@ -1089,6 +1095,8 @@ static inline void __free_one_page(struct page *page,
 		buddy_pfn = __find_buddy_pfn(pfn, order);
 		buddy = page + (buddy_pfn - pfn);
 
+		if (!pfn_valid_within(buddy_pfn))
+			goto done_merging;
 		if (!page_is_buddy(page, buddy, order))
 			goto done_merging;
 		/*
@@ -1118,6 +1126,9 @@ static inline void __free_one_page(struct page *page,
 
 			buddy_pfn = __find_buddy_pfn(pfn, order);
 			buddy = page + (buddy_pfn - pfn);
+
+			if (!pfn_valid_within(buddy_pfn))
+				goto done_merging;
 			buddy_mt = get_pageblock_migratetype(buddy);
 
 			if (migratetype != buddy_mt
@@ -1746,7 +1757,8 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
 /*
  * Check that the whole (or subset of) a pageblock given by the interval of
  * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
- * with the migration of free compaction scanner.
+ * with the migration or free compaction scanner. The scanners then need
+ * only the pfn_valid_within() check for holes within pageblocks.
  *
  * Return struct page pointer of start_pfn, or NULL if checks were not passed.
  *
@@ -1862,6 +1874,8 @@ static inline void __init pgdat_init_report_one_done(void)
  */
 static inline bool __init deferred_pfn_valid(unsigned long pfn)
 {
+	if (!pfn_valid_within(pfn))
+		return false;
 	if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn))
 		return false;
 	return true;
@@ -2508,6 +2522,11 @@ static int move_freepages(struct zone *zone,
 	int pages_moved = 0;
 
 	for (pfn = start_pfn; pfn <= end_pfn;) {
+		if (!pfn_valid_within(pfn)) {
+			pfn++;
+			continue;
+		}
+
 		page = pfn_to_page(pfn);
 		if (!PageBuddy(page)) {
 			/*
@@ -8825,6 +8844,9 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
 	}
 
 	for (; iter < pageblock_nr_pages - offset; iter++) {
+		if (!pfn_valid_within(pfn + iter))
+			continue;
+
 		page = pfn_to_page(pfn + iter);
 
 		/*
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 471e3a13b541..bddf788f45bf 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -93,7 +93,8 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
 			buddy_pfn = __find_buddy_pfn(pfn, order);
 			buddy = page + (buddy_pfn - pfn);
 
-			if (!is_migrate_isolate_page(buddy)) {
+			if (pfn_valid_within(buddy_pfn) &&
+			    !is_migrate_isolate_page(buddy)) {
 				__isolate_free_page(page, order);
 				isolated_page = true;
 			}
@@ -249,6 +250,10 @@ __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
 	struct page *page;
 
 	while (pfn < end_pfn) {
+		if (!pfn_valid_within(pfn)) {
+			pfn++;
+			continue;
+		}
 		page = pfn_to_page(pfn);
 		if (PageBuddy(page))
 			/*
diff --git a/mm/page_owner.c b/mm/page_owner.c
index d24ed221357c..23bfb074ca3f 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -276,6 +276,9 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
 		pageblock_mt = get_pageblock_migratetype(page);
 
 		for (; pfn < block_end_pfn; pfn++) {
+			if (!pfn_valid_within(pfn))
+				continue;
+
 			/* The pageblock is online, no need to recheck. */
 			page = pfn_to_page(pfn);
 
@@ -476,6 +479,10 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 			continue;
 		}
 
+		/* Check for holes within a MAX_ORDER area */
+		if (!pfn_valid_within(pfn))
+			continue;
+
 		page = pfn_to_page(pfn);
 		if (PageBuddy(page)) {
 			unsigned long freepage_order = buddy_order_unsafe(page);
@@ -553,9 +560,14 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
 		block_end_pfn = min(block_end_pfn, end_pfn);
 
 		for (; pfn < block_end_pfn; pfn++) {
-			struct page *page = pfn_to_page(pfn);
+			struct page *page;
 			struct page_ext *page_ext;
 
+			if (!pfn_valid_within(pfn))
+				continue;
+
+			page = pfn_to_page(pfn);
+
 			if (page_zone(page) != zone)
 				continue;
 
-- 
2.30.2


