From: Brendan Jackman <jackmanb@google.com>
To: David Hildenbrand <david@redhat.com>,
	Oscar Salvador <osalvador@suse.de>,
	 Andrew Morton <akpm@linux-foundation.org>,
	Mike Rapoport <rppt@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>,
	Anshuman Khandual <anshuman.khandual@arm.com>,
	 Vlastimil Babka <vbabka@suse.cz>,
	Pavel Tatashin <pasha.tatashin@soleen.com>,
	linux-mm@kvack.org,  linux-kernel@vger.kernel.org,
	Brendan Jackman <jackmanb@google.com>
Subject: [PATCH 2/2] mm,memory_hotplug: {READ,WRITE}_ONCE unsynchronized zone data
Date: Tue, 21 May 2024 12:57:19 +0000
Message-ID: <20240521-mm-hotplug-sync-v1-2-6d53706c1ba8@google.com>
In-Reply-To: <20240521-mm-hotplug-sync-v1-0-6d53706c1ba8@google.com>

These fields are written by memory hotplug under mem_hotplug_lock but
read without any lock. The reader code appears robust against the
values being stale or "from the future", but we also need to account
for:

1. Load/store tearing (according to Linus[1], this really happens,
   even when everything is aligned as you would hope).

2. Invented loads[2] - the compiler can spill and later re-read these
   fields, assuming the value has not changed in between.

Note we don't need READ_ONCE in paths that have the mem_hotplug_lock
for write, but we still need WRITE_ONCE to prevent store-tearing.
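
To make the two hazards concrete, here is a minimal userspace sketch
(zone_end(), grow_zone() and the volatile-cast macros are illustrative
stand-ins for this mail, not the kernel's implementation):

  #include <stdio.h>

  /* Illustrative stand-ins; the kernel's macros are more elaborate. */
  #define READ_ONCE(x)       (*(const volatile typeof(x) *)&(x))
  #define WRITE_ONCE(x, val) (*(volatile typeof(x) *)&(x) = (val))

  /* Stands in for zone->spanned_pages, updated by a hotplug writer. */
  static unsigned long spanned_pages;

  /* Lockless reader: force a single full-width load, so the compiler
   * can neither tear it nor invent a second load later. */
  static unsigned long zone_end(unsigned long start_pfn)
  {
          return start_pfn + READ_ONCE(spanned_pages);
  }

  /* Writer: in the kernel this runs under mem_hotplug_lock for write,
   * so a plain read is fine, but the store must not tear. */
  static void grow_zone(unsigned long nr_pages)
  {
          WRITE_ONCE(spanned_pages, spanned_pages + nr_pages);
  }

  int main(void)
  {
          grow_zone(42);
          printf("end pfn = %lu\n", zone_end(0));
          return 0;
  }

With plain accesses the compiler would instead be free to split either
access, or to reload spanned_pages between uses.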

[1] https://lore.kernel.org/all/CAHk-=wj2t+GK+DGQ7Xy6U7zMf72e7Jkxn4_-kGyfH3WFEoH+YQ@mail.gmail.com/T/#u
    As discovered via the original big-bad article[2]
[2] https://lwn.net/Articles/793253/

Signed-off-by: Brendan Jackman <jackmanb@google.com>
---
 include/linux/mmzone.h | 14 ++++++++++----
 mm/compaction.c        |  2 +-
 mm/memory_hotplug.c    | 20 ++++++++++++--------
 mm/mm_init.c           |  2 +-
 mm/page_alloc.c        |  2 +-
 mm/show_mem.c          |  8 ++++----
 mm/vmstat.c            |  4 ++--
 7 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 194ef7fed9d6..bdb3be76d10c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1018,11 +1018,13 @@ static inline unsigned long zone_cma_pages(struct zone *zone)
 #endif
 }
 
+/* This is unstable unless you hold mem_hotplug_lock. */
 static inline unsigned long zone_end_pfn(const struct zone *zone)
 {
-	return zone->zone_start_pfn + zone->spanned_pages;
+	return zone->zone_start_pfn + READ_ONCE(zone->spanned_pages);
 }
 
+/* This is unstable unless you hold mem_hotplug_lock. */
 static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
 {
 	return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone);
@@ -1033,9 +1035,10 @@ static inline bool zone_is_initialized(struct zone *zone)
 	return zone->initialized;
 }
 
+/* This is unstable unless you hold mem_hotplug_lock. */
 static inline bool zone_is_empty(struct zone *zone)
 {
-	return zone->spanned_pages == 0;
+	return READ_ONCE(zone->spanned_pages) == 0;
 }
 
 #ifndef BUILD_VDSO32_64
@@ -1485,10 +1488,13 @@ static inline bool managed_zone(struct zone *zone)
 	return zone_managed_pages(zone);
 }
 
-/* Returns true if a zone has memory */
+/*
+ * Returns true if a zone has memory.
+ * This is unstable unless you hold mem_hotplug_lock.
+ */
 static inline bool populated_zone(struct zone *zone)
 {
-	return zone->present_pages;
+	return READ_ONCE(zone->present_pages);
 }
 
 #ifdef CONFIG_NUMA
diff --git a/mm/compaction.c b/mm/compaction.c
index e731d45befc7..b8066d1fdcf5 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2239,7 +2239,7 @@ static unsigned int fragmentation_score_zone_weighted(struct zone *zone)
 {
 	unsigned long score;
 
-	score = zone->present_pages * fragmentation_score_zone(zone);
+	score = READ_ONCE(zone->present_pages) * fragmentation_score_zone(zone);
 	return div64_ul(score, zone->zone_pgdat->node_present_pages + 1);
 }
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 431b1f6753c0..71b5e3d314a2 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -463,6 +463,8 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
 	int nid = zone_to_nid(zone);
 
 	if (zone->zone_start_pfn == start_pfn) {
+		unsigned long old_end_pfn = zone_end_pfn(zone);
+
 		/*
 		 * If the section is smallest section in the zone, it need
 		 * shrink zone->zone_start_pfn and zone->zone_spanned_pages.
@@ -470,13 +472,13 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
 		 * for shrinking zone.
 		 */
 		pfn = find_smallest_section_pfn(nid, zone, end_pfn,
-						zone_end_pfn(zone));
+						old_end_pfn);
 		if (pfn) {
-			zone->spanned_pages = zone_end_pfn(zone) - pfn;
+			WRITE_ONCE(zone->spanned_pages, old_end_pfn - pfn);
 			zone->zone_start_pfn = pfn;
 		} else {
 			zone->zone_start_pfn = 0;
-			zone->spanned_pages = 0;
+			WRITE_ONCE(zone->spanned_pages, 0);
 		}
 	} else if (zone_end_pfn(zone) == end_pfn) {
 		/*
@@ -488,10 +490,11 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
 		pfn = find_biggest_section_pfn(nid, zone, zone->zone_start_pfn,
 					       start_pfn);
 		if (pfn)
-			zone->spanned_pages = pfn - zone->zone_start_pfn + 1;
+			WRITE_ONCE(zone->spanned_pages,
+				   pfn - zone->zone_start_pfn + 1);
 		else {
 			zone->zone_start_pfn = 0;
-			zone->spanned_pages = 0;
+			WRITE_ONCE(zone->spanned_pages, 0);
 		}
 	}
 }
@@ -710,7 +713,8 @@ static void __meminit resize_zone_range(struct zone *zone, unsigned long start_p
 	if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
 		zone->zone_start_pfn = start_pfn;
 
-	zone->spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - zone->zone_start_pfn;
+	WRITE_ONCE(zone->spanned_pages,
+		   max(start_pfn + nr_pages, old_end_pfn) - zone->zone_start_pfn);
 }
 
 static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned long start_pfn,
@@ -795,7 +799,7 @@ static void auto_movable_stats_account_zone(struct auto_movable_stats *stats,
 					    struct zone *zone)
 {
 	if (zone_idx(zone) == ZONE_MOVABLE) {
-		stats->movable_pages += zone->present_pages;
+		stats->movable_pages += READ_ONCE(zone->present_pages);
 	} else {
 		stats->kernel_early_pages += zone->present_early_pages;
 #ifdef CONFIG_CMA
@@ -1077,7 +1081,7 @@ void adjust_present_page_count(struct page *page, struct memory_group *group,
 	 */
 	if (early_section(__pfn_to_section(page_to_pfn(page))))
 		zone->present_early_pages += nr_pages;
-	zone->present_pages += nr_pages;
+	WRITE_ONCE(zone->present_pages, zone->present_pages + nr_pages);
 	zone->zone_pgdat->node_present_pages += nr_pages;
 
 	if (group && movable)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index c725618aeb58..ec66f2eadb95 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1540,7 +1540,7 @@ void __ref free_area_init_core_hotplug(struct pglist_data *pgdat)
 	for (z = 0; z < MAX_NR_ZONES; z++) {
 		struct zone *zone = pgdat->node_zones + z;
 
-		zone->present_pages = 0;
+		WRITE_ONCE(zone->present_pages, 0);
 		zone_init_internals(zone, z, nid, 0);
 	}
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5116a2b9ea6e..1eb9000ec7d7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5728,7 +5728,7 @@ __meminit void zone_pcp_init(struct zone *zone)
 
 	if (populated_zone(zone))
 		pr_debug("  %s zone: %lu pages, LIFO batch:%u\n", zone->name,
-			 zone->present_pages, zone_batchsize(zone));
+			 READ_ONCE(zone->present_pages), zone_batchsize(zone));
 }
 
 void adjust_managed_page_count(struct page *page, long count)
diff --git a/mm/show_mem.c b/mm/show_mem.c
index bdb439551eef..667680a6107b 100644
--- a/mm/show_mem.c
+++ b/mm/show_mem.c
@@ -337,7 +337,7 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z
 			K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE)),
 			K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)),
 			K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)),
-			K(zone->present_pages),
+			K(READ_ONCE(zone->present_pages)),
 			K(zone_managed_pages(zone)),
 			K(zone_page_state(zone, NR_MLOCK)),
 			K(zone_page_state(zone, NR_BOUNCE)),
@@ -407,11 +407,11 @@ void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
 
 	for_each_populated_zone(zone) {
 
-		total += zone->present_pages;
-		reserved += zone->present_pages - zone_managed_pages(zone);
+		total += READ_ONCE(zone->present_pages);
+		reserved += READ_ONCE(zone->present_pages) - zone_managed_pages(zone);
 
 		if (is_highmem(zone))
-			highmem += zone->present_pages;
+			highmem += READ_ONCE(zone->present_pages);
 	}
 
 	printk("%lu pages RAM\n", total);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 8507c497218b..5a9c4b5768e5 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1708,8 +1708,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   min_wmark_pages(zone),
 		   low_wmark_pages(zone),
 		   high_wmark_pages(zone),
-		   zone->spanned_pages,
-		   zone->present_pages,
+		   READ_ONCE(zone->spanned_pages),
+		   READ_ONCE(zone->present_pages),
 		   zone_managed_pages(zone),
 		   zone_cma_pages(zone));
 

-- 
2.45.0.rc1.225.g2a3ae87e7f-goog



