linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>,
	 Andrew Morton <akpm@linux-foundation.org>,
	"Borislav Petkov (AMD)" <bp@alien8.de>,
	 Mel Gorman <mgorman@suse.de>,
	Tom Lendacky <thomas.lendacky@amd.com>,
	 Mike Rapoport <rppt@kernel.org>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	 Jianxiong Gao <jxgao@google.com>,
	stable@vger.kernel.org
Subject: Re: [PATCH] mm: Fix endless reclaim on machines with unaccepted memory.
Date: Tue, 23 Jul 2024 16:53:05 +0300	[thread overview]
Message-ID: <mdsrik4ryedfe62hnhokejq7botphyvodydcvsbcrdsmwufv7w@oasahvgppsbd> (raw)
In-Reply-To: <Zp-aIfs3DNhAVBmO@tiehlicka>

On Tue, Jul 23, 2024 at 01:55:13PM +0200, Michal Hocko wrote:
> On Tue 23-07-24 12:49:41, Kirill A. Shutemov wrote:
> > On Tue, Jul 23, 2024 at 09:30:27AM +0200, Vlastimil Babka wrote:
> [...]
> > > Although just removing the lazy accept mode would be much more appealing
> > > solution than this :)
> > 
> > :P
> > 
> > Not really an option for big VMs. It might add many minutes to boot time.
> 
> Well a huge part of that can be done in the background so the boot
> doesn't really have to wait for all of it. If we really have to start
> playing whack-a-mole to plug all the potential ways to trigger reclaim
> imbalance I think it is fair to re-evaluate how much lazy should the
> initialization really be.

One other option I see is to treat unaccepted memory as free, so
watermarks would not fail if we have unaccepted memory. No spinning in
kswapd in this case.

Only get_page_from_freelist() and __alloc_pages_bulk() is aware about
unaccepted memory.

The quick patch below shows the idea.

I am not sure how it would affect __isolate_free_page() callers. IIUC,
they expect to see pages on free lists, but might not find them there
in this scenario because they are not accepted yet.
I need to look closer at this.

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c11b7cde81ef..5e0bdfbe2f1f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -667,6 +667,7 @@ enum zone_watermarks {
 #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
 #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
 #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
+#define promo_wmark_pages(z) (z->_watermark[WMARK_PROMO] + z->watermark_boost)
 #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
 
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 14d39f34d336..254bfe29eaf1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -304,7 +304,7 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 static bool page_contains_unaccepted(struct page *page, unsigned int order);
 static void accept_page(struct page *page, unsigned int order);
-static bool try_to_accept_memory(struct zone *zone, unsigned int order);
+static bool cond_accept_memory(struct zone *zone, unsigned int order);
 static inline bool has_unaccepted_memory(void);
 static bool __free_unaccepted(struct page *page);
 
@@ -2947,9 +2947,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
 	if (!(alloc_flags & ALLOC_CMA))
 		unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
 #endif
-#ifdef CONFIG_UNACCEPTED_MEMORY
-	unusable_free += zone_page_state(z, NR_UNACCEPTED);
-#endif
 
 	return unusable_free;
 }
@@ -3243,6 +3240,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 			}
 		}
 
+		cond_accept_memory(zone, order);
+
 		/*
 		 * Detect whether the number of free pages is below high
 		 * watermark.  If so, we will decrease pcp->high and free
@@ -3268,10 +3267,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 				       gfp_mask)) {
 			int ret;
 
-			if (has_unaccepted_memory()) {
-				if (try_to_accept_memory(zone, order))
-					goto try_this_zone;
-			}
+			if (cond_accept_memory(zone, order))
+				goto try_this_zone;
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 			/*
@@ -3325,10 +3322,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 
 			return page;
 		} else {
-			if (has_unaccepted_memory()) {
-				if (try_to_accept_memory(zone, order))
-					goto try_this_zone;
-			}
+			if (cond_accept_memory(zone, order))
+				goto try_this_zone;
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 			/* Try again if zone has deferred pages */
@@ -4456,12 +4451,25 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 			goto failed;
 		}
 
+		cond_accept_memory(zone, 0);
+retry_this_zone:
 		mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK) + nr_pages;
 		if (zone_watermark_fast(zone, 0,  mark,
 				zonelist_zone_idx(ac.preferred_zoneref),
 				alloc_flags, gfp)) {
 			break;
 		}
+
+		if (cond_accept_memory(zone, 0))
+			goto retry_this_zone;
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+		/* Try again if zone has deferred pages */
+		if (deferred_pages_enabled()) {
+			if (_deferred_grow_zone(zone, 0))
+				goto retry_this_zone;
+		}
+#endif
 	}
 
 	/*
@@ -6833,9 +6841,6 @@ static bool try_to_accept_memory_one(struct zone *zone)
 	struct page *page;
 	bool last;
 
-	if (list_empty(&zone->unaccepted_pages))
-		return false;
-
 	spin_lock_irqsave(&zone->lock, flags);
 	page = list_first_entry_or_null(&zone->unaccepted_pages,
 					struct page, lru);
@@ -6861,23 +6866,29 @@ static bool try_to_accept_memory_one(struct zone *zone)
 	return true;
 }
 
-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
+static bool cond_accept_memory(struct zone *zone, unsigned int order)
 {
 	long to_accept;
 	int ret = false;
 
-	/* How much to accept to get to high watermark? */
-	to_accept = high_wmark_pages(zone) -
-		    (zone_page_state(zone, NR_FREE_PAGES) -
-		    __zone_watermark_unusable_free(zone, order, 0));
+	if (!has_unaccepted_memory())
+		return false;
 
-	/* Accept at least one page */
-	do {
+	if (list_empty(&zone->unaccepted_pages))
+		return false;
+
+	/* How much to accept to get to high watermark? */
+	to_accept = promo_wmark_pages(zone) -
+		    (zone_page_state(zone, NR_FREE_PAGES) -
+		    __zone_watermark_unusable_free(zone, order, 0) -
+		    zone_page_state(zone, NR_UNACCEPTED));
+
+	while (to_accept > 0) {
 		if (!try_to_accept_memory_one(zone))
 			break;
 		ret = true;
 		to_accept -= MAX_ORDER_NR_PAGES;
-	} while (to_accept > 0);
+	}
 
 	return ret;
 }
@@ -6920,7 +6931,7 @@ static void accept_page(struct page *page, unsigned int order)
 {
 }
 
-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
+static bool cond_ccept_memory(struct zone *zone, unsigned int order)
 {
 	return false;
 }
-- 
  Kiryl Shutsemau / Kirill A. Shutemov


  reply	other threads:[~2024-07-23 13:53 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20240716130013.1997325-1-kirill.shutemov@linux.intel.com>
2024-07-17  7:19 ` Michal Hocko
2024-07-17 11:55   ` Kirill A. Shutemov
2024-07-17 12:06     ` Michal Hocko
2024-07-22 14:07       ` Kirill A. Shutemov
2024-07-23  7:30         ` Vlastimil Babka
2024-07-23  9:49           ` Kirill A. Shutemov
2024-07-23 11:55             ` Michal Hocko
2024-07-23 13:53               ` Kirill A. Shutemov [this message]
2024-07-17 21:00 ` Jianxiong Gao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=mdsrik4ryedfe62hnhokejq7botphyvodydcvsbcrdsmwufv7w@oasahvgppsbd \
    --to=kirill.shutemov@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=bp@alien8.de \
    --cc=jxgao@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.com \
    --cc=rppt@kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=thomas.lendacky@amd.com \
    --cc=vbabka@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox