From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>,
Andrew Morton <akpm@linux-foundation.org>,
"Borislav Petkov (AMD)" <bp@alien8.de>,
Mel Gorman <mgorman@suse.de>,
Tom Lendacky <thomas.lendacky@amd.com>,
Mike Rapoport <rppt@kernel.org>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
Jianxiong Gao <jxgao@google.com>,
stable@vger.kernel.org
Subject: Re: [PATCH] mm: Fix endless reclaim on machines with unaccepted memory.
Date: Tue, 23 Jul 2024 16:53:05 +0300 [thread overview]
Message-ID: <mdsrik4ryedfe62hnhokejq7botphyvodydcvsbcrdsmwufv7w@oasahvgppsbd> (raw)
In-Reply-To: <Zp-aIfs3DNhAVBmO@tiehlicka>
On Tue, Jul 23, 2024 at 01:55:13PM +0200, Michal Hocko wrote:
> On Tue 23-07-24 12:49:41, Kirill A. Shutemov wrote:
> > On Tue, Jul 23, 2024 at 09:30:27AM +0200, Vlastimil Babka wrote:
> [...]
> > > Although just removing the lazy accept mode would be much more appealing
> > > solution than this :)
> >
> > :P
> >
> > Not really an option for big VMs. It might add many minutes to boot time.
>
> Well a huge part of that can be done in the background so the boot
> doesn't really have to wait for all of it. If we really have to start
> playing whack-a-mole to plug all the potential ways to trigger reclaim
> imbalance I think it is fair to re-evaluate how much lazy should the
> initialization really be.
One other option I see is to treat unaccepted memory as free, so
watermarks would not fail if we have unaccepted memory. No spinning in
kswapd in this case.
Only get_page_from_freelist() and __alloc_pages_bulk() are aware of
unaccepted memory.
The quick patch below shows the idea.
I am not sure how it would affect __isolate_free_page() callers. IIUC,
they expect to see pages on free lists, but might not find them there
in this scenario because they are not accepted yet.
I need to look closer at this.
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c11b7cde81ef..5e0bdfbe2f1f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -667,6 +667,7 @@ enum zone_watermarks {
#define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
#define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
+#define promo_wmark_pages(z) (z->_watermark[WMARK_PROMO] + z->watermark_boost)
#define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
/*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 14d39f34d336..254bfe29eaf1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -304,7 +304,7 @@ EXPORT_SYMBOL(nr_online_nodes);
static bool page_contains_unaccepted(struct page *page, unsigned int order);
static void accept_page(struct page *page, unsigned int order);
-static bool try_to_accept_memory(struct zone *zone, unsigned int order);
+static bool cond_accept_memory(struct zone *zone, unsigned int order);
static inline bool has_unaccepted_memory(void);
static bool __free_unaccepted(struct page *page);
@@ -2947,9 +2947,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
if (!(alloc_flags & ALLOC_CMA))
unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
#endif
-#ifdef CONFIG_UNACCEPTED_MEMORY
- unusable_free += zone_page_state(z, NR_UNACCEPTED);
-#endif
return unusable_free;
}
@@ -3243,6 +3240,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
}
}
+ cond_accept_memory(zone, order);
+
/*
* Detect whether the number of free pages is below high
* watermark. If so, we will decrease pcp->high and free
@@ -3268,10 +3267,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
gfp_mask)) {
int ret;
- if (has_unaccepted_memory()) {
- if (try_to_accept_memory(zone, order))
- goto try_this_zone;
- }
+ if (cond_accept_memory(zone, order))
+ goto try_this_zone;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*
@@ -3325,10 +3322,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
return page;
} else {
- if (has_unaccepted_memory()) {
- if (try_to_accept_memory(zone, order))
- goto try_this_zone;
- }
+ if (cond_accept_memory(zone, order))
+ goto try_this_zone;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/* Try again if zone has deferred pages */
@@ -4456,12 +4451,25 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
goto failed;
}
+ cond_accept_memory(zone, 0);
+retry_this_zone:
mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK) + nr_pages;
if (zone_watermark_fast(zone, 0, mark,
zonelist_zone_idx(ac.preferred_zoneref),
alloc_flags, gfp)) {
break;
}
+
+ if (cond_accept_memory(zone, 0))
+ goto retry_this_zone;
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+ /* Try again if zone has deferred pages */
+ if (deferred_pages_enabled()) {
+ if (_deferred_grow_zone(zone, 0))
+ goto retry_this_zone;
+ }
+#endif
}
/*
@@ -6833,9 +6841,6 @@ static bool try_to_accept_memory_one(struct zone *zone)
struct page *page;
bool last;
- if (list_empty(&zone->unaccepted_pages))
- return false;
-
spin_lock_irqsave(&zone->lock, flags);
page = list_first_entry_or_null(&zone->unaccepted_pages,
struct page, lru);
@@ -6861,23 +6866,29 @@ static bool try_to_accept_memory_one(struct zone *zone)
return true;
}
-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
+static bool cond_accept_memory(struct zone *zone, unsigned int order)
{
long to_accept;
int ret = false;
- /* How much to accept to get to high watermark? */
- to_accept = high_wmark_pages(zone) -
- (zone_page_state(zone, NR_FREE_PAGES) -
- __zone_watermark_unusable_free(zone, order, 0));
+ if (!has_unaccepted_memory())
+ return false;
- /* Accept at least one page */
- do {
+ if (list_empty(&zone->unaccepted_pages))
+ return false;
+
+ /* How much to accept to get to high watermark? */
+ to_accept = promo_wmark_pages(zone) -
+ (zone_page_state(zone, NR_FREE_PAGES) -
+ __zone_watermark_unusable_free(zone, order, 0) -
+ zone_page_state(zone, NR_UNACCEPTED));
+
+ while (to_accept > 0) {
if (!try_to_accept_memory_one(zone))
break;
ret = true;
to_accept -= MAX_ORDER_NR_PAGES;
- } while (to_accept > 0);
+ }
return ret;
}
@@ -6920,7 +6931,7 @@ static void accept_page(struct page *page, unsigned int order)
{
}
-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
+static bool cond_accept_memory(struct zone *zone, unsigned int order)
{
return false;
}
--
Kiryl Shutsemau / Kirill A. Shutemov
next prev parent reply other threads:[~2024-07-23 13:53 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20240716130013.1997325-1-kirill.shutemov@linux.intel.com>
2024-07-17 7:19 ` Michal Hocko
2024-07-17 11:55 ` Kirill A. Shutemov
2024-07-17 12:06 ` Michal Hocko
2024-07-22 14:07 ` Kirill A. Shutemov
2024-07-23 7:30 ` Vlastimil Babka
2024-07-23 9:49 ` Kirill A. Shutemov
2024-07-23 11:55 ` Michal Hocko
2024-07-23 13:53 ` Kirill A. Shutemov [this message]
2024-07-17 21:00 ` Jianxiong Gao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=mdsrik4ryedfe62hnhokejq7botphyvodydcvsbcrdsmwufv7w@oasahvgppsbd \
--to=kirill.shutemov@linux.intel.com \
--cc=akpm@linux-foundation.org \
--cc=bp@alien8.de \
--cc=jxgao@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@suse.de \
--cc=mhocko@suse.com \
--cc=rppt@kernel.org \
--cc=stable@vger.kernel.org \
--cc=thomas.lendacky@amd.com \
--cc=vbabka@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox