From: wujing <realwujing@qq.com>
To: Andrew Morton <akpm@linux-foundation.org>,
Vlastimil Babka <vbabka@suse.cz>
Cc: Matthew Wilcox <willy@infradead.org>,
Lance Yang <lance.yang@linux.dev>,
Suren Baghdasaryan <surenb@google.com>,
Michal Hocko <mhocko@suse.com>,
Brendan Jackman <jackmanb@google.com>,
Johannes Weiner <hannes@cmpxchg.org>, Zi Yan <ziy@nvidia.com>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
netdev@vger.kernel.org, Qiliang Yuan <yuanql9@chinatelecom.cn>,
wujing <realwujing@qq.com>
Subject: [PATCH v2 1/1] mm/page_alloc: auto-tune min_free_kbytes on atomic allocation failure
Date: Mon, 5 Jan 2026 16:21:52 +0800 [thread overview]
Message-ID: <tencent_6FE67BA7BE8376AB038A71ACAD4FF8A90006@qq.com> (raw)
In-Reply-To: <20260105081720.1308764-1-realwujing@qq.com>
Introduce a mechanism to dynamically increase vm.min_free_kbytes when
critical atomic allocations (GFP_ATOMIC, order-0) fail. This prevents
recurring network packet drops or other atomic failures by proactively
reserving more memory.
The system increases min_free_kbytes by 50% upon failure, capped at 1%
of total RAM. To prevent repeated adjustments during burst traffic, a
10-second debounce window is enforced.
After traffic subsides, min_free_kbytes automatically decays by 5% every
5 minutes. However, decay stops at 1.2x the initial value rather than
returning to baseline. This ensures the system "remembers" previous
pressure patterns and avoids repeated failures under similar load.
Observed failure logs:
[38535641.026406] node 0: slabs: 941, objs: 54656, free: 0
[38535641.037711] node 1: slabs: 349, objs: 22096, free: 272
[38535641.049025] node 1: slabs: 349, objs: 22096, free: 272
[38535642.795972] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
[38535642.805017] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
[38535642.816311] node 0: slabs: 854, objs: 42320, free: 0
[38535642.823066] node 1: slabs: 400, objs: 25360, free: 294
[38535643.070199] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
[38535643.078861] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
[38535643.089719] node 0: slabs: 841, objs: 41824, free: 0
[38535643.096513] node 1: slabs: 393, objs: 24480, free: 272
[38535643.484149] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
[38535643.492831] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
[38535643.503666] node 0: slabs: 898, objs: 43120, free: 159
[38535643.510140] node 1: slabs: 404, objs: 25424, free: 319
[38535644.699224] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
[38535644.707911] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
[38535644.718700] node 0: slabs: 1031, objs: 43328, free: 0
[38535644.725059] node 1: slabs: 339, objs: 17616, free: 317
[38535645.428345] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
[38535645.436888] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
[38535645.447664] node 0: slabs: 940, objs: 40864, free: 144
[38535645.454026] node 1: slabs: 322, objs: 19168, free: 383
[38535645.556122] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
[38535645.564576] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
[38535649.655523] warn_alloc: 59 callbacks suppressed
[38535649.655527] swapper/100: page allocation failure: order:0, mode:0x480020(GFP_ATOMIC), nodemask=(null)
[38535649.671692] swapper/100 cpuset=/ mems_allowed=0-1
Signed-off-by: wujing <realwujing@qq.com>
Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>
---
mm/page_alloc.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 85 insertions(+)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c380f063e8b7..2f12d7a9ecbc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -30,6 +30,7 @@
#include <linux/oom.h>
#include <linux/topology.h>
#include <linux/sysctl.h>
+#include <linux/workqueue.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/pagevec.h>
@@ -3975,6 +3976,16 @@ static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask)
mem_cgroup_show_protected_memory(NULL);
}
+/*
+ * State for auto-tuning min_free_kbytes after atomic allocation failures:
+ * one work item raises the value, a delayed work item lowers it again.
+ */
+static void boost_min_free_kbytes_workfn(struct work_struct *work);
+static void decay_min_free_kbytes_workfn(struct work_struct *work);
+static DECLARE_WORK(boost_min_free_kbytes_work, boost_min_free_kbytes_workfn);
+static DECLARE_DELAYED_WORK(decay_min_free_kbytes_work,
+			    decay_min_free_kbytes_workfn);
+
+/* Minimum spacing between two boost requests, in milliseconds (10 seconds). */
+#define BOOST_DEBOUNCE_MS 10000
+
+/* jiffies value recorded when the boost work was last scheduled. */
+static unsigned long last_boost_jiffies;
+
+/* First min_free_kbytes value saved at watermark init; 0 until it is saved. */
+static int initial_min_free_kbytes;
+
+
void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
{
struct va_format vaf;
@@ -4947,6 +4958,17 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
goto retry;
}
fail:
+ /* Auto-tuning: trigger boost if atomic allocation fails */
+ if ((gfp_mask & GFP_ATOMIC) && order == 0) {
+ unsigned long now = jiffies;
+
+ /* Debounce: only boost once every 10 seconds */
+ if (time_after(now, last_boost_jiffies + msecs_to_jiffies(BOOST_DEBOUNCE_MS))) {
+ last_boost_jiffies = now;
+ schedule_work(&boost_min_free_kbytes_work);
+ }
+ }
+
warn_alloc(gfp_mask, ac->nodemask,
"page allocation failure: order:%u", order);
got_pg:
@@ -6526,6 +6548,10 @@ int __meminit init_per_zone_wmark_min(void)
refresh_zone_stat_thresholds();
setup_per_zone_lowmem_reserve();
+ /* Save initial value for auto-tuning decay mechanism */
+ if (initial_min_free_kbytes == 0)
+ initial_min_free_kbytes = min_free_kbytes;
+
#ifdef CONFIG_NUMA
setup_min_unmapped_ratio();
setup_min_slab_ratio();
@@ -7682,3 +7708,62 @@ struct page *alloc_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int or
return page;
}
EXPORT_SYMBOL_GPL(alloc_pages_nolock_noprof);
+
+/*
+ * boost_min_free_kbytes_workfn - raise min_free_kbytes by 50%
+ * @work: the work item being run (unused)
+ *
+ * Grows min_free_kbytes by half of its current value, capped at 1% of total
+ * RAM. When the value actually increases, user_min_free_kbytes is raised to
+ * at least the new value, the per-zone watermarks are recomputed and the
+ * decay work is scheduled. Nothing happens when the cap has already been
+ * reached or the current value is too small to grow by 50%.
+ */
+static void boost_min_free_kbytes_workfn(struct work_struct *work)
+{
+	/* Delay before the decay work runs: 5 minutes. */
+	const unsigned int decay_delay_ms = 5 * 60 * 1000;
+	unsigned long total_kbytes = totalram_pages() << (PAGE_SHIFT - 10);
+	/* Cap at 1% of total RAM for safety */
+	unsigned long max_limit = total_kbytes / 100;
+	int cur_min = min_free_kbytes;
+	unsigned long new_min;
+
+	/* Half of a non-positive value cannot produce an increase. */
+	if (cur_min <= 0)
+		return;
+
+	/* min_free_kbytes is an int, so the cap must stay representable. */
+	if (max_limit > INT_MAX)
+		max_limit = INT_MAX;
+
+	/*
+	 * Responsive increase: 50% instead of doubling. Computed in
+	 * unsigned long so a large sysctl-provided value cannot overflow
+	 * signed int arithmetic.
+	 */
+	new_min = (unsigned long)cur_min + (unsigned long)cur_min / 2;
+	if (new_min > max_limit)
+		new_min = max_limit;
+
+	if (new_min <= (unsigned long)cur_min)
+		return;
+
+	min_free_kbytes = (int)new_min;
+	/* Update user_min_free_kbytes so it persists through recalculations */
+	if (min_free_kbytes > user_min_free_kbytes)
+		user_min_free_kbytes = min_free_kbytes;
+
+	setup_per_zone_wmarks();
+
+	/* No effect if the decay work is already pending. */
+	schedule_delayed_work(&decay_min_free_kbytes_work,
+			      msecs_to_jiffies(decay_delay_ms));
+
+	pr_info("Auto-tuning: atomic failure, increasing min_free_kbytes to %d\n",
+		min_free_kbytes);
+}
+
+/*
+ * decay_min_free_kbytes_workfn - lower min_free_kbytes by 5%
+ * @work: the work item being run (unused)
+ *
+ * Reduces min_free_kbytes by 1/20th of its current value, but never below
+ * 1.2x initial_min_free_kbytes. When the value drops, user_min_free_kbytes
+ * is set to the same value and the per-zone watermarks are recomputed. The
+ * work re-arms itself for another 5 minutes as long as the result is still
+ * above the floor.
+ */
+static void decay_min_free_kbytes_workfn(struct work_struct *work)
+{
+	int target = min_free_kbytes - (min_free_kbytes / 20);
+	/* Floor stays 20% above the initial value (preserve learning effect) */
+	int floor_kbytes = initial_min_free_kbytes + (initial_min_free_kbytes / 5);
+
+	if (target < floor_kbytes)
+		target = floor_kbytes;
+
+	/* Already at or below the floor, or too small to shrink by 5%. */
+	if (target >= min_free_kbytes)
+		return;
+
+	min_free_kbytes = target;
+	user_min_free_kbytes = target;
+	setup_per_zone_wmarks();
+
+	/* Keep decaying every 5 minutes until the floor is reached */
+	if (target > floor_kbytes)
+		schedule_delayed_work(&decay_min_free_kbytes_work,
+				      msecs_to_jiffies(300000));
+
+	pr_info("Auto-tuning: decaying min_free_kbytes to %d\n", min_free_kbytes);
+}
--
2.39.5
next parent reply other threads:[~2026-01-05 8:22 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20260105081720.1308764-1-realwujing@qq.com>
2026-01-05 8:21 ` wujing [this message]
2026-01-05 9:56 ` Vlastimil Babka
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=tencent_6FE67BA7BE8376AB038A71ACAD4FF8A90006@qq.com \
--to=realwujing@qq.com \
--cc=akpm@linux-foundation.org \
--cc=hannes@cmpxchg.org \
--cc=jackmanb@google.com \
--cc=lance.yang@linux.dev \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@suse.com \
--cc=netdev@vger.kernel.org \
--cc=surenb@google.com \
--cc=vbabka@suse.cz \
--cc=willy@infradead.org \
--cc=yuanql9@chinatelecom.cn \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox