* [PATCH v2 1/1] mm/page_alloc: auto-tune min_free_kbytes on atomic allocation failure
[not found] <20260105081720.1308764-1-realwujing@qq.com>
@ 2026-01-05 8:21 ` wujing
2026-01-05 9:56 ` Vlastimil Babka
0 siblings, 1 reply; 2+ messages in thread
From: wujing @ 2026-01-05 8:21 UTC (permalink / raw)
To: Andrew Morton, Vlastimil Babka
Cc: Matthew Wilcox, Lance Yang, Suren Baghdasaryan, Michal Hocko,
Brendan Jackman, Johannes Weiner, Zi Yan, linux-mm, linux-kernel,
netdev, Qiliang Yuan, wujing
Introduce a mechanism to dynamically increase vm.min_free_kbytes when
critical atomic allocations (GFP_ATOMIC, order-0) fail. This prevents
recurring network packet drops or other atomic failures by proactively
reserving more memory.
The system increases min_free_kbytes by 50% upon failure, capped at 1%
of total RAM. To prevent repeated adjustments during burst traffic, a
10-second debounce window is enforced.
After traffic subsides, min_free_kbytes automatically decays by 5% every
5 minutes. However, decay stops at 1.2x the initial value rather than
returning to baseline. This ensures the system "remembers" previous
pressure patterns and avoids repeated failures under similar load.
Observed failure logs:
[38535641.026406] node 0: slabs: 941, objs: 54656, free: 0
[38535641.037711] node 1: slabs: 349, objs: 22096, free: 272
[38535641.049025] node 1: slabs: 349, objs: 22096, free: 272
[38535642.795972] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
[38535642.805017] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
[38535642.816311] node 0: slabs: 854, objs: 42320, free: 0
[38535642.823066] node 1: slabs: 400, objs: 25360, free: 294
[38535643.070199] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
[38535643.078861] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
[38535643.089719] node 0: slabs: 841, objs: 41824, free: 0
[38535643.096513] node 1: slabs: 393, objs: 24480, free: 272
[38535643.484149] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
[38535643.492831] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
[38535643.503666] node 0: slabs: 898, objs: 43120, free: 159
[38535643.510140] node 1: slabs: 404, objs: 25424, free: 319
[38535644.699224] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
[38535644.707911] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
[38535644.718700] node 0: slabs: 1031, objs: 43328, free: 0
[38535644.725059] node 1: slabs: 339, objs: 17616, free: 317
[38535645.428345] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
[38535645.436888] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
[38535645.447664] node 0: slabs: 940, objs: 40864, free: 144
[38535645.454026] node 1: slabs: 322, objs: 19168, free: 383
[38535645.556122] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
[38535645.564576] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
[38535649.655523] warn_alloc: 59 callbacks suppressed
[38535649.655527] swapper/100: page allocation failure: order:0, mode:0x480020(GFP_ATOMIC), nodemask=(null)
[38535649.671692] swapper/100 cpuset=/ mems_allowed=0-1
Signed-off-by: wujing <realwujing@qq.com>
Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>
---
mm/page_alloc.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 85 insertions(+)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c380f063e8b7..2f12d7a9ecbc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -30,6 +30,7 @@
#include <linux/oom.h>
#include <linux/topology.h>
#include <linux/sysctl.h>
+#include <linux/workqueue.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/pagevec.h>
@@ -3975,6 +3976,16 @@ static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask)
mem_cgroup_show_protected_memory(NULL);
}
+/* Auto-tuning min_free_kbytes on atomic allocation failures (v2) */
+static void decay_min_free_kbytes_workfn(struct work_struct *work);
+static void boost_min_free_kbytes_workfn(struct work_struct *work);
+static DECLARE_WORK(boost_min_free_kbytes_work, boost_min_free_kbytes_workfn);
+static DECLARE_DELAYED_WORK(decay_min_free_kbytes_work, decay_min_free_kbytes_workfn);
+static unsigned long last_boost_jiffies = 0;
+static int initial_min_free_kbytes = 0;
+#define BOOST_DEBOUNCE_MS 10000 /* 10 seconds debounce */
+
+
void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
{
struct va_format vaf;
@@ -4947,6 +4958,17 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
goto retry;
}
fail:
+ /* Auto-tuning: trigger boost if atomic allocation fails */
+ if ((gfp_mask & GFP_ATOMIC) && order == 0) {
+ unsigned long now = jiffies;
+
+ /* Debounce: only boost once every 10 seconds */
+ if (time_after(now, last_boost_jiffies + msecs_to_jiffies(BOOST_DEBOUNCE_MS))) {
+ last_boost_jiffies = now;
+ schedule_work(&boost_min_free_kbytes_work);
+ }
+ }
+
warn_alloc(gfp_mask, ac->nodemask,
"page allocation failure: order:%u", order);
got_pg:
@@ -6526,6 +6548,10 @@ int __meminit init_per_zone_wmark_min(void)
refresh_zone_stat_thresholds();
setup_per_zone_lowmem_reserve();
+ /* Save initial value for auto-tuning decay mechanism */
+ if (initial_min_free_kbytes == 0)
+ initial_min_free_kbytes = min_free_kbytes;
+
#ifdef CONFIG_NUMA
setup_min_unmapped_ratio();
setup_min_slab_ratio();
@@ -7682,3 +7708,62 @@ struct page *alloc_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int or
return page;
}
EXPORT_SYMBOL_GPL(alloc_pages_nolock_noprof);
+
+static void boost_min_free_kbytes_workfn(struct work_struct *work)
+{
+ int new_min;
+
+ /* Cap at 1% of total RAM for safety */
+ unsigned long total_kbytes = totalram_pages() << (PAGE_SHIFT - 10);
+ int max_limit = total_kbytes / 100;
+
+ /* Responsive increase: 50% instead of doubling */
+ new_min = min_free_kbytes + (min_free_kbytes / 2);
+
+ if (new_min > max_limit)
+ new_min = max_limit;
+
+ if (new_min > min_free_kbytes) {
+ min_free_kbytes = new_min;
+ /* Update user_min_free_kbytes so it persists through recalculations */
+ if (new_min > user_min_free_kbytes)
+ user_min_free_kbytes = new_min;
+
+ setup_per_zone_wmarks();
+
+ /* Schedule decay after 5 minutes */
+ schedule_delayed_work(&decay_min_free_kbytes_work,
+ msecs_to_jiffies(300000));
+
+ pr_info("Auto-tuning: atomic failure, increasing min_free_kbytes to %d\n",
+ min_free_kbytes);
+ }
+}
+
+static void decay_min_free_kbytes_workfn(struct work_struct *work)
+{
+ int new_min;
+ int decay_floor;
+
+ /* Decay by 5% */
+ new_min = min_free_kbytes - (min_free_kbytes / 20);
+
+ /* Don't go below 1.2x initial value (preserve learning effect) */
+ decay_floor = initial_min_free_kbytes + (initial_min_free_kbytes / 5);
+ if (new_min < decay_floor)
+ new_min = decay_floor;
+
+ if (new_min < min_free_kbytes) {
+ min_free_kbytes = new_min;
+ user_min_free_kbytes = new_min;
+ setup_per_zone_wmarks();
+
+ /* Schedule next decay if still above floor */
+ if (new_min > decay_floor) {
+ schedule_delayed_work(&decay_min_free_kbytes_work,
+ msecs_to_jiffies(300000));
+ }
+
+ pr_info("Auto-tuning: decaying min_free_kbytes to %d\n", min_free_kbytes);
+ }
+}
--
2.39.5
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH v2 1/1] mm/page_alloc: auto-tune min_free_kbytes on atomic allocation failure
2026-01-05 8:21 ` [PATCH v2 1/1] mm/page_alloc: auto-tune min_free_kbytes on atomic allocation failure wujing
@ 2026-01-05 9:56 ` Vlastimil Babka
0 siblings, 0 replies; 2+ messages in thread
From: Vlastimil Babka @ 2026-01-05 9:56 UTC (permalink / raw)
To: wujing, Andrew Morton
Cc: Matthew Wilcox, Lance Yang, Suren Baghdasaryan, Michal Hocko,
Brendan Jackman, Johannes Weiner, Zi Yan, linux-mm, linux-kernel,
netdev, Qiliang Yuan
On 1/5/26 09:21, wujing wrote:
> Introduce a mechanism to dynamically increase vm.min_free_kbytes when
> critical atomic allocations (GFP_ATOMIC, order-0) fail. This prevents
> recurring network packet drops or other atomic failures by proactively
> reserving more memory.
Were packet drops actually observed? AFAIK the receive is deferred to
non-irq context if those atomic allocations fail, so it shouldn't mean a drop.
I also recall that the main source of these GFP_ATOMIC failure warnings was
finally silenced some time ago. Maybe we just need more silencing.
Thus I'd be reluctant to proceed unless there's confirmed benefit and
interest from netdev.
As for the implementation, I'd rather not change min_free_kbytes
directly, as that could interact in unpredictable ways with an admin
changing that value. We already have watermark_boost to dynamically change
watermarks (for other reasons), and it seems it would be better to expand that.
> The system increases min_free_kbytes by 50% upon failure, capped at 1%
> of total RAM. To prevent repeated adjustments during burst traffic, a
> 10-second debounce window is enforced.
>
> After traffic subsides, min_free_kbytes automatically decays by 5% every
> 5 minutes. However, decay stops at 1.2x the initial value rather than
> returning to baseline. This ensures the system "remembers" previous
> pressure patterns and avoids repeated failures under similar load.
>
> Observed failure logs:
> [38535641.026406] node 0: slabs: 941, objs: 54656, free: 0
> [38535641.037711] node 1: slabs: 349, objs: 22096, free: 272
> [38535641.049025] node 1: slabs: 349, objs: 22096, free: 272
> [38535642.795972] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
> [38535642.805017] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
> [38535642.816311] node 0: slabs: 854, objs: 42320, free: 0
> [38535642.823066] node 1: slabs: 400, objs: 25360, free: 294
> [38535643.070199] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
> [38535643.078861] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
> [38535643.089719] node 0: slabs: 841, objs: 41824, free: 0
> [38535643.096513] node 1: slabs: 393, objs: 24480, free: 272
> [38535643.484149] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
> [38535643.492831] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
> [38535643.503666] node 0: slabs: 898, objs: 43120, free: 159
> [38535643.510140] node 1: slabs: 404, objs: 25424, free: 319
> [38535644.699224] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
> [38535644.707911] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
> [38535644.718700] node 0: slabs: 1031, objs: 43328, free: 0
> [38535644.725059] node 1: slabs: 339, objs: 17616, free: 317
> [38535645.428345] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
> [38535645.436888] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
> [38535645.447664] node 0: slabs: 940, objs: 40864, free: 144
> [38535645.454026] node 1: slabs: 322, objs: 19168, free: 383
> [38535645.556122] SLUB: Unable to allocate memory on node -1, gfp=0x480020(GFP_ATOMIC)
> [38535645.564576] cache: skbuff_head_cache, object size: 232, buffer size: 256, default order: 2, min order: 0
> [38535649.655523] warn_alloc: 59 callbacks suppressed
> [38535649.655527] swapper/100: page allocation failure: order:0, mode:0x480020(GFP_ATOMIC), nodemask=(null)
> [38535649.671692] swapper/100 cpuset=/ mems_allowed=0-1
>
> Signed-off-by: wujing <realwujing@qq.com>
> Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>
> ---
> mm/page_alloc.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 85 insertions(+)
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index c380f063e8b7..2f12d7a9ecbc 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -30,6 +30,7 @@
> #include <linux/oom.h>
> #include <linux/topology.h>
> #include <linux/sysctl.h>
> +#include <linux/workqueue.h>
> #include <linux/cpu.h>
> #include <linux/cpuset.h>
> #include <linux/pagevec.h>
> @@ -3975,6 +3976,16 @@ static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask)
> mem_cgroup_show_protected_memory(NULL);
> }
>
> +/* Auto-tuning min_free_kbytes on atomic allocation failures (v2) */
> +static void decay_min_free_kbytes_workfn(struct work_struct *work);
> +static void boost_min_free_kbytes_workfn(struct work_struct *work);
> +static DECLARE_WORK(boost_min_free_kbytes_work, boost_min_free_kbytes_workfn);
> +static DECLARE_DELAYED_WORK(decay_min_free_kbytes_work, decay_min_free_kbytes_workfn);
> +static unsigned long last_boost_jiffies = 0;
> +static int initial_min_free_kbytes = 0;
> +#define BOOST_DEBOUNCE_MS 10000 /* 10 seconds debounce */
> +
> +
> void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
> {
> struct va_format vaf;
> @@ -4947,6 +4958,17 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> goto retry;
> }
> fail:
> + /* Auto-tuning: trigger boost if atomic allocation fails */
> + if ((gfp_mask & GFP_ATOMIC) && order == 0) {
> + unsigned long now = jiffies;
> +
> + /* Debounce: only boost once every 10 seconds */
> + if (time_after(now, last_boost_jiffies + msecs_to_jiffies(BOOST_DEBOUNCE_MS))) {
> + last_boost_jiffies = now;
> + schedule_work(&boost_min_free_kbytes_work);
> + }
> + }
> +
> warn_alloc(gfp_mask, ac->nodemask,
> "page allocation failure: order:%u", order);
> got_pg:
> @@ -6526,6 +6548,10 @@ int __meminit init_per_zone_wmark_min(void)
> refresh_zone_stat_thresholds();
> setup_per_zone_lowmem_reserve();
>
> + /* Save initial value for auto-tuning decay mechanism */
> + if (initial_min_free_kbytes == 0)
> + initial_min_free_kbytes = min_free_kbytes;
> +
> #ifdef CONFIG_NUMA
> setup_min_unmapped_ratio();
> setup_min_slab_ratio();
> @@ -7682,3 +7708,62 @@ struct page *alloc_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int or
> return page;
> }
> EXPORT_SYMBOL_GPL(alloc_pages_nolock_noprof);
> +
> +static void boost_min_free_kbytes_workfn(struct work_struct *work)
> +{
> + int new_min;
> +
> + /* Cap at 1% of total RAM for safety */
> + unsigned long total_kbytes = totalram_pages() << (PAGE_SHIFT - 10);
> + int max_limit = total_kbytes / 100;
> +
> + /* Responsive increase: 50% instead of doubling */
> + new_min = min_free_kbytes + (min_free_kbytes / 2);
> +
> + if (new_min > max_limit)
> + new_min = max_limit;
> +
> + if (new_min > min_free_kbytes) {
> + min_free_kbytes = new_min;
> + /* Update user_min_free_kbytes so it persists through recalculations */
> + if (new_min > user_min_free_kbytes)
> + user_min_free_kbytes = new_min;
> +
> + setup_per_zone_wmarks();
> +
> + /* Schedule decay after 5 minutes */
> + schedule_delayed_work(&decay_min_free_kbytes_work,
> + msecs_to_jiffies(300000));
> +
> + pr_info("Auto-tuning: atomic failure, increasing min_free_kbytes to %d\n",
> + min_free_kbytes);
> + }
> +}
> +
> +static void decay_min_free_kbytes_workfn(struct work_struct *work)
> +{
> + int new_min;
> + int decay_floor;
> +
> + /* Decay by 5% */
> + new_min = min_free_kbytes - (min_free_kbytes / 20);
> +
> + /* Don't go below 1.2x initial value (preserve learning effect) */
> + decay_floor = initial_min_free_kbytes + (initial_min_free_kbytes / 5);
> + if (new_min < decay_floor)
> + new_min = decay_floor;
> +
> + if (new_min < min_free_kbytes) {
> + min_free_kbytes = new_min;
> + user_min_free_kbytes = new_min;
> + setup_per_zone_wmarks();
> +
> + /* Schedule next decay if still above floor */
> + if (new_min > decay_floor) {
> + schedule_delayed_work(&decay_min_free_kbytes_work,
> + msecs_to_jiffies(300000));
> + }
> +
> + pr_info("Auto-tuning: decaying min_free_kbytes to %d\n", min_free_kbytes);
> + }
> +}
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2026-01-05 9:56 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20260105081720.1308764-1-realwujing@qq.com>
2026-01-05 8:21 ` [PATCH v2 1/1] mm/page_alloc: auto-tune min_free_kbytes on atomic allocation failure wujing
2026-01-05 9:56 ` Vlastimil Babka
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox