linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Sourav Panda <souravpanda@google.com>
To: akpm@linux-foundation.org, linux-mm@kvack.org,
	 linux-kernel@vger.kernel.org
Cc: lsf-pc@lists.linux-foundation.org, songmuchun@bytedance.com,
	 osalvador@suse.de, mike.kravetz@oracle.com,
	mathieu.desnoyers@efficios.com,  willy@infradead.org,
	david@redhat.com, pasha.tatashin@soleen.com,
	 rientjes@google.com, weixugc@google.com, gthelen@google.com,
	 souravpanda@google.com, surenb@google.com
Subject: [LSF/MM/BPF TOPIC][RFC PATCH 1/2] mm: add hugepage shrinker for frozen memory
Date: Wed, 18 Mar 2026 23:41:25 +0000	[thread overview]
Message-ID: <20260318234126.3216529-2-souravpanda@google.com> (raw)
In-Reply-To: <20260318234126.3216529-1-souravpanda@google.com>

Implement a shrinker for the hugetlbfs subsystem to provide one-way
fungibility, converting unused persistent huge pages back to the
buddy system, one huge page at a time.

This is designed for virtualization use cases, where
a large pool of huge pages is reserved but kept free, acting as a
"frozen" memory reservoir. When the host experiences memory pressure,
this shrinker thaws the memory by reclaiming huge pages on demand.

Pass the hugetlb_shrinker_enabled=1 kernel command line param to enable.
Please note that nr_huge_pages will change without user intervention.

Both kswapd and direct reclaim can shrink gigantic hugepages when
the system is under memory pressure. To safely support concurrent
reclaimers (e.g., kswapd and multiple direct reclaim tasks), a new
mutex `hugepage_shrink_mutex` is introduced.

Signed-off-by: Sourav Panda <souravpanda@google.com>
---
 include/linux/shrinker.h |   2 +
 mm/Kconfig               |   9 +++
 mm/hugetlb.c             | 125 +++++++++++++++++++++++++++++++++++++++
 mm/shrinker.c            |   2 +
 4 files changed, 138 insertions(+)

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 1a00be90d93a..5374c251ee9e 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -51,6 +51,8 @@ struct shrink_control {
 	 */
 	unsigned long nr_scanned;
 
+	s8 priority;
+
 	/* current memcg being shrunk (for memcg aware shrinkers) */
 	struct mem_cgroup *memcg;
 };
diff --git a/mm/Kconfig b/mm/Kconfig
index ebd8ea353687..a88f370c7485 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -769,6 +769,15 @@ config NOMMU_INITIAL_TRIM_EXCESS
 config ARCH_WANT_GENERAL_HUGETLB
 	bool
 
+config HUGETLB_FROZEN_MEMORY_SHRINKER
+	bool "HugeTLB Frozen Memory Shrinker"
+	depends on HUGETLBFS
+	help
+	  Enables a shrinker for the hugetlb subsystem that allows
+	  unused huge pages to be released back to the buddy
+	  system under memory pressure. One huge page at a time.
+	  Further gated by kernel cmdline hugetlb_shrinker_enabled.
+
 config ARCH_WANTS_THP_SWAP
 	def_bool n
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 327eaa4074d3..d4953ff1dda1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -27,6 +27,7 @@
 #include <linux/string_helpers.h>
 #include <linux/swap.h>
 #include <linux/leafops.h>
+#include <linux/shrinker.h>
 #include <linux/jhash.h>
 #include <linux/numa.h>
 #include <linux/llist.h>
@@ -4127,6 +4128,129 @@ ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
 	return err ? err : len;
 }
 
+#ifdef CONFIG_HUGETLB_FROZEN_MEMORY_SHRINKER
+
+static bool hugetlb_shrinker_enabled;
+static int __init cmdline_parse_hugetlb_shrinker_enabled(char *p)
+{
+	return kstrtobool(p, &hugetlb_shrinker_enabled);
+}
+early_param("hugetlb_shrinker_enabled", cmdline_parse_hugetlb_shrinker_enabled);
+
+static unsigned long hugepage_shrinker_count(struct shrinker *s,
+					     struct shrink_control *sc)
+{
+	struct hstate *h;
+
+	if (sc->priority >= DEF_PRIORITY - 6)
+		return 0;
+
+	if (!gigantic_page_runtime_supported())
+		return 0;
+
+	for_each_hstate(h) {
+		if (hstate_is_gigantic(h) && h->nr_huge_pages_node[sc->nid] > 0)
+			return SWAP_CLUSTER_MAX;
+	}
+	return 0;
+}
+
+static bool hugepage_shrinker_is_watermark_ok(int nid)
+{
+	int i;
+	pg_data_t *pgdat = NODE_DATA(nid);
+
+	for (i = 0; i < MAX_NR_ZONES; i++) {
+		unsigned long mark;
+		unsigned long free_pages;
+		struct zone *zone = pgdat->node_zones + i;
+
+		if (!managed_zone(zone))
+			continue;
+
+		mark = high_wmark_pages(zone);
+		free_pages = zone_page_state(zone, NR_FREE_PAGES);
+		if (__zone_watermark_ok(zone, MAX_PAGE_ORDER, mark,
+					MAX_NR_ZONES, 0, free_pages))
+			return true;
+	}
+	return false;
+}
+
+static DEFINE_MUTEX(hugepage_shrink_mutex);
+
+static unsigned long hugepage_shrinker_scan(struct shrinker *s,
+					    struct shrink_control *sc)
+{
+	int err;
+	struct hstate *h;
+	unsigned long old_nr;
+	nodemask_t nodes_allowed;
+
+	if (sc->priority >= DEF_PRIORITY - 6)
+		return SHRINK_STOP;
+
+	if (sc->nr_to_scan == 0)
+		return SHRINK_STOP;
+
+	if (!gigantic_page_runtime_supported())
+		return SHRINK_STOP;
+
+	if (hugepage_shrinker_is_watermark_ok(sc->nid))
+		return SHRINK_STOP;
+
+	mutex_lock(&hugepage_shrink_mutex);
+
+	if (hugepage_shrinker_is_watermark_ok(sc->nid))
+		goto unlock;
+
+	init_nodemask_of_node(&nodes_allowed, sc->nid);
+
+	for_each_hstate(h) {
+		if (!hstate_is_gigantic(h))
+			continue;
+
+		old_nr = h->nr_huge_pages_node[sc->nid];
+		if (!old_nr)
+			continue;
+
+		err = set_max_huge_pages(h, old_nr - 1, sc->nid, &nodes_allowed);
+		if (!err)
+			goto unlock;
+	}
+unlock:
+	mutex_unlock(&hugepage_shrink_mutex);
+	return SHRINK_STOP;
+}
+
+static struct shrinker *hugepage_shrinker;
+
+static int __init hugetlb_shrinker_init(void)
+{
+	if (!hugetlb_shrinker_enabled)
+		return 0;
+
+	hugepage_shrinker = shrinker_alloc(0, "hugetlbfs");
+	if (!hugepage_shrinker)
+		return -ENOMEM;
+
+	hugepage_shrinker->count_objects = hugepage_shrinker_count;
+	hugepage_shrinker->scan_objects = hugepage_shrinker_scan;
+	hugepage_shrinker->seeks = 0;
+	hugepage_shrinker->batch = 1;
+
+	pr_info("Registering hugetlbfs shrinker\n");
+	shrinker_register(hugepage_shrinker);
+
+	return 0;
+}
+#else
+static int __init hugetlb_shrinker_init(void)
+{
+	return 0;
+}
+#endif
+
 static int __init hugetlb_init(void)
 {
 	int i;
@@ -4183,6 +4307,7 @@ static int __init hugetlb_init(void)
 	hugetlb_sysfs_init();
 	hugetlb_cgroup_file_init();
 	hugetlb_sysctl_init();
+	hugetlb_shrinker_init();
 
 #ifdef CONFIG_SMP
 	num_fault_mutexes = roundup_pow_of_two(8 * num_possible_cpus());
diff --git a/mm/shrinker.c b/mm/shrinker.c
index 7b61fc0ee78f..8a7a05182465 100644
--- a/mm/shrinker.c
+++ b/mm/shrinker.c
@@ -529,6 +529,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 				.gfp_mask = gfp_mask,
 				.nid = nid,
 				.memcg = memcg,
+				.priority = priority,
 			};
 			struct shrinker *shrinker;
 			int shrinker_id = calc_shrinker_id(index, offset);
@@ -654,6 +655,7 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
 			.gfp_mask = gfp_mask,
 			.nid = nid,
 			.memcg = memcg,
+			.priority = priority,
 		};
 
 		if (!shrinker_try_get(shrinker))
-- 
2.53.0.983.g0bb29b3bc5-goog



  reply	other threads:[~2026-03-18 23:41 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-18 23:41 [LSF/MM/BPF TOPIC][RFC PATCH 0/2] Hugetlb Fungibility for page metadata savings and network performance Sourav Panda
2026-03-18 23:41 ` Sourav Panda [this message]
2026-03-18 23:41 ` [LSF/MM/BPF TOPIC][RFC PATCH 2/2] mm/hugetlb: skip hugetlb shrinking for proactive reclaim Sourav Panda

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260318234126.3216529-2-souravpanda@google.com \
    --to=souravpanda@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@redhat.com \
    --cc=gthelen@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lsf-pc@lists.linux-foundation.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mike.kravetz@oracle.com \
    --cc=osalvador@suse.de \
    --cc=pasha.tatashin@soleen.com \
    --cc=rientjes@google.com \
    --cc=songmuchun@bytedance.com \
    --cc=surenb@google.com \
    --cc=weixugc@google.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox