From: 李喆 <lizhe.67@bytedance.com>
To: <muchun.song@linux.dev>, <osalvador@suse.de>, <david@kernel.org>,
<akpm@linux-foundation.org>, <fvdl@google.com>
Cc: <linux-mm@kvack.org>, <linux-kernel@vger.kernel.org>,
<lizhe.67@bytedance.com>
Subject: [PATCH 7/8] mm/hugetlb: add epoll support for interface "zeroable_hugepages"
Date: Thu, 25 Dec 2025 16:20:58 +0800
Message-ID: <20251225082059.1632-8-lizhe.67@bytedance.com>
In-Reply-To: <20251225082059.1632-1-lizhe.67@bytedance.com>
From: Li Zhe <lizhe.67@bytedance.com>
Add epoll support for the "zeroable_hugepages" interface. When no huge
folios are available for pre-zeroing, user space can block on the
zeroable_hugepages file with epoll and is woken as soon as one or more
huge folios become eligible for pre-zeroing. The notification is issued
via sysfs_notify() on the per-node, per-hstate attribute from a work
item, which is scheduled by the paths that enqueue freshly freed or
newly allocated huge folios.
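
As an illustration, below is a minimal user-space sketch of the intended
polling loop (not part of this patch). The sysfs path is an assumption
(the 2MB hstate on node 0, following the per-node layout introduced
earlier in this series). A sysfs_notify() wakeup is reported to pollers
as EPOLLPRI (typically together with EPOLLERR), so that is what the
sketch waits for:

  #include <fcntl.h>
  #include <stdio.h>
  #include <sys/epoll.h>
  #include <unistd.h>

  int main(void)
  {
      /* Assumed location: the 2MB hstate on node 0. */
      static const char path[] = "/sys/devices/system/node/node0/"
          "hugepages/hugepages-2048kB/zeroable_hugepages";
      struct epoll_event ev = { .events = EPOLLPRI | EPOLLERR };
      char buf[64];
      ssize_t len;
      int fd, epfd;

      fd = open(path, O_RDONLY);
      epfd = epoll_create1(0);
      if (fd < 0 || epfd < 0 ||
          epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
          perror("setup");
          return 1;
      }

      for (;;) {
          /* Reading the attribute (re)arms the next notification. */
          if (lseek(fd, 0, SEEK_SET) < 0 ||
              (len = read(fd, buf, sizeof(buf) - 1)) < 0) {
              perror("read");
              return 1;
          }
          buf[len] = '\0';
          printf("zeroable_hugepages: %s", buf);

          /* Blocks until sysfs_notify() fires for this attribute. */
          if (epoll_wait(epfd, &ev, 1, -1) < 0) {
              perror("epoll_wait");
              return 1;
          }
      }
  }

Note that re-reading from offset zero after each wakeup is what clears
the pending event; without it, a level-triggered epoll_wait() on a sysfs
attribute returns immediately in a loop.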
Signed-off-by: Li Zhe <lizhe.67@bytedance.com>
---
mm/hugetlb.c | 13 +++++++++++++
mm/hugetlb_internal.h | 6 ++++++
mm/hugetlb_sysfs.c | 22 +++++++++++++++++++++-
3 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8d36487659f8..c2df0317fe15 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1868,6 +1868,7 @@ void free_huge_folio(struct folio *folio)
arch_clear_hugetlb_flags(folio);
enqueue_hugetlb_folio(h, folio);
spin_unlock_irqrestore(&hugetlb_lock, flags);
+ do_zero_free_notify(h, folio_nid(folio));
}
}
@@ -1999,8 +2000,10 @@ static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
void prep_and_add_allocated_folios(struct hstate *h,
struct list_head *folio_list)
{
+ nodemask_t allocated_mask = NODE_MASK_NONE;
unsigned long flags;
struct folio *folio, *tmp_f;
+ int nid;
/* Send list for bulk vmemmap optimization processing */
hugetlb_vmemmap_optimize_folios(h, folio_list);
@@ -2010,8 +2013,12 @@ void prep_and_add_allocated_folios(struct hstate *h,
list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
prep_account_new_hugetlb_folio(h, folio);
enqueue_hugetlb_folio(h, folio);
+ node_set(folio_nid(folio), allocated_mask);
}
spin_unlock_irqrestore(&hugetlb_lock, flags);
+
+ for_each_node_mask(nid, allocated_mask)
+ do_zero_free_notify(h, nid);
}
/*
@@ -2383,6 +2390,8 @@ static int gather_surplus_pages(struct hstate *h, long delta)
long needed, allocated;
bool alloc_ok = true;
nodemask_t *mbind_nodemask, alloc_nodemask;
+ nodemask_t allocated_mask = NODE_MASK_NONE;
+ int nid;
mbind_nodemask = policy_mbind_nodemask(htlb_alloc_mask(h));
if (mbind_nodemask)
@@ -2455,9 +2464,12 @@ static int gather_surplus_pages(struct hstate *h, long delta)
break;
/* Add the page to the hugetlb allocator */
enqueue_hugetlb_folio(h, folio);
+ node_set(folio_nid(folio), allocated_mask);
}
free:
spin_unlock_irq(&hugetlb_lock);
+ for_each_node_mask(nid, allocated_mask)
+ do_zero_free_notify(h, nid);
/*
* Free unnecessary surplus pages to the buddy allocator.
@@ -2841,6 +2853,7 @@ static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio,
* Folio has been replaced, we can safely free the old one.
*/
spin_unlock_irq(&hugetlb_lock);
+ do_zero_free_notify(h, folio_nid(new_folio));
update_and_free_hugetlb_folio(h, old_folio, false);
}
diff --git a/mm/hugetlb_internal.h b/mm/hugetlb_internal.h
index 1d2f870deccf..9c60661283c7 100644
--- a/mm/hugetlb_internal.h
+++ b/mm/hugetlb_internal.h
@@ -106,6 +106,12 @@ extern ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
struct hstate *h, int nid,
unsigned long count, size_t len);
+#ifdef CONFIG_NUMA
+extern void do_zero_free_notify(struct hstate *h, int nid);
+#else
+static inline void do_zero_free_notify(struct hstate *h, int nid) {}
+#endif
+
extern void hugetlb_sysfs_init(void) __init;
#ifdef CONFIG_SYSCTL
diff --git a/mm/hugetlb_sysfs.c b/mm/hugetlb_sysfs.c
index 08ad39d3e022..c063237249f6 100644
--- a/mm/hugetlb_sysfs.c
+++ b/mm/hugetlb_sysfs.c
@@ -340,6 +340,7 @@ static bool hugetlb_sysfs_initialized __ro_after_init;
struct node_hstate_item {
struct kobject *hstate_kobj;
+ struct work_struct notify_work;
};
/*
@@ -355,6 +356,21 @@ struct node_hstate {
};
static struct node_hstate node_hstates[MAX_NUMNODES];
+static void pre_zero_notify_fun(struct work_struct *work)
+{
+ struct node_hstate_item *item =
+ container_of(work, struct node_hstate_item, notify_work);
+
+ sysfs_notify(item->hstate_kobj, NULL, "zeroable_hugepages");
+}
+
+void do_zero_free_notify(struct hstate *h, int nid)
+{
+ struct node_hstate *nhs = &node_hstates[nid];
+
+ schedule_work(&nhs->items[hstate_index(h)].notify_work);
+}
+
static ssize_t zeroable_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -564,8 +580,11 @@ void hugetlb_register_node(struct node *node)
return;
for_each_hstate(h) {
+ int index = hstate_index(h);
+ struct node_hstate_item *item = &nhs->items[index];
+
err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
- &nhs->items[hstate_index(h)].hstate_kobj,
+ &item->hstate_kobj,
&per_node_hstate_attr_group);
if (err) {
pr_err("HugeTLB: Unable to add hstate %s for node %d\n",
@@ -573,6 +592,7 @@ void hugetlb_register_node(struct node *node)
hugetlb_unregister_node(node);
break;
}
+ INIT_WORK(&item->notify_work, pre_zero_notify_fun);
}
}
--
2.20.1