From: Longlong Xia <xialonglong@kylinos.cn>
To: david@redhat.com, linmiaohe@huawei.com
Cc: lance.yang@linux.dev, markus.elfring@web.de,
nao.horiguchi@gmail.com, akpm@linux-foundation.org,
wangkefeng.wang@huawei.com, qiuxu.zhuo@intel.com,
xu.xin16@zte.com.cn, linux-kernel@vger.kernel.org,
linux-mm@kvack.org, Longlong Xia <xialonglong@kylinos.cn>
Subject: [PATCH v3 1/2] mm/ksm: add helper to allocate and initialize stable node duplicates
Date: Mon, 3 Nov 2025 23:16:00 +0800 [thread overview]
Message-ID: <20251103151601.3280700-2-xialonglong@kylinos.cn> (raw)
In-Reply-To: <20251103151601.3280700-1-xialonglong@kylinos.cn>
Consolidate the duplicated stable_node allocation and initialization
code in stable_tree_insert() into a new helper function
alloc_init_stable_node_dup().
Also refactor write_protect_page() and replace_page() to expose
address-based variants (with an _addr suffix). The existing wrappers
preserve current behavior by computing the address first and then
delegating to the new variants.
This refactoring prepares for the upcoming memory error recovery
feature, which will need to:
1) Allocate and initialize stable_node duplicates
2) Operate on a caller-supplied address without re-deriving it from
   the page
No functional changes.
Signed-off-by: Longlong Xia <xialonglong@kylinos.cn>
---
mm/ksm.c | 89 +++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 63 insertions(+), 26 deletions(-)
diff --git a/mm/ksm.c b/mm/ksm.c
index 160787bb121c..13ec057667af 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1247,11 +1247,11 @@ static u32 calc_checksum(struct page *page)
return checksum;
}
-static int write_protect_page(struct vm_area_struct *vma, struct folio *folio,
- pte_t *orig_pte)
+static int write_protect_page_addr(struct vm_area_struct *vma, struct folio *folio,
+ unsigned long address, pte_t *orig_pte)
{
struct mm_struct *mm = vma->vm_mm;
- DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, 0, 0);
+ DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
int swapped;
int err = -EFAULT;
struct mmu_notifier_range range;
@@ -1261,10 +1261,10 @@ static int write_protect_page(struct vm_area_struct *vma, struct folio *folio,
if (WARN_ON_ONCE(folio_test_large(folio)))
return err;
- pvmw.address = page_address_in_vma(folio, folio_page(folio, 0), vma);
- if (pvmw.address == -EFAULT)
- goto out;
+ if (address < vma->vm_start || address >= vma->vm_end)
+ return err;
+ pvmw.address = address;
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, pvmw.address,
pvmw.address + PAGE_SIZE);
mmu_notifier_invalidate_range_start(&range);
@@ -1334,21 +1334,26 @@ static int write_protect_page(struct vm_area_struct *vma, struct folio *folio,
page_vma_mapped_walk_done(&pvmw);
out_mn:
mmu_notifier_invalidate_range_end(&range);
-out:
return err;
}
-/**
- * replace_page - replace page in vma by new ksm page
- * @vma: vma that holds the pte pointing to page
- * @page: the page we are replacing by kpage
- * @kpage: the ksm page we replace page by
- * @orig_pte: the original value of the pte
- *
- * Returns 0 on success, -EFAULT on failure.
- */
-static int replace_page(struct vm_area_struct *vma, struct page *page,
- struct page *kpage, pte_t orig_pte)
+static int write_protect_page(struct vm_area_struct *vma, struct folio *folio,
+ pte_t *orig_pte)
+{
+ unsigned long address;
+
+ if (WARN_ON_ONCE(folio_test_large(folio)))
+ return -EFAULT;
+
+ address = page_address_in_vma(folio, folio_page(folio, 0), vma);
+ if (address == -EFAULT)
+ return -EFAULT;
+
+ return write_protect_page_addr(vma, folio, address, orig_pte);
+}
+
+static int replace_page_addr(struct vm_area_struct *vma, struct page *page,
+ struct page *kpage, unsigned long addr, pte_t orig_pte)
{
struct folio *kfolio = page_folio(kpage);
struct mm_struct *mm = vma->vm_mm;
@@ -1358,17 +1363,16 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
pte_t *ptep;
pte_t newpte;
spinlock_t *ptl;
- unsigned long addr;
int err = -EFAULT;
struct mmu_notifier_range range;
- addr = page_address_in_vma(folio, page, vma);
- if (addr == -EFAULT)
+ if (addr < vma->vm_start || addr >= vma->vm_end)
goto out;
pmd = mm_find_pmd(mm, addr);
if (!pmd)
goto out;
+
/*
* Some THP functions use the sequence pmdp_huge_clear_flush(), set_pmd_at()
* without holding anon_vma lock for write. So when looking for a
@@ -1441,6 +1445,29 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
return err;
}
+
+/**
+ * replace_page - replace page in vma by new ksm page
+ * @vma: vma that holds the pte pointing to page
+ * @page: the page we are replacing by kpage
+ * @kpage: the ksm page we replace page by
+ * @orig_pte: the original value of the pte
+ *
+ * Returns 0 on success, -EFAULT on failure.
+ */
+static int replace_page(struct vm_area_struct *vma, struct page *page,
+ struct page *kpage, pte_t orig_pte)
+{
+ unsigned long addr;
+ struct folio *folio = page_folio(page);
+
+ addr = page_address_in_vma(folio, page, vma);
+ if (addr == -EFAULT)
+ return -EFAULT;
+
+ return replace_page_addr(vma, page, kpage, addr, orig_pte);
+}
+
/*
* try_to_merge_one_page - take two pages and merge them into one
* @vma: the vma that holds the pte pointing to page
@@ -2007,6 +2034,20 @@ static struct folio *stable_tree_search(struct page *page)
goto out;
}
+static struct ksm_stable_node *alloc_init_stable_node_dup(unsigned long kpfn,
+ int nid __maybe_unused)
+{
+ struct ksm_stable_node *stable_node = alloc_stable_node();
+
+ if (stable_node) {
+ INIT_HLIST_HEAD(&stable_node->hlist);
+ stable_node->kpfn = kpfn;
+ stable_node->rmap_hlist_len = 0;
+ DO_NUMA(stable_node->nid = nid);
+ }
+ return stable_node;
+}
+
/*
* stable_tree_insert - insert stable tree node pointing to new ksm page
* into the stable tree.
@@ -2065,14 +2106,10 @@ static struct ksm_stable_node *stable_tree_insert(struct folio *kfolio)
}
}
- stable_node_dup = alloc_stable_node();
+ stable_node_dup = alloc_init_stable_node_dup(kpfn, nid);
if (!stable_node_dup)
return NULL;
- INIT_HLIST_HEAD(&stable_node_dup->hlist);
- stable_node_dup->kpfn = kpfn;
- stable_node_dup->rmap_hlist_len = 0;
- DO_NUMA(stable_node_dup->nid = nid);
if (!need_chain) {
rb_link_node(&stable_node_dup->node, parent, new);
rb_insert_color(&stable_node_dup->node, root);
--
2.43.0
next prev parent reply other threads:[~2025-11-03 15:16 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-16 10:18 [PATCH v2 0/1] mm/ksm: recover from memory failure on KSM page by migrating to healthy duplicate Longlong Xia
2025-10-16 10:18 ` [PATCH v2 1/1] " Longlong Xia
2025-10-16 14:37 ` [PATCH v2] " Markus Elfring
2025-10-17 3:09 ` [PATCH v2 1/1] " kernel test robot
2025-10-23 11:54 ` Miaohe Lin
2025-10-28 7:54 ` Long long Xia
2025-10-29 6:40 ` Miaohe Lin
2025-10-29 7:12 ` Long long Xia
2025-10-30 2:56 ` Miaohe Lin
2025-10-28 9:44 ` David Hildenbrand
2025-11-03 15:15 ` [PATCH v3 0/2] mm/ksm: try " Longlong Xia
2025-11-03 15:16 ` Longlong Xia [this message]
2025-11-03 15:16 ` [PATCH v3 2/2] " Longlong Xia
2025-10-16 10:46 ` [PATCH v2 0/1] mm/ksm: " David Hildenbrand
2025-10-21 14:00 ` Long long Xia
2025-10-23 16:16 ` David Hildenbrand
2025-10-16 11:01 ` Markus Elfring
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251103151601.3280700-2-xialonglong@kylinos.cn \
--to=xialonglong@kylinos.cn \
--cc=akpm@linux-foundation.org \
--cc=david@redhat.com \
--cc=lance.yang@linux.dev \
--cc=linmiaohe@huawei.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=markus.elfring@web.de \
--cc=nao.horiguchi@gmail.com \
--cc=qiuxu.zhuo@intel.com \
--cc=wangkefeng.wang@huawei.com \
--cc=xu.xin16@zte.com.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox