* [PATCH V10 1/5] mm: softdirty: Add pte_soft_dirty_available()
2025-09-09 9:56 [PATCH V10 0/5] riscv: mm: Add soft-dirty and uffd-wp support Chunyan Zhang
@ 2025-09-09 9:56 ` Chunyan Zhang
2025-09-09 11:42 ` David Hildenbrand
2025-09-09 9:56 ` [PATCH V10 2/5] mm: uffd_wp: Add pte_uffd_wp_available() Chunyan Zhang
` (3 subsequent siblings)
4 siblings, 1 reply; 12+ messages in thread
From: Chunyan Zhang @ 2025-09-09 9:56 UTC (permalink / raw)
To: linux-riscv, linux-fsdevel, linux-mm, linux-kernel
Cc: Paul Walmsley, Palmer Dabbelt, Albert Ou, Alexandre Ghiti,
Deepak Gupta, Ved Shanbhogue, Alexander Viro, Christian Brauner,
Jan Kara, Andrew Morton, Peter Xu, Arnd Bergmann,
David Hildenbrand, Lorenzo Stoakes, Liam R . Howlett,
Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
Axel Rasmussen, Yuanchu Xie, Chunyan Zhang
Some platforms can customize the PTE soft dirty bit and make it unavailable
even if the architecture allows providing the PTE resource.
Add an API for which architectures can provide their own implementations
to detect whether the PTE soft-dirty bit is available on the platform
the kernel is running on.
Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
---
fs/proc/task_mmu.c | 17 ++++++++++++++++-
include/linux/pgtable.h | 10 ++++++++++
mm/debug_vm_pgtable.c | 9 +++++----
mm/huge_memory.c | 10 ++++++----
mm/internal.h | 2 +-
mm/mremap.c | 10 ++++++----
mm/userfaultfd.c | 6 ++++--
7 files changed, 48 insertions(+), 16 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 29cca0e6d0ff..20a609ec1ba6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1058,7 +1058,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
* -Werror=unterminated-string-initialization warning
* with GCC 15
*/
- static const char mnemonics[BITS_PER_LONG][3] = {
+ static char mnemonics[BITS_PER_LONG][3] = {
/*
* In case if we meet a flag we don't know about.
*/
@@ -1129,6 +1129,16 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_SEALED)] = "sl",
#endif
};
+/*
+ * We should remove the VM_SOFTDIRTY flag if the PTE soft-dirty bit is
+ * unavailable on which the kernel is running, even if the architecture
+ * allows providing the PTE resource and soft-dirty is compiled in.
+ */
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ if (!pte_soft_dirty_available())
+ mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0;
+#endif
+
size_t i;
seq_puts(m, "VmFlags: ");
@@ -1531,6 +1541,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
static inline void clear_soft_dirty(struct vm_area_struct *vma,
unsigned long addr, pte_t *pte)
{
+ if (!pte_soft_dirty_available())
+ return;
/*
* The soft-dirty tracker uses #PF-s to catch writes
* to pages, so write-protect the pte as well. See the
@@ -1566,6 +1578,9 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
{
pmd_t old, pmd = *pmdp;
+ if (!pte_soft_dirty_available())
+ return;
+
if (pmd_present(pmd)) {
/* See comment in change_huge_pmd() */
old = pmdp_invalidate(vma, addr, pmdp);
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 4c035637eeb7..c0e2a6dc69f4 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1538,6 +1538,15 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
#endif
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+
+/*
+ * Some platforms can customize the PTE soft dirty bit and make it unavailable
+ * even if the architecture allows providing the PTE resource.
+ */
+#ifndef pte_soft_dirty_available
+#define pte_soft_dirty_available() (true)
+#endif
+
#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
{
@@ -1555,6 +1564,7 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
}
#endif
#else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
+#define pte_soft_dirty_available() (false)
static inline int pte_soft_dirty(pte_t pte)
{
return 0;
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 830107b6dd08..98ed7e22ccec 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
{
pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
- if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+ if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || !pte_soft_dirty_available())
return;
pr_debug("Validating PTE soft dirty\n");
@@ -702,7 +702,7 @@ static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args)
{
pte_t pte;
- if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+ if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || !pte_soft_dirty_available())
return;
pr_debug("Validating PTE swap soft dirty\n");
@@ -718,7 +718,7 @@ static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args)
{
pmd_t pmd;
- if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+ if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || !pte_soft_dirty_available())
return;
if (!has_transparent_hugepage())
@@ -735,7 +735,8 @@ static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args)
pmd_t pmd;
if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) ||
- !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
+ !pte_soft_dirty_available() ||
+ !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
return;
if (!has_transparent_hugepage())
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9c38a95e9f09..4e4fd56c0c18 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2272,10 +2272,12 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
static pmd_t move_soft_dirty_pmd(pmd_t pmd)
{
#ifdef CONFIG_MEM_SOFT_DIRTY
- if (unlikely(is_pmd_migration_entry(pmd)))
- pmd = pmd_swp_mksoft_dirty(pmd);
- else if (pmd_present(pmd))
- pmd = pmd_mksoft_dirty(pmd);
+ if (pte_soft_dirty_available()) {
+ if (unlikely(is_pmd_migration_entry(pmd)))
+ pmd = pmd_swp_mksoft_dirty(pmd);
+ else if (pmd_present(pmd))
+ pmd = pmd_mksoft_dirty(pmd);
+ }
#endif
return pmd;
}
diff --git a/mm/internal.h b/mm/internal.h
index 45b725c3dc03..8a5b20fac892 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1538,7 +1538,7 @@ static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
* VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY)
* will be constantly true.
*/
- if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+ if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || !pte_soft_dirty_available())
return false;
/*
diff --git a/mm/mremap.c b/mm/mremap.c
index e618a706aff5..788dd8aaae47 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -163,10 +163,12 @@ static pte_t move_soft_dirty_pte(pte_t pte)
* in userspace the ptes were moved.
*/
#ifdef CONFIG_MEM_SOFT_DIRTY
- if (pte_present(pte))
- pte = pte_mksoft_dirty(pte);
- else if (is_swap_pte(pte))
- pte = pte_swp_mksoft_dirty(pte);
+ if (pte_soft_dirty_available()) {
+ if (pte_present(pte))
+ pte = pte_mksoft_dirty(pte);
+ else if (is_swap_pte(pte))
+ pte = pte_swp_mksoft_dirty(pte);
+ }
#endif
return pte;
}
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 45e6290e2e8b..94f159a680a4 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1066,7 +1066,8 @@ static int move_present_pte(struct mm_struct *mm,
orig_dst_pte = folio_mk_pte(src_folio, dst_vma->vm_page_prot);
/* Set soft dirty bit so userspace can notice the pte was moved */
#ifdef CONFIG_MEM_SOFT_DIRTY
- orig_dst_pte = pte_mksoft_dirty(orig_dst_pte);
+ if (pte_soft_dirty_available())
+ orig_dst_pte = pte_mksoft_dirty(orig_dst_pte);
#endif
if (pte_dirty(orig_src_pte))
orig_dst_pte = pte_mkdirty(orig_dst_pte);
@@ -1135,7 +1136,8 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma,
orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte);
#ifdef CONFIG_MEM_SOFT_DIRTY
- orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte);
+ if (pte_soft_dirty_available())
+ orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte);
#endif
set_pte_at(mm, dst_addr, dst_pte, orig_src_pte);
double_pt_unlock(dst_ptl, src_ptl);
--
2.34.1
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH V10 1/5] mm: softdirty: Add pte_soft_dirty_available()
2025-09-09 9:56 ` [PATCH V10 1/5] mm: softdirty: Add pte_soft_dirty_available() Chunyan Zhang
@ 2025-09-09 11:42 ` David Hildenbrand
2025-09-10 8:25 ` Chunyan Zhang
0 siblings, 1 reply; 12+ messages in thread
From: David Hildenbrand @ 2025-09-09 11:42 UTC (permalink / raw)
To: Chunyan Zhang, linux-riscv, linux-fsdevel, linux-mm, linux-kernel
Cc: Paul Walmsley, Palmer Dabbelt, Albert Ou, Alexandre Ghiti,
Deepak Gupta, Ved Shanbhogue, Alexander Viro, Christian Brauner,
Jan Kara, Andrew Morton, Peter Xu, Arnd Bergmann,
Lorenzo Stoakes, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Axel Rasmussen,
Yuanchu Xie, Chunyan Zhang
On 09.09.25 11:56, Chunyan Zhang wrote:
> Some platforms can customize the PTE soft dirty bit and make it unavailable
> even if the architecture allows providing the PTE resource.
>
> Add an API which architectures can define their specific implementations
> to detect if the PTE soft-dirty bit is available, on which the kernel
> is running.
>
> Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
> ---
> fs/proc/task_mmu.c | 17 ++++++++++++++++-
> include/linux/pgtable.h | 10 ++++++++++
> mm/debug_vm_pgtable.c | 9 +++++----
> mm/huge_memory.c | 10 ++++++----
> mm/internal.h | 2 +-
> mm/mremap.c | 10 ++++++----
> mm/userfaultfd.c | 6 ++++--
> 7 files changed, 48 insertions(+), 16 deletions(-)
>
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 29cca0e6d0ff..20a609ec1ba6 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -1058,7 +1058,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
> * -Werror=unterminated-string-initialization warning
> * with GCC 15
> */
> - static const char mnemonics[BITS_PER_LONG][3] = {
> + static char mnemonics[BITS_PER_LONG][3] = {
> /*
> * In case if we meet a flag we don't know about.
> */
> @@ -1129,6 +1129,16 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
> [ilog2(VM_SEALED)] = "sl",
> #endif
> };
> +/*
> + * We should remove the VM_SOFTDIRTY flag if the PTE soft-dirty bit is
> + * unavailable on which the kernel is running, even if the architecture
> + * allows providing the PTE resource and soft-dirty is compiled in.
> + */
> +#ifdef CONFIG_MEM_SOFT_DIRTY
> + if (!pte_soft_dirty_available())
> + mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0;
> +#endif
> +
> size_t i;
>
> seq_puts(m, "VmFlags: ");
> @@ -1531,6 +1541,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
> static inline void clear_soft_dirty(struct vm_area_struct *vma,
> unsigned long addr, pte_t *pte)
> {
> + if (!pte_soft_dirty_available())
> + return;
> /*
> * The soft-dirty tracker uses #PF-s to catch writes
> * to pages, so write-protect the pte as well. See the
> @@ -1566,6 +1578,9 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
> {
> pmd_t old, pmd = *pmdp;
>
> + if (!pte_soft_dirty_available())
> + return;
> +
> if (pmd_present(pmd)) {
> /* See comment in change_huge_pmd() */
> old = pmdp_invalidate(vma, addr, pmdp);
> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> index 4c035637eeb7..c0e2a6dc69f4 100644
> --- a/include/linux/pgtable.h
> +++ b/include/linux/pgtable.h
> @@ -1538,6 +1538,15 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
> #endif
>
> #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
> +
> +/*
> + * Some platforms can customize the PTE soft dirty bit and make it unavailable
> + * even if the architecture allows providing the PTE resource.
> + */
> +#ifndef pte_soft_dirty_available
> +#define pte_soft_dirty_available() (true)
> +#endif
> +
> #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
> static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
> {
> @@ -1555,6 +1564,7 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
> }
> #endif
> #else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
> +#define pte_soft_dirty_available() (false)
> static inline int pte_soft_dirty(pte_t pte)
> {
> return 0;
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 830107b6dd08..98ed7e22ccec 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
> {
> pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
>
> - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
> + if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || !pte_soft_dirty_available())
I suggest that you instead make pte_soft_dirty_available() be false without CONFIG_MEM_SOFT_DIRTY.
e.g., for the default implementation
define pte_soft_dirty_available() IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)
That way you can avoid some ifdefs and clean up these checks.
But as we do also have PMD soft-dirty support, I guess we would want to call this
something more abstract "pgtable_soft_dirty_available" or "pgtable_soft_dirty_supported"
--
Cheers
David / dhildenb
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH V10 1/5] mm: softdirty: Add pte_soft_dirty_available()
2025-09-09 11:42 ` David Hildenbrand
@ 2025-09-10 8:25 ` Chunyan Zhang
2025-09-10 8:51 ` David Hildenbrand
0 siblings, 1 reply; 12+ messages in thread
From: Chunyan Zhang @ 2025-09-10 8:25 UTC (permalink / raw)
To: David Hildenbrand
Cc: Chunyan Zhang, linux-riscv, linux-fsdevel, linux-mm,
linux-kernel, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Alexandre Ghiti, Deepak Gupta, Ved Shanbhogue, Alexander Viro,
Christian Brauner, Jan Kara, Andrew Morton, Peter Xu,
Arnd Bergmann, Lorenzo Stoakes, Liam R . Howlett,
Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
Axel Rasmussen, Yuanchu Xie
Hi David,
On Tue, 9 Sept 2025 at 19:42, David Hildenbrand <david@redhat.com> wrote:
>
> On 09.09.25 11:56, Chunyan Zhang wrote:
> > Some platforms can customize the PTE soft dirty bit and make it unavailable
> > even if the architecture allows providing the PTE resource.
> >
> > Add an API which architectures can define their specific implementations
> > to detect if the PTE soft-dirty bit is available, on which the kernel
> > is running.
> >
> > Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
> > ---
> > fs/proc/task_mmu.c | 17 ++++++++++++++++-
> > include/linux/pgtable.h | 10 ++++++++++
> > mm/debug_vm_pgtable.c | 9 +++++----
> > mm/huge_memory.c | 10 ++++++----
> > mm/internal.h | 2 +-
> > mm/mremap.c | 10 ++++++----
> > mm/userfaultfd.c | 6 ++++--
> > 7 files changed, 48 insertions(+), 16 deletions(-)
> >
> > diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> > index 29cca0e6d0ff..20a609ec1ba6 100644
> > --- a/fs/proc/task_mmu.c
> > +++ b/fs/proc/task_mmu.c
> > @@ -1058,7 +1058,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
> > * -Werror=unterminated-string-initialization warning
> > * with GCC 15
> > */
> > - static const char mnemonics[BITS_PER_LONG][3] = {
> > + static char mnemonics[BITS_PER_LONG][3] = {
> > /*
> > * In case if we meet a flag we don't know about.
> > */
> > @@ -1129,6 +1129,16 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
> > [ilog2(VM_SEALED)] = "sl",
> > #endif
> > };
> > +/*
> > + * We should remove the VM_SOFTDIRTY flag if the PTE soft-dirty bit is
> > + * unavailable on which the kernel is running, even if the architecture
> > + * allows providing the PTE resource and soft-dirty is compiled in.
> > + */
> > +#ifdef CONFIG_MEM_SOFT_DIRTY
> > + if (!pte_soft_dirty_available())
> > + mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0;
> > +#endif
> > +
> > size_t i;
> >
> > seq_puts(m, "VmFlags: ");
> > @@ -1531,6 +1541,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
> > static inline void clear_soft_dirty(struct vm_area_struct *vma,
> > unsigned long addr, pte_t *pte)
> > {
> > + if (!pte_soft_dirty_available())
> > + return;
> > /*
> > * The soft-dirty tracker uses #PF-s to catch writes
> > * to pages, so write-protect the pte as well. See the
> > @@ -1566,6 +1578,9 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
> > {
> > pmd_t old, pmd = *pmdp;
> >
> > + if (!pte_soft_dirty_available())
> > + return;
> > +
> > if (pmd_present(pmd)) {
> > /* See comment in change_huge_pmd() */
> > old = pmdp_invalidate(vma, addr, pmdp);
> > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> > index 4c035637eeb7..c0e2a6dc69f4 100644
> > --- a/include/linux/pgtable.h
> > +++ b/include/linux/pgtable.h
> > @@ -1538,6 +1538,15 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
> > #endif
> >
> > #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
> > +
> > +/*
> > + * Some platforms can customize the PTE soft dirty bit and make it unavailable
> > + * even if the architecture allows providing the PTE resource.
> > + */
> > +#ifndef pte_soft_dirty_available
> > +#define pte_soft_dirty_available() (true)
> > +#endif
> > +
> > #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
> > static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
> > {
> > @@ -1555,6 +1564,7 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
> > }
> > #endif
> > #else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
> > +#define pte_soft_dirty_available() (false)
> > static inline int pte_soft_dirty(pte_t pte)
> > {
> > return 0;
> > diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> > index 830107b6dd08..98ed7e22ccec 100644
> > --- a/mm/debug_vm_pgtable.c
> > +++ b/mm/debug_vm_pgtable.c
> > @@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
> > {
> > pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
> >
> > - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
> > + if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || !pte_soft_dirty_available())
>
> I suggest that you instead make pte_soft_dirty_available() be false without CONFIG_MEM_SOFT_DIRTY.
>
> e.g., for the default implementation
>
> define pte_soft_dirty_available() IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)
>
> That way you can avoid some ifdefs and clean up these checks.
Do you mean something like this:
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1538,6 +1538,16 @@ static inline pgprot_t pgprot_modify(pgprot_t
oldprot, pgprot_t newprot)
#endif
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+#ifndef arch_soft_dirty_available
+#define arch_soft_dirty_available() (true)
+#endif
+#define pgtable_soft_dirty_supported()
(IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && arch_soft_dirty_available())
+
#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
{
@@ -1555,6 +1565,7 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
}
#endif
#else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
+#define pgtable_soft_dirty_supported() (false)
>
>
> But as we do also have PMD soft-dirty support, I guess we would want to call this
> something more abstract "pgtable_soft_dirty_available" or "pgtable_soft_dirty_supported"
>
> --
> Cheers
>
> David / dhildenb
>
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH V10 1/5] mm: softdirty: Add pte_soft_dirty_available()
2025-09-10 8:25 ` Chunyan Zhang
@ 2025-09-10 8:51 ` David Hildenbrand
2025-09-11 2:51 ` Chunyan Zhang
0 siblings, 1 reply; 12+ messages in thread
From: David Hildenbrand @ 2025-09-10 8:51 UTC (permalink / raw)
To: Chunyan Zhang
Cc: Chunyan Zhang, linux-riscv, linux-fsdevel, linux-mm,
linux-kernel, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Alexandre Ghiti, Deepak Gupta, Ved Shanbhogue, Alexander Viro,
Christian Brauner, Jan Kara, Andrew Morton, Peter Xu,
Arnd Bergmann, Lorenzo Stoakes, Liam R . Howlett,
Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
Axel Rasmussen, Yuanchu Xie
On 10.09.25 10:25, Chunyan Zhang wrote:
> Hi David,
>
> On Tue, 9 Sept 2025 at 19:42, David Hildenbrand <david@redhat.com> wrote:
>>
>> On 09.09.25 11:56, Chunyan Zhang wrote:
>>> Some platforms can customize the PTE soft dirty bit and make it unavailable
>>> even if the architecture allows providing the PTE resource.
>>>
>>> Add an API which architectures can define their specific implementations
>>> to detect if the PTE soft-dirty bit is available, on which the kernel
>>> is running.
>>>
>>> Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
>>> ---
>>> fs/proc/task_mmu.c | 17 ++++++++++++++++-
>>> include/linux/pgtable.h | 10 ++++++++++
>>> mm/debug_vm_pgtable.c | 9 +++++----
>>> mm/huge_memory.c | 10 ++++++----
>>> mm/internal.h | 2 +-
>>> mm/mremap.c | 10 ++++++----
>>> mm/userfaultfd.c | 6 ++++--
>>> 7 files changed, 48 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
>>> index 29cca0e6d0ff..20a609ec1ba6 100644
>>> --- a/fs/proc/task_mmu.c
>>> +++ b/fs/proc/task_mmu.c
>>> @@ -1058,7 +1058,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
>>> * -Werror=unterminated-string-initialization warning
>>> * with GCC 15
>>> */
>>> - static const char mnemonics[BITS_PER_LONG][3] = {
>>> + static char mnemonics[BITS_PER_LONG][3] = {
>>> /*
>>> * In case if we meet a flag we don't know about.
>>> */
>>> @@ -1129,6 +1129,16 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
>>> [ilog2(VM_SEALED)] = "sl",
>>> #endif
>>> };
>>> +/*
>>> + * We should remove the VM_SOFTDIRTY flag if the PTE soft-dirty bit is
>>> + * unavailable on which the kernel is running, even if the architecture
>>> + * allows providing the PTE resource and soft-dirty is compiled in.
>>> + */
>>> +#ifdef CONFIG_MEM_SOFT_DIRTY
>>> + if (!pte_soft_dirty_available())
>>> + mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0;
>>> +#endif
>>> +
>>> size_t i;
>>>
>>> seq_puts(m, "VmFlags: ");
>>> @@ -1531,6 +1541,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
>>> static inline void clear_soft_dirty(struct vm_area_struct *vma,
>>> unsigned long addr, pte_t *pte)
>>> {
>>> + if (!pte_soft_dirty_available())
>>> + return;
>>> /*
>>> * The soft-dirty tracker uses #PF-s to catch writes
>>> * to pages, so write-protect the pte as well. See the
>>> @@ -1566,6 +1578,9 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
>>> {
>>> pmd_t old, pmd = *pmdp;
>>>
>>> + if (!pte_soft_dirty_available())
>>> + return;
>>> +
>>> if (pmd_present(pmd)) {
>>> /* See comment in change_huge_pmd() */
>>> old = pmdp_invalidate(vma, addr, pmdp);
>>> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
>>> index 4c035637eeb7..c0e2a6dc69f4 100644
>>> --- a/include/linux/pgtable.h
>>> +++ b/include/linux/pgtable.h
>>> @@ -1538,6 +1538,15 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
>>> #endif
>>>
>>> #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
>>> +
>>> +/*
>>> + * Some platforms can customize the PTE soft dirty bit and make it unavailable
>>> + * even if the architecture allows providing the PTE resource.
>>> + */
>>> +#ifndef pte_soft_dirty_available
>>> +#define pte_soft_dirty_available() (true)
>>> +#endif
>>> +
>>> #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
>>> static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
>>> {
>>> @@ -1555,6 +1564,7 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
>>> }
>>> #endif
>>> #else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
>>> +#define pte_soft_dirty_available() (false)
>>> static inline int pte_soft_dirty(pte_t pte)
>>> {
>>> return 0;
>>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
>>> index 830107b6dd08..98ed7e22ccec 100644
>>> --- a/mm/debug_vm_pgtable.c
>>> +++ b/mm/debug_vm_pgtable.c
>>> @@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
>>> {
>>> pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
>>>
>>> - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
>>> + if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || !pte_soft_dirty_available())
>>
>> I suggest that you instead make pte_soft_dirty_available() be false without CONFIG_MEM_SOFT_DIRTY.
>>
>> e.g., for the default implementation
>>
>> define pte_soft_dirty_available() IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)
>>
>> That way you can avoid some ifdefs and clean up these checks.
>
> Do you mean something like this:
>
> --- a/include/linux/pgtable.h
> +++ b/include/linux/pgtable.h
> @@ -1538,6 +1538,16 @@ static inline pgprot_t pgprot_modify(pgprot_t
> oldprot, pgprot_t newprot)
> #endif
>
> #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
> +#ifndef arch_soft_dirty_available
> +#define arch_soft_dirty_available() (true)
> +#endif
> +#define pgtable_soft_dirty_supported()
> (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && arch_soft_dirty_available())
> +
> #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
> static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
> {
> @@ -1555,6 +1565,7 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
> }
> #endif
> #else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
> +#define pgtable_soft_dirty_supported() (false)
Maybe we can simplify to
#ifndef pgtable_soft_dirty_supported
#define pgtable_soft_dirty_supported() IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)
#endif
And then just let the arch that overrides this function just make it
respect IS_ENABLED(CONFIG_MEM_SOFT_DIRTY).
--
Cheers
David / dhildenb
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH V10 1/5] mm: softdirty: Add pte_soft_dirty_available()
2025-09-10 8:51 ` David Hildenbrand
@ 2025-09-11 2:51 ` Chunyan Zhang
0 siblings, 0 replies; 12+ messages in thread
From: Chunyan Zhang @ 2025-09-11 2:51 UTC (permalink / raw)
To: David Hildenbrand
Cc: Chunyan Zhang, linux-riscv, linux-fsdevel, linux-mm,
linux-kernel, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Alexandre Ghiti, Deepak Gupta, Ved Shanbhogue, Alexander Viro,
Christian Brauner, Jan Kara, Andrew Morton, Peter Xu,
Arnd Bergmann, Lorenzo Stoakes, Liam R . Howlett,
Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
Axel Rasmussen, Yuanchu Xie
On Wed, 10 Sept 2025 at 16:51, David Hildenbrand <david@redhat.com> wrote:
>
> On 10.09.25 10:25, Chunyan Zhang wrote:
> > Hi David,
> >
> > On Tue, 9 Sept 2025 at 19:42, David Hildenbrand <david@redhat.com> wrote:
> >>
> >> On 09.09.25 11:56, Chunyan Zhang wrote:
> >>> Some platforms can customize the PTE soft dirty bit and make it unavailable
> >>> even if the architecture allows providing the PTE resource.
> >>>
> >>> Add an API which architectures can define their specific implementations
> >>> to detect if the PTE soft-dirty bit is available, on which the kernel
> >>> is running.
> >>>
> >>> Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
> >>> ---
> >>> fs/proc/task_mmu.c | 17 ++++++++++++++++-
> >>> include/linux/pgtable.h | 10 ++++++++++
> >>> mm/debug_vm_pgtable.c | 9 +++++----
> >>> mm/huge_memory.c | 10 ++++++----
> >>> mm/internal.h | 2 +-
> >>> mm/mremap.c | 10 ++++++----
> >>> mm/userfaultfd.c | 6 ++++--
> >>> 7 files changed, 48 insertions(+), 16 deletions(-)
> >>>
> >>> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> >>> index 29cca0e6d0ff..20a609ec1ba6 100644
> >>> --- a/fs/proc/task_mmu.c
> >>> +++ b/fs/proc/task_mmu.c
> >>> @@ -1058,7 +1058,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
> >>> * -Werror=unterminated-string-initialization warning
> >>> * with GCC 15
> >>> */
> >>> - static const char mnemonics[BITS_PER_LONG][3] = {
> >>> + static char mnemonics[BITS_PER_LONG][3] = {
> >>> /*
> >>> * In case if we meet a flag we don't know about.
> >>> */
> >>> @@ -1129,6 +1129,16 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
> >>> [ilog2(VM_SEALED)] = "sl",
> >>> #endif
> >>> };
> >>> +/*
> >>> + * We should remove the VM_SOFTDIRTY flag if the PTE soft-dirty bit is
> >>> + * unavailable on which the kernel is running, even if the architecture
> >>> + * allows providing the PTE resource and soft-dirty is compiled in.
> >>> + */
> >>> +#ifdef CONFIG_MEM_SOFT_DIRTY
> >>> + if (!pte_soft_dirty_available())
> >>> + mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0;
> >>> +#endif
> >>> +
> >>> size_t i;
> >>>
> >>> seq_puts(m, "VmFlags: ");
> >>> @@ -1531,6 +1541,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
> >>> static inline void clear_soft_dirty(struct vm_area_struct *vma,
> >>> unsigned long addr, pte_t *pte)
> >>> {
> >>> + if (!pte_soft_dirty_available())
> >>> + return;
> >>> /*
> >>> * The soft-dirty tracker uses #PF-s to catch writes
> >>> * to pages, so write-protect the pte as well. See the
> >>> @@ -1566,6 +1578,9 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
> >>> {
> >>> pmd_t old, pmd = *pmdp;
> >>>
> >>> + if (!pte_soft_dirty_available())
> >>> + return;
> >>> +
> >>> if (pmd_present(pmd)) {
> >>> /* See comment in change_huge_pmd() */
> >>> old = pmdp_invalidate(vma, addr, pmdp);
> >>> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> >>> index 4c035637eeb7..c0e2a6dc69f4 100644
> >>> --- a/include/linux/pgtable.h
> >>> +++ b/include/linux/pgtable.h
> >>> @@ -1538,6 +1538,15 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
> >>> #endif
> >>>
> >>> #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
> >>> +
> >>> +/*
> >>> + * Some platforms can customize the PTE soft dirty bit and make it unavailable
> >>> + * even if the architecture allows providing the PTE resource.
> >>> + */
> >>> +#ifndef pte_soft_dirty_available
> >>> +#define pte_soft_dirty_available() (true)
> >>> +#endif
> >>> +
> >>> #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
> >>> static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
> >>> {
> >>> @@ -1555,6 +1564,7 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
> >>> }
> >>> #endif
> >>> #else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
> >>> +#define pte_soft_dirty_available() (false)
> >>> static inline int pte_soft_dirty(pte_t pte)
> >>> {
> >>> return 0;
> >>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> >>> index 830107b6dd08..98ed7e22ccec 100644
> >>> --- a/mm/debug_vm_pgtable.c
> >>> +++ b/mm/debug_vm_pgtable.c
> >>> @@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
> >>> {
> >>> pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
> >>>
> >>> - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
> >>> + if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || !pte_soft_dirty_available())
> >>
> >> I suggest that you instead make pte_soft_dirty_available() be false without CONFIG_MEM_SOFT_DIRTY.
> >>
> >> e.g., for the default implementation
> >>
> >> define pte_soft_dirty_available() IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)
> >>
> >> That way you can avoid some ifdefs and clean up these checks.
> >
> > Do you mean something like this:
> >
> > --- a/include/linux/pgtable.h
> > +++ b/include/linux/pgtable.h
> > @@ -1538,6 +1538,16 @@ static inline pgprot_t pgprot_modify(pgprot_t
> > oldprot, pgprot_t newprot)
> > #endif
> >
> > #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
> > +#ifndef arch_soft_dirty_available
> > +#define arch_soft_dirty_available() (true)
> > +#endif
> > +#define pgtable_soft_dirty_supported()
> > (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && arch_soft_dirty_available())
> > +
> > #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
> > static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
> > {
> > @@ -1555,6 +1565,7 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
> > }
> > #endif
> > #else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
> > +#define pgtable_soft_dirty_supported() (false)
>
> Maybe we can simplify to
>
> #ifndef pgtable_soft_dirty_supported
> #define pgtable_soft_dirty_supported() IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)
> #endif
>
> And then just let the arch that overrides this function just make it
> respect IS_ENABLED(CONFIG_MEM_SOFT_DIRTY).
Ok, got you, I will address it.
Thanks for your review,
Chunyan
>
> --
> Cheers
>
> David / dhildenb
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V10 2/5] mm: uffd_wp: Add pte_uffd_wp_available()
2025-09-09 9:56 [PATCH V10 0/5] riscv: mm: Add soft-dirty and uffd-wp support Chunyan Zhang
2025-09-09 9:56 ` [PATCH V10 1/5] mm: softdirty: Add pte_soft_dirty_available() Chunyan Zhang
@ 2025-09-09 9:56 ` Chunyan Zhang
2025-09-09 11:43 ` David Hildenbrand
2025-09-09 9:56 ` [PATCH V10 3/5] riscv: Add RISC-V Svrsw60t59b extension support Chunyan Zhang
` (2 subsequent siblings)
4 siblings, 1 reply; 12+ messages in thread
From: Chunyan Zhang @ 2025-09-09 9:56 UTC (permalink / raw)
To: linux-riscv, linux-fsdevel, linux-mm, linux-kernel
Cc: Paul Walmsley, Palmer Dabbelt, Albert Ou, Alexandre Ghiti,
Deepak Gupta, Ved Shanbhogue, Alexander Viro, Christian Brauner,
Jan Kara, Andrew Morton, Peter Xu, Arnd Bergmann,
David Hildenbrand, Lorenzo Stoakes, Liam R . Howlett,
Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
Axel Rasmussen, Yuanchu Xie, Chunyan Zhang
Some platforms can customize the PTE uffd_wp bit and make it unavailable
even if the architecture allows providing the PTE resource.
This patch adds a macro API which allows architectures to define
their own implementations for checking if the PTE uffd_wp bit is available.
Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
---
fs/userfaultfd.c | 25 +++++++++--------
include/asm-generic/pgtable_uffd.h | 12 ++++++++
include/linux/mm_inline.h | 7 +++++
include/linux/userfaultfd_k.h | 44 +++++++++++++++++++-----------
mm/memory.c | 6 ++--
5 files changed, 65 insertions(+), 29 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 54c6cc7fe9c6..68e5006e5158 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1270,9 +1270,10 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MISSING)
vm_flags |= VM_UFFD_MISSING;
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP) {
-#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP
- goto out;
-#endif
+ if (!IS_ENABLED(CONFIG_HAVE_ARCH_USERFAULTFD_WP) ||
+ !pte_uffd_wp_available())
+ goto out;
+
vm_flags |= VM_UFFD_WP;
}
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MINOR) {
@@ -1980,14 +1981,16 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
uffdio_api.features &=
~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM);
#endif
-#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP
- uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP;
-#endif
-#ifndef CONFIG_PTE_MARKER_UFFD_WP
- uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
- uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;
- uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC;
-#endif
+ if (!IS_ENABLED(CONFIG_HAVE_ARCH_USERFAULTFD_WP) ||
+ !pte_uffd_wp_available())
+ uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP;
+
+ if (!IS_ENABLED(CONFIG_PTE_MARKER_UFFD_WP) ||
+ !pte_uffd_wp_available()) {
+ uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
+ uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;
+ uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC;
+ }
ret = -EINVAL;
if (features & ~uffdio_api.features)
diff --git a/include/asm-generic/pgtable_uffd.h b/include/asm-generic/pgtable_uffd.h
index 828966d4c281..abab46bd718b 100644
--- a/include/asm-generic/pgtable_uffd.h
+++ b/include/asm-generic/pgtable_uffd.h
@@ -61,6 +61,18 @@ static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd)
{
return pmd;
}
+#define pte_uffd_wp_available() (false)
+#else
+/*
+ * Some platforms can customize the PTE uffd_wp bit and make it unavailable
+ * even if the architecture allows providing the PTE resource.
+ * It allows architectures to define their APIs to check if the PTE
+ * uffd_wp bit is available on the specific devices.
+ */
+#ifndef pte_uffd_wp_available
+#define pte_uffd_wp_available() (true)
+#endif
+
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
#endif /* _ASM_GENERIC_PGTABLE_UFFD_H */
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 89b518ff097e..4e5a8a265642 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -571,6 +571,13 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr,
pte_t *pte, pte_t pteval)
{
#ifdef CONFIG_PTE_MARKER_UFFD_WP
+ /*
+ * Some platforms can customize the PTE uffd_wp bit and make it unavailable
+ * even if the architecture allows providing the PTE resource.
+ */
+ if (!pte_uffd_wp_available())
+ return false;
+
bool arm_uffd_pte = false;
/* The current status of the pte should be "cleared" before calling */
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index c0e716aec26a..ec4a815286c8 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -228,15 +228,15 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
if (wp_async && (vm_flags == VM_UFFD_WP))
return true;
-#ifndef CONFIG_PTE_MARKER_UFFD_WP
/*
* If user requested uffd-wp but not enabled pte markers for
* uffd-wp, then shmem & hugetlbfs are not supported but only
* anonymous.
*/
- if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma))
+ if ((!IS_ENABLED(CONFIG_PTE_MARKER_UFFD_WP) ||
+ !pte_uffd_wp_available()) &&
+ (vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma))
return false;
-#endif
/* By default, allow any of anon|shmem|hugetlb */
return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) ||
@@ -437,8 +437,11 @@ static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)
static inline bool pte_marker_entry_uffd_wp(swp_entry_t entry)
{
#ifdef CONFIG_PTE_MARKER_UFFD_WP
- return is_pte_marker_entry(entry) &&
- (pte_marker_get(entry) & PTE_MARKER_UFFD_WP);
+ if (pte_uffd_wp_available())
+ return is_pte_marker_entry(entry) &&
+ (pte_marker_get(entry) & PTE_MARKER_UFFD_WP);
+ else
+ return false;
#else
return false;
#endif
@@ -447,14 +450,19 @@ static inline bool pte_marker_entry_uffd_wp(swp_entry_t entry)
static inline bool pte_marker_uffd_wp(pte_t pte)
{
#ifdef CONFIG_PTE_MARKER_UFFD_WP
- swp_entry_t entry;
+ if (pte_uffd_wp_available()) {
+ swp_entry_t entry;
- if (!is_swap_pte(pte))
- return false;
+ if (!is_swap_pte(pte))
+ return false;
- entry = pte_to_swp_entry(pte);
+ entry = pte_to_swp_entry(pte);
+
+ return pte_marker_entry_uffd_wp(entry);
+ } else {
+ return false;
+ }
- return pte_marker_entry_uffd_wp(entry);
#else
return false;
#endif
@@ -467,14 +475,18 @@ static inline bool pte_marker_uffd_wp(pte_t pte)
static inline bool pte_swp_uffd_wp_any(pte_t pte)
{
#ifdef CONFIG_PTE_MARKER_UFFD_WP
- if (!is_swap_pte(pte))
- return false;
+ if (pte_uffd_wp_available()) {
+ if (!is_swap_pte(pte))
+ return false;
- if (pte_swp_uffd_wp(pte))
- return true;
+ if (pte_swp_uffd_wp(pte))
+ return true;
- if (pte_marker_uffd_wp(pte))
- return true;
+ if (pte_marker_uffd_wp(pte))
+ return true;
+ } else {
+ return false;
+ }
#endif
return false;
}
diff --git a/mm/memory.c b/mm/memory.c
index 0ba4f6b71847..d6c874221433 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1465,7 +1465,9 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
{
bool was_installed = false;
-#ifdef CONFIG_PTE_MARKER_UFFD_WP
+ if (!IS_ENABLED(CONFIG_PTE_MARKER_UFFD_WP) || !pte_uffd_wp_available())
+ return false;
+
/* Zap on anonymous always means dropping everything */
if (vma_is_anonymous(vma))
return false;
@@ -1482,7 +1484,7 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
pte++;
addr += PAGE_SIZE;
}
-#endif
+
return was_installed;
}
--
2.34.1
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH V10 2/5] mm: uffd_wp: Add pte_uffd_wp_available()
2025-09-09 9:56 ` [PATCH V10 2/5] mm: uffd_wp: Add pte_uffd_wp_available() Chunyan Zhang
@ 2025-09-09 11:43 ` David Hildenbrand
0 siblings, 0 replies; 12+ messages in thread
From: David Hildenbrand @ 2025-09-09 11:43 UTC (permalink / raw)
To: Chunyan Zhang, linux-riscv, linux-fsdevel, linux-mm, linux-kernel
Cc: Paul Walmsley, Palmer Dabbelt, Albert Ou, Alexandre Ghiti,
Deepak Gupta, Ved Shanbhogue, Alexander Viro, Christian Brauner,
Jan Kara, Andrew Morton, Peter Xu, Arnd Bergmann,
Lorenzo Stoakes, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Axel Rasmussen,
Yuanchu Xie, Chunyan Zhang
On 09.09.25 11:56, Chunyan Zhang wrote:
> Some platforms can customize the PTE uffd_wp bit and make it unavailable
> even if the architecture allows providing the PTE resource.
> This patch adds a macro API which allows architectures to define
> their specific ones for checking if the PTE uffd_wp bit is available.
>
> Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
> ---
> fs/userfaultfd.c | 25 +++++++++--------
> include/asm-generic/pgtable_uffd.h | 12 ++++++++
> include/linux/mm_inline.h | 7 +++++
> include/linux/userfaultfd_k.h | 44 +++++++++++++++++++-----------
> mm/memory.c | 6 ++--
> 5 files changed, 65 insertions(+), 29 deletions(-)
>
> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> index 54c6cc7fe9c6..68e5006e5158 100644
> --- a/fs/userfaultfd.c
> +++ b/fs/userfaultfd.c
> @@ -1270,9 +1270,10 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
> if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MISSING)
> vm_flags |= VM_UFFD_MISSING;
> if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP) {
> -#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP
> - goto out;
> -#endif
> + if (!IS_ENABLED(CONFIG_HAVE_ARCH_USERFAULTFD_WP) ||
> + !pte_uffd_wp_available())
> + goto out;
> +
Same comment as for the other patch: make the
CONFIG_HAVE_ARCH_USERFAULTFD_WP part of the pte_uffd_wp_available()
check and better call it "pgtable_uffd_wp_" ... available/supported.
--
Cheers
David / dhildenb
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V10 3/5] riscv: Add RISC-V Svrsw60t59b extension support
2025-09-09 9:56 [PATCH V10 0/5] riscv: mm: Add soft-dirty and uffd-wp support Chunyan Zhang
2025-09-09 9:56 ` [PATCH V10 1/5] mm: softdirty: Add pte_soft_dirty_available() Chunyan Zhang
2025-09-09 9:56 ` [PATCH V10 2/5] mm: uffd_wp: Add pte_uffd_wp_available() Chunyan Zhang
@ 2025-09-09 9:56 ` Chunyan Zhang
2025-09-09 17:12 ` Andrew Jones
2025-09-09 9:56 ` [PATCH V10 4/5] riscv: mm: Add soft-dirty page tracking support Chunyan Zhang
2025-09-09 9:56 ` [PATCH V10 5/5] riscv: mm: Add uffd write-protect support Chunyan Zhang
4 siblings, 1 reply; 12+ messages in thread
From: Chunyan Zhang @ 2025-09-09 9:56 UTC (permalink / raw)
To: linux-riscv, linux-fsdevel, linux-mm, linux-kernel
Cc: Paul Walmsley, Palmer Dabbelt, Albert Ou, Alexandre Ghiti,
Deepak Gupta, Ved Shanbhogue, Alexander Viro, Christian Brauner,
Jan Kara, Andrew Morton, Peter Xu, Arnd Bergmann,
David Hildenbrand, Lorenzo Stoakes, Liam R . Howlett,
Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
Axel Rasmussen, Yuanchu Xie, Chunyan Zhang
The Svrsw60t59b extension allows freeing the PTE reserved bits 60
and 59 for software to use.
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
---
arch/riscv/Kconfig | 14 ++++++++++++++
arch/riscv/include/asm/hwcap.h | 1 +
arch/riscv/kernel/cpufeature.c | 1 +
3 files changed, 16 insertions(+)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a4b233a0659e..d99df67cc7a4 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -862,6 +862,20 @@ config RISCV_ISA_ZICBOP
If you don't know what to do here, say Y.
+config RISCV_ISA_SVRSW60T59B
+ bool "Svrsw60t59b extension support for using PTE bits 60 and 59"
+ depends on MMU && 64BIT
+ depends on RISCV_ALTERNATIVE
+ default y
+ help
+ Adds support to dynamically detect the presence of the Svrsw60t59b
+ extension and enable its usage.
+
+ The Svrsw60t59b extension allows to free the PTE reserved bits 60
+ and 59 for software to use.
+
+ If you don't know what to do here, say Y.
+
config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
def_bool y
# https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index affd63e11b0a..f98fcb5c17d5 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -106,6 +106,7 @@
#define RISCV_ISA_EXT_ZAAMO 97
#define RISCV_ISA_EXT_ZALRSC 98
#define RISCV_ISA_EXT_ZICBOP 99
+#define RISCV_ISA_EXT_SVRSW60T59B 100
#define RISCV_ISA_EXT_XLINUXENVCFG 127
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 743d53415572..de29562096ff 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -540,6 +540,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
__RISCV_ISA_EXT_DATA(svvptc, RISCV_ISA_EXT_SVVPTC),
+ __RISCV_ISA_EXT_DATA(svrsw60t59b, RISCV_ISA_EXT_SVRSW60T59B),
};
const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext);
--
2.34.1
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH V10 3/5] riscv: Add RISC-V Svrsw60t59b extension support
2025-09-09 9:56 ` [PATCH V10 3/5] riscv: Add RISC-V Svrsw60t59b extension support Chunyan Zhang
@ 2025-09-09 17:12 ` Andrew Jones
0 siblings, 0 replies; 12+ messages in thread
From: Andrew Jones @ 2025-09-09 17:12 UTC (permalink / raw)
To: Chunyan Zhang
Cc: linux-riscv, linux-fsdevel, linux-mm, linux-kernel,
Paul Walmsley, Palmer Dabbelt, Albert Ou, Alexandre Ghiti,
Deepak Gupta, Ved Shanbhogue, Alexander Viro, Christian Brauner,
Jan Kara, Andrew Morton, Peter Xu, Arnd Bergmann,
David Hildenbrand, Lorenzo Stoakes, Liam R . Howlett,
Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
Axel Rasmussen, Yuanchu Xie, Chunyan Zhang
On Tue, Sep 09, 2025 at 05:56:09PM +0800, Chunyan Zhang wrote:
> The Svrsw60t59b extension allows to free the PTE reserved bits 60
> and 59 for software to use.
>
> Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
> ---
> arch/riscv/Kconfig | 14 ++++++++++++++
> arch/riscv/include/asm/hwcap.h | 1 +
> arch/riscv/kernel/cpufeature.c | 1 +
> 3 files changed, 16 insertions(+)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index a4b233a0659e..d99df67cc7a4 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -862,6 +862,20 @@ config RISCV_ISA_ZICBOP
>
> If you don't know what to do here, say Y.
>
> +config RISCV_ISA_SVRSW60T59B
> + bool "Svrsw60t59b extension support for using PTE bits 60 and 59"
> + depends on MMU && 64BIT
> + depends on RISCV_ALTERNATIVE
> + default y
> + help
> + Adds support to dynamically detect the presence of the Svrsw60t59b
> + extension and enable its usage.
> +
> + The Svrsw60t59b extension allows to free the PTE reserved bits 60
> + and 59 for software to use.
> +
> + If you don't know what to do here, say Y.
> +
> config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
> def_bool y
> # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc
> diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
> index affd63e11b0a..f98fcb5c17d5 100644
> --- a/arch/riscv/include/asm/hwcap.h
> +++ b/arch/riscv/include/asm/hwcap.h
> @@ -106,6 +106,7 @@
> #define RISCV_ISA_EXT_ZAAMO 97
> #define RISCV_ISA_EXT_ZALRSC 98
> #define RISCV_ISA_EXT_ZICBOP 99
> +#define RISCV_ISA_EXT_SVRSW60T59B 100
>
> #define RISCV_ISA_EXT_XLINUXENVCFG 127
>
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 743d53415572..de29562096ff 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -540,6 +540,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
> __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
> __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
> __RISCV_ISA_EXT_DATA(svvptc, RISCV_ISA_EXT_SVVPTC),
> + __RISCV_ISA_EXT_DATA(svrsw60t59b, RISCV_ISA_EXT_SVRSW60T59B),
svrsw60t59b should come before svvptc. See the ordering rule comment at
the top of the array.
Otherwise,
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> };
>
> const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext);
> --
> 2.34.1
>
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V10 4/5] riscv: mm: Add soft-dirty page tracking support
2025-09-09 9:56 [PATCH V10 0/5] riscv: mm: Add soft-dirty and uffd-wp support Chunyan Zhang
` (2 preceding siblings ...)
2025-09-09 9:56 ` [PATCH V10 3/5] riscv: Add RISC-V Svrsw60t59b extension support Chunyan Zhang
@ 2025-09-09 9:56 ` Chunyan Zhang
2025-09-09 9:56 ` [PATCH V10 5/5] riscv: mm: Add uffd write-protect support Chunyan Zhang
4 siblings, 0 replies; 12+ messages in thread
From: Chunyan Zhang @ 2025-09-09 9:56 UTC (permalink / raw)
To: linux-riscv, linux-fsdevel, linux-mm, linux-kernel
Cc: Paul Walmsley, Palmer Dabbelt, Albert Ou, Alexandre Ghiti,
Deepak Gupta, Ved Shanbhogue, Alexander Viro, Christian Brauner,
Jan Kara, Andrew Morton, Peter Xu, Arnd Bergmann,
David Hildenbrand, Lorenzo Stoakes, Liam R . Howlett,
Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
Axel Rasmussen, Yuanchu Xie, Chunyan Zhang
The Svrsw60t59b extension allows freeing the PTE reserved bits 60 and 59
for software; this patch uses bit 59 for soft-dirty tracking.
To add swap PTE soft-dirty tracking, we borrow bit 3, which is available
for swap PTEs on RISC-V systems.
Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
---
arch/riscv/Kconfig | 1 +
arch/riscv/include/asm/pgtable-bits.h | 19 +++++++
arch/riscv/include/asm/pgtable.h | 73 ++++++++++++++++++++++++++-
3 files changed, 91 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index d99df67cc7a4..53b73e4bdf3f 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -141,6 +141,7 @@ config RISCV
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_SOFT_DIRTY if 64BIT && MMU && RISCV_ISA_SVRSW60T59B
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU
diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h
index 179bd4afece4..8ffe81bf66d2 100644
--- a/arch/riscv/include/asm/pgtable-bits.h
+++ b/arch/riscv/include/asm/pgtable-bits.h
@@ -19,6 +19,25 @@
#define _PAGE_SOFT (3 << 8) /* Reserved for software */
#define _PAGE_SPECIAL (1 << 8) /* RSW: 0x1 */
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+
+/* ext_svrsw60t59b: bit 59 for software dirty tracking */
+#define _PAGE_SOFT_DIRTY \
+ ((riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)) ? \
+ (1UL << 59) : 0)
+/*
+ * Bit 3 is always zero for swap entry computation, so we
+ * can borrow it for swap page soft-dirty tracking.
+ */
+#define _PAGE_SWP_SOFT_DIRTY \
+ ((riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)) ? \
+ _PAGE_EXEC : 0)
+#else
+#define _PAGE_SOFT_DIRTY 0
+#define _PAGE_SWP_SOFT_DIRTY 0
+#endif /* CONFIG_MEM_SOFT_DIRTY */
+
#define _PAGE_TABLE _PAGE_PRESENT
/*
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 91697fbf1f90..b2d00d129d81 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -427,7 +427,7 @@ static inline pte_t pte_mkwrite_novma(pte_t pte)
static inline pte_t pte_mkdirty(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_DIRTY);
+ return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
}
static inline pte_t pte_mkclean(pte_t pte)
@@ -455,6 +455,40 @@ static inline pte_t pte_mkhuge(pte_t pte)
return pte;
}
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+#define pte_soft_dirty_available() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)
+
+static inline bool pte_soft_dirty(pte_t pte)
+{
+ return !!(pte_val(pte) & _PAGE_SOFT_DIRTY);
+}
+
+static inline pte_t pte_mksoft_dirty(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY);
+}
+
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~(_PAGE_SOFT_DIRTY));
+}
+
+static inline bool pte_swp_soft_dirty(pte_t pte)
+{
+ return !!(pte_val(pte) & _PAGE_SWP_SOFT_DIRTY);
+}
+
+static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
+}
+
+static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~(_PAGE_SWP_SOFT_DIRTY));
+}
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
#ifdef CONFIG_RISCV_ISA_SVNAPOT
#define pte_leaf_size(pte) (pte_napot(pte) ? \
napot_cont_size(napot_cont_order(pte)) :\
@@ -802,6 +836,40 @@ static inline pud_t pud_mkspecial(pud_t pud)
}
#endif
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+static inline bool pmd_soft_dirty(pmd_t pmd)
+{
+ return pte_soft_dirty(pmd_pte(pmd));
+}
+
+static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
+{
+ return pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)));
+}
+
+static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
+{
+ return pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)));
+}
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+static inline bool pmd_swp_soft_dirty(pmd_t pmd)
+{
+ return pte_swp_soft_dirty(pmd_pte(pmd));
+}
+
+static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
+{
+ return pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd)));
+}
+
+static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
+{
+ return pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd)));
+}
+#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
@@ -983,7 +1051,8 @@ static inline pud_t pud_modify(pud_t pud, pgprot_t newprot)
*
* Format of swap PTE:
* bit 0: _PAGE_PRESENT (zero)
- * bit 1 to 3: _PAGE_LEAF (zero)
+ * bit 1 to 2: (zero)
+ * bit 3: _PAGE_SWP_SOFT_DIRTY
* bit 5: _PAGE_PROT_NONE (zero)
* bit 6: exclusive marker
* bits 7 to 11: swap type
--
2.34.1
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH V10 5/5] riscv: mm: Add uffd write-protect support
2025-09-09 9:56 [PATCH V10 0/5] riscv: mm: Add soft-dirty and uffd-wp support Chunyan Zhang
` (3 preceding siblings ...)
2025-09-09 9:56 ` [PATCH V10 4/5] riscv: mm: Add soft-dirty page tracking support Chunyan Zhang
@ 2025-09-09 9:56 ` Chunyan Zhang
4 siblings, 0 replies; 12+ messages in thread
From: Chunyan Zhang @ 2025-09-09 9:56 UTC (permalink / raw)
To: linux-riscv, linux-fsdevel, linux-mm, linux-kernel
Cc: Paul Walmsley, Palmer Dabbelt, Albert Ou, Alexandre Ghiti,
Deepak Gupta, Ved Shanbhogue, Alexander Viro, Christian Brauner,
Jan Kara, Andrew Morton, Peter Xu, Arnd Bergmann,
David Hildenbrand, Lorenzo Stoakes, Liam R . Howlett,
Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
Axel Rasmussen, Yuanchu Xie, Chunyan Zhang
The Svrsw60t59b extension allows freeing the PTE reserved bits 60 and 59
for software; this patch uses bit 60 for uffd-wp tracking.
Additionally, for tracking the uffd-wp state as a PTE swap bit, we borrow
bit 4, which is not involved in swap entry computation.
Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
---
arch/riscv/Kconfig | 1 +
arch/riscv/include/asm/pgtable-bits.h | 18 +++++++
arch/riscv/include/asm/pgtable.h | 67 +++++++++++++++++++++++++++
3 files changed, 86 insertions(+)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 53b73e4bdf3f..f928768bb14a 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -147,6 +147,7 @@ config RISCV
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU
select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if 64BIT && MMU
select HAVE_ARCH_USERFAULTFD_MINOR if 64BIT && USERFAULTFD
+ select HAVE_ARCH_USERFAULTFD_WP if 64BIT && MMU && USERFAULTFD && RISCV_ISA_SVRSW60T59B
select HAVE_ARCH_VMAP_STACK if MMU && 64BIT
select HAVE_ASM_MODVERSIONS
select HAVE_CONTEXT_TRACKING_USER
diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h
index 8ffe81bf66d2..894b2a24fc49 100644
--- a/arch/riscv/include/asm/pgtable-bits.h
+++ b/arch/riscv/include/asm/pgtable-bits.h
@@ -38,6 +38,24 @@
#define _PAGE_SWP_SOFT_DIRTY 0
#endif /* CONFIG_MEM_SOFT_DIRTY */
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+
+/* ext_svrsw60t59b: Bit(60) for uffd-wp tracking */
+#define _PAGE_UFFD_WP \
+ ((riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)) ? \
+ (1UL << 60) : 0)
+/*
+ * Bit 4 is not involved into swap entry computation, so we
+ * can borrow it for swap page uffd-wp tracking.
+ */
+#define _PAGE_SWP_UFFD_WP \
+ ((riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)) ? \
+ _PAGE_USER : 0)
+#else
+#define _PAGE_UFFD_WP 0
+#define _PAGE_SWP_UFFD_WP 0
+#endif
+
#define _PAGE_TABLE _PAGE_PRESENT
/*
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index b2d00d129d81..94cc97d3dbff 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -416,6 +416,40 @@ static inline pte_t pte_wrprotect(pte_t pte)
return __pte(pte_val(pte) & ~(_PAGE_WRITE));
}
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+#define pte_uffd_wp_available() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)
+
+static inline bool pte_uffd_wp(pte_t pte)
+{
+ return !!(pte_val(pte) & _PAGE_UFFD_WP);
+}
+
+static inline pte_t pte_mkuffd_wp(pte_t pte)
+{
+ return pte_wrprotect(__pte(pte_val(pte) | _PAGE_UFFD_WP));
+}
+
+static inline pte_t pte_clear_uffd_wp(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~(_PAGE_UFFD_WP));
+}
+
+static inline bool pte_swp_uffd_wp(pte_t pte)
+{
+ return !!(pte_val(pte) & _PAGE_SWP_UFFD_WP);
+}
+
+static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SWP_UFFD_WP);
+}
+
+static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~(_PAGE_SWP_UFFD_WP));
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
/* static inline pte_t pte_mkread(pte_t pte) */
static inline pte_t pte_mkwrite_novma(pte_t pte)
@@ -836,6 +870,38 @@ static inline pud_t pud_mkspecial(pud_t pud)
}
#endif
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+static inline bool pmd_uffd_wp(pmd_t pmd)
+{
+ return pte_uffd_wp(pmd_pte(pmd));
+}
+
+static inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
+{
+ return pte_pmd(pte_mkuffd_wp(pmd_pte(pmd)));
+}
+
+static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
+{
+ return pte_pmd(pte_clear_uffd_wp(pmd_pte(pmd)));
+}
+
+static inline bool pmd_swp_uffd_wp(pmd_t pmd)
+{
+ return pte_swp_uffd_wp(pmd_pte(pmd));
+}
+
+static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd)
+{
+ return pte_pmd(pte_swp_mkuffd_wp(pmd_pte(pmd)));
+}
+
+static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd)
+{
+ return pte_pmd(pte_swp_clear_uffd_wp(pmd_pte(pmd)));
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline bool pmd_soft_dirty(pmd_t pmd)
{
@@ -1053,6 +1119,7 @@ static inline pud_t pud_modify(pud_t pud, pgprot_t newprot)
* bit 0: _PAGE_PRESENT (zero)
* bit 1 to 2: (zero)
* bit 3: _PAGE_SWP_SOFT_DIRTY
+ * bit 4: _PAGE_SWP_UFFD_WP
* bit 5: _PAGE_PROT_NONE (zero)
* bit 6: exclusive marker
* bits 7 to 11: swap type
--
2.34.1
^ permalink raw reply [flat|nested] 12+ messages in thread