linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] mm: Remove PFN_MAP, PFN_SPECIAL, PFN_SG_CHAIN and PFN_SG_LAST
@ 2025-06-04  3:21 Alistair Popple
       [not found] ` <CGME20250610161811eucas1p18de4ba7b320b6d6ff7da44786b350b6e@eucas1p1.samsung.com>
  0 siblings, 1 reply; 7+ messages in thread
From: Alistair Popple @ 2025-06-04  3:21 UTC (permalink / raw)
  To: linux-mm, akpm
  Cc: Alistair Popple, David Hildenbrand, Christoph Hellwig,
	Jason Gunthorpe, gerald.schaefer, dan.j.williams, jgg, willy,
	linux-kernel, nvdimm, jhubbard, zhang.lyra, debug, bjorn,
	balbirs, lorenzo.stoakes, John

The PFN_MAP flag is no longer used for anything, so remove it.
The PFN_SG_CHAIN and PFN_SG_LAST flags never appear to have been
used so also remove them. The last user of PFN_SPECIAL was removed
by 653d7825c149 ("dcssblk: mark DAX broken, remove FS_DAX_LIMITED
support").

Signed-off-by: Alistair Popple <apopple@nvidia.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Cc: gerald.schaefer@linux.ibm.com
Cc: dan.j.williams@intel.com
Cc: jgg@ziepe.ca
Cc: willy@infradead.org
Cc: david@redhat.com
Cc: linux-kernel@vger.kernel.org
Cc: nvdimm@lists.linux.dev
Cc: jhubbard@nvidia.com
Cc: hch@lst.de
Cc: zhang.lyra@gmail.com
Cc: debug@rivosinc.com
Cc: bjorn@kernel.org
Cc: balbirs@nvidia.com
Cc: lorenzo.stoakes@oracle.com
Cc: John@Groves.net

---

Splitting this off from the rest of my series[1] as a separate clean-up
for consideration for the v6.16 merge window as suggested by Christoph.

[1] - https://lore.kernel.org/linux-mm/cover.541c2702181b7461b84f1a6967a3f0e823023fcc.1748500293.git-series.apopple@nvidia.com/
---
 include/linux/pfn_t.h             | 31 +++----------------------------
 mm/memory.c                       |  2 --
 tools/testing/nvdimm/test/iomap.c |  4 ----
 3 files changed, 3 insertions(+), 34 deletions(-)

diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index 2d9148221e9a..46afa12eb33b 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -5,26 +5,13 @@
 
 /*
  * PFN_FLAGS_MASK - mask of all the possible valid pfn_t flags
- * PFN_SG_CHAIN - pfn is a pointer to the next scatterlist entry
- * PFN_SG_LAST - pfn references a page and is the last scatterlist entry
  * PFN_DEV - pfn is not covered by system memmap by default
- * PFN_MAP - pfn has a dynamic page mapping established by a device driver
- * PFN_SPECIAL - for CONFIG_FS_DAX_LIMITED builds to allow XIP, but not
- *		 get_user_pages
  */
 #define PFN_FLAGS_MASK (((u64) (~PAGE_MASK)) << (BITS_PER_LONG_LONG - PAGE_SHIFT))
-#define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1))
-#define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2))
 #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
-#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))
-#define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5))
 
 #define PFN_FLAGS_TRACE \
-	{ PFN_SPECIAL,	"SPECIAL" }, \
-	{ PFN_SG_CHAIN,	"SG_CHAIN" }, \
-	{ PFN_SG_LAST,	"SG_LAST" }, \
-	{ PFN_DEV,	"DEV" }, \
-	{ PFN_MAP,	"MAP" }
+	{ PFN_DEV,	"DEV" }
 
 static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags)
 {
@@ -46,7 +33,7 @@ static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
 
 static inline bool pfn_t_has_page(pfn_t pfn)
 {
-	return (pfn.val & PFN_MAP) == PFN_MAP || (pfn.val & PFN_DEV) == 0;
+	return (pfn.val & PFN_DEV) == 0;
 }
 
 static inline unsigned long pfn_t_to_pfn(pfn_t pfn)
@@ -100,7 +87,7 @@ static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot)
 #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
 static inline bool pfn_t_devmap(pfn_t pfn)
 {
-	const u64 flags = PFN_DEV|PFN_MAP;
+	const u64 flags = PFN_DEV;
 
 	return (pfn.val & flags) == flags;
 }
@@ -116,16 +103,4 @@ pmd_t pmd_mkdevmap(pmd_t pmd);
 pud_t pud_mkdevmap(pud_t pud);
 #endif
 #endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */
-
-#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
-static inline bool pfn_t_special(pfn_t pfn)
-{
-	return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL;
-}
-#else
-static inline bool pfn_t_special(pfn_t pfn)
-{
-	return false;
-}
-#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */
 #endif /* _LINUX_PFN_T_H_ */
diff --git a/mm/memory.c b/mm/memory.c
index 49199410805c..cc85f814bc1c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2569,8 +2569,6 @@ static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn, bool mkwrite)
 		return true;
 	if (pfn_t_devmap(pfn))
 		return true;
-	if (pfn_t_special(pfn))
-		return true;
 	if (is_zero_pfn(pfn_t_to_pfn(pfn)))
 		return true;
 	return false;
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index e4313726fae3..ddceb04b4a9a 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -137,10 +137,6 @@ EXPORT_SYMBOL_GPL(__wrap_devm_memremap_pages);
 
 pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
 {
-	struct nfit_test_resource *nfit_res = get_nfit_res(addr);
-
-	if (nfit_res)
-		flags &= ~PFN_MAP;
         return phys_to_pfn_t(addr, flags);
 }
 EXPORT_SYMBOL(__wrap_phys_to_pfn_t);
-- 
2.47.2



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: Remove PFN_MAP, PFN_SPECIAL, PFN_SG_CHAIN and PFN_SG_LAST
       [not found] ` <CGME20250610161811eucas1p18de4ba7b320b6d6ff7da44786b350b6e@eucas1p1.samsung.com>
@ 2025-06-10 16:18   ` Marek Szyprowski
  2025-06-11  2:38     ` Alistair Popple
  0 siblings, 1 reply; 7+ messages in thread
From: Marek Szyprowski @ 2025-06-10 16:18 UTC (permalink / raw)
  To: Alistair Popple, linux-mm, akpm, linux-riscv
  Cc: David Hildenbrand, Christoph Hellwig, Jason Gunthorpe,
	gerald.schaefer, dan.j.williams, jgg, willy, linux-kernel,
	nvdimm, jhubbard, zhang.lyra, debug, bjorn, balbirs,
	lorenzo.stoakes, John

Dear All,

On 04.06.2025 05:21, Alistair Popple wrote:
> The PFN_MAP flag is no longer used for anything, so remove it.
> The PFN_SG_CHAIN and PFN_SG_LAST flags never appear to have been
> used so also remove them. The last user of PFN_SPECIAL was removed
> by 653d7825c149 ("dcssblk: mark DAX broken, remove FS_DAX_LIMITED
> support").
>
> Signed-off-by: Alistair Popple <apopple@nvidia.com>
> Acked-by: David Hildenbrand <david@redhat.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> Cc: gerald.schaefer@linux.ibm.com
> Cc: dan.j.williams@intel.com
> Cc: jgg@ziepe.ca
> Cc: willy@infradead.org
> Cc: david@redhat.com
> Cc: linux-kernel@vger.kernel.org
> Cc: nvdimm@lists.linux.dev
> Cc: jhubbard@nvidia.com
> Cc: hch@lst.de
> Cc: zhang.lyra@gmail.com
> Cc: debug@rivosinc.com
> Cc: bjorn@kernel.org
> Cc: balbirs@nvidia.com
> Cc: lorenzo.stoakes@oracle.com
> Cc: John@Groves.net
>
> ---
>
> Splitting this off from the rest of my series[1] as a separate clean-up
> for consideration for the v6.16 merge window as suggested by Christoph.
>
> [1] - https://lore.kernel.org/linux-mm/cover.541c2702181b7461b84f1a6967a3f0e823023fcc.1748500293.git-series.apopple@nvidia.com/
> ---
>   include/linux/pfn_t.h             | 31 +++----------------------------
>   mm/memory.c                       |  2 --
>   tools/testing/nvdimm/test/iomap.c |  4 ----
>   3 files changed, 3 insertions(+), 34 deletions(-)

This patch landed in today's linux-next as commit 28be5676b4a3 ("mm: 
remove PFN_MAP, PFN_SPECIAL, PFN_SG_CHAIN and PFN_SG_LAST"). In my tests 
I've noticed that it breaks operation of all RISC-V 64bit boards on my 
test farm (VisionFive2, BananaPiF3 as well as QEMU's Virt machine). I've 
isolated the changes responsible for this issue, see the inline comments 
in the patch below. Here is an example of the issues observed in the 
logs from those machines:

BUG: Bad page map in process modprobe  pte:20682653 pmd:20f23c01
page: refcount:1 mapcount:-1 mapping:0000000000000000 index:0x0 pfn:0x81a09
flags: 0x2004(referenced|reserved|zone=0)
raw: 0000000000002004 ff1c000000068248 ff1c000000068248 0000000000000000
raw: 0000000000000000 0000000000000000 00000001fffffffe 0000000000000000
page dumped because: bad pte
addr:00007fff84619000 vm_flags:04044411 anon_vma:0000000000000000 
mapping:0000000000000000 index:0
file:(null) fault:special_mapping_fault mmap:0x0 mmap_prepare: 0x0 
read_folio:0x0
CPU: 1 UID: 0 PID: 58 Comm: modprobe Not tainted 
6.16.0-rc1-next-20250610+ #15719 NONE
Hardware name: riscv-virtio,qemu (DT)
Call Trace:
[<ffffffff80016152>] dump_backtrace+0x1c/0x24
[<ffffffff8000147a>] show_stack+0x28/0x34
[<ffffffff8000f61e>] dump_stack_lvl+0x5e/0x86
[<ffffffff8000f65a>] dump_stack+0x14/0x1c
[<ffffffff80234b7e>] print_bad_pte+0x1b4/0x1ee
[<ffffffff8023854a>] unmap_page_range+0x4da/0xf74
[<ffffffff80239042>] unmap_single_vma.constprop.0+0x5e/0x90
[<ffffffff8023913a>] unmap_vmas+0xc6/0x1c4
[<ffffffff80244a70>] exit_mmap+0xb6/0x418
[<ffffffff80021dc4>] mmput+0x56/0xf2
[<ffffffff8002b84e>] do_exit+0x182/0x80e
[<ffffffff8002c02a>] do_group_exit+0x24/0x70
[<ffffffff8002c092>] pid_child_should_wake+0x0/0x54
[<ffffffff80b66112>] do_trap_ecall_u+0x29c/0x4cc
[<ffffffff80b747e6>] handle_exception+0x146/0x152
Disabling lock debugging due to kernel taint


> diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
> index 2d9148221e9a..46afa12eb33b 100644
> --- a/include/linux/pfn_t.h
> +++ b/include/linux/pfn_t.h
> @@ -5,26 +5,13 @@
>   
>   /*
>    * PFN_FLAGS_MASK - mask of all the possible valid pfn_t flags
> - * PFN_SG_CHAIN - pfn is a pointer to the next scatterlist entry
> - * PFN_SG_LAST - pfn references a page and is the last scatterlist entry
>    * PFN_DEV - pfn is not covered by system memmap by default
> - * PFN_MAP - pfn has a dynamic page mapping established by a device driver
> - * PFN_SPECIAL - for CONFIG_FS_DAX_LIMITED builds to allow XIP, but not
> - *		 get_user_pages
>    */
>   #define PFN_FLAGS_MASK (((u64) (~PAGE_MASK)) << (BITS_PER_LONG_LONG - PAGE_SHIFT))
> -#define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1))
> -#define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2))
>   #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
> -#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))
> -#define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5))
>   
>   #define PFN_FLAGS_TRACE \
> -	{ PFN_SPECIAL,	"SPECIAL" }, \
> -	{ PFN_SG_CHAIN,	"SG_CHAIN" }, \
> -	{ PFN_SG_LAST,	"SG_LAST" }, \
> -	{ PFN_DEV,	"DEV" }, \
> -	{ PFN_MAP,	"MAP" }
> +	{ PFN_DEV,	"DEV" }
>   
>   static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags)
>   {
> @@ -46,7 +33,7 @@ static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
>   
>   static inline bool pfn_t_has_page(pfn_t pfn)
>   {
> -	return (pfn.val & PFN_MAP) == PFN_MAP || (pfn.val & PFN_DEV) == 0;
> +	return (pfn.val & PFN_DEV) == 0;
>   }
>   
>   static inline unsigned long pfn_t_to_pfn(pfn_t pfn)
> @@ -100,7 +87,7 @@ static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot)
>   #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
>   static inline bool pfn_t_devmap(pfn_t pfn)
>   {
> -	const u64 flags = PFN_DEV|PFN_MAP;
> +	const u64 flags = PFN_DEV;
>   
>   	return (pfn.val & flags) == flags;
>   }

The above change causes the stability issues on RISC-V based boards. To 
get them working again with today's linux-next I had to apply the 
following change:

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 6ff7dd305639..f502860f2a76 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -46,7 +46,6 @@ config RISCV
         select ARCH_HAS_PREEMPT_LAZY
         select ARCH_HAS_PREPARE_SYNC_CORE_CMD
         select ARCH_HAS_PTDUMP if MMU
-       select ARCH_HAS_PTE_DEVMAP if 64BIT && MMU
         select ARCH_HAS_PTE_SPECIAL
         select ARCH_HAS_SET_DIRECT_MAP if MMU
         select ARCH_HAS_SET_MEMORY if MMU

I'm not sure if this is really the desired solution and frankly speaking 
I don't understand the code behind the 'devmap' related bits. I can help 
testing other patches that will fix this issue properly.


> @@ -116,16 +103,4 @@ pmd_t pmd_mkdevmap(pmd_t pmd);
>   pud_t pud_mkdevmap(pud_t pud);
>   #endif
>   #endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */
> -
> -#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
> -static inline bool pfn_t_special(pfn_t pfn)
> -{
> -	return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL;
> -}
> -#else
> -static inline bool pfn_t_special(pfn_t pfn)
> -{
> -	return false;
> -}
> -#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */
>   #endif /* _LINUX_PFN_T_H_ */
> diff --git a/mm/memory.c b/mm/memory.c
> index 49199410805c..cc85f814bc1c 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2569,8 +2569,6 @@ static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn, bool mkwrite)
>   		return true;
>   	if (pfn_t_devmap(pfn))
>   		return true;
> -	if (pfn_t_special(pfn))
> -		return true;
>   	if (is_zero_pfn(pfn_t_to_pfn(pfn)))
>   		return true;
>   	return false;
> diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
> index e4313726fae3..ddceb04b4a9a 100644
> --- a/tools/testing/nvdimm/test/iomap.c
> +++ b/tools/testing/nvdimm/test/iomap.c
> @@ -137,10 +137,6 @@ EXPORT_SYMBOL_GPL(__wrap_devm_memremap_pages);
>   
>   pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
>   {
> -	struct nfit_test_resource *nfit_res = get_nfit_res(addr);
> -
> -	if (nfit_res)
> -		flags &= ~PFN_MAP;
>           return phys_to_pfn_t(addr, flags);
>   }
>   EXPORT_SYMBOL(__wrap_phys_to_pfn_t);

Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: Remove PFN_MAP, PFN_SPECIAL, PFN_SG_CHAIN and PFN_SG_LAST
  2025-06-10 16:18   ` Marek Szyprowski
@ 2025-06-11  2:38     ` Alistair Popple
  2025-06-11  8:03       ` Marek Szyprowski
  0 siblings, 1 reply; 7+ messages in thread
From: Alistair Popple @ 2025-06-11  2:38 UTC (permalink / raw)
  To: Marek Szyprowski
  Cc: linux-mm, akpm, linux-riscv, David Hildenbrand,
	Christoph Hellwig, Jason Gunthorpe, gerald.schaefer,
	dan.j.williams, jgg, willy, linux-kernel, nvdimm, jhubbard,
	zhang.lyra, debug, bjorn, balbirs, lorenzo.stoakes, John

On Tue, Jun 10, 2025 at 06:18:09PM +0200, Marek Szyprowski wrote:
> Dear All,
> 
> On 04.06.2025 05:21, Alistair Popple wrote:
> > The PFN_MAP flag is no longer used for anything, so remove it.
> > The PFN_SG_CHAIN and PFN_SG_LAST flags never appear to have been
> > used so also remove them. The last user of PFN_SPECIAL was removed
> > by 653d7825c149 ("dcssblk: mark DAX broken, remove FS_DAX_LIMITED
> > support").
> >
> > Signed-off-by: Alistair Popple <apopple@nvidia.com>
> > Acked-by: David Hildenbrand <david@redhat.com>
> > Reviewed-by: Christoph Hellwig <hch@lst.de>
> > Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> > Cc: gerald.schaefer@linux.ibm.com
> > Cc: dan.j.williams@intel.com
> > Cc: jgg@ziepe.ca
> > Cc: willy@infradead.org
> > Cc: david@redhat.com
> > Cc: linux-kernel@vger.kernel.org
> > Cc: nvdimm@lists.linux.dev
> > Cc: jhubbard@nvidia.com
> > Cc: hch@lst.de
> > Cc: zhang.lyra@gmail.com
> > Cc: debug@rivosinc.com
> > Cc: bjorn@kernel.org
> > Cc: balbirs@nvidia.com
> > Cc: lorenzo.stoakes@oracle.com
> > Cc: John@Groves.net
> >
> > ---
> >
> > Splitting this off from the rest of my series[1] as a separate clean-up
> > for consideration for the v6.16 merge window as suggested by Christoph.
> >
> > [1] - https://lore.kernel.org/linux-mm/cover.541c2702181b7461b84f1a6967a3f0e823023fcc.1748500293.git-series.apopple@nvidia.com/
> > ---
> >   include/linux/pfn_t.h             | 31 +++----------------------------
> >   mm/memory.c                       |  2 --
> >   tools/testing/nvdimm/test/iomap.c |  4 ----
> >   3 files changed, 3 insertions(+), 34 deletions(-)
> 
> This patch landed in today's linux-next as commit 28be5676b4a3 ("mm: 
> remove PFN_MAP, PFN_SPECIAL, PFN_SG_CHAIN and PFN_SG_LAST"). In my tests 
> I've noticed that it breaks operation of all RISC-V 64bit boards on my 
> test farm (VisionFive2, BananaPiF3 as well as QEMU's Virt machine). I've 
> isolated the changes responsible for this issue, see the inline comments 
> in the patch below. Here is an example of the issues observed in the 
> logs from those machines:

Thanks for the report. I'm really confused by this because this change should
just be removal of dead code - nothing sets any of the removed PFN_* flags
AFAICT.

I don't have access to any RISC-V hardwdare but you say this reproduces under
qemu - what do you run on the system to cause the error? Is it just a simple
boot and load a module or are you running selftests or something else?

Also if possible it would be good to know if you still see the issue
after applying the full series (https://lore.kernel.org/linux-mm/
cover.541c2702181b7461b84f1a6967a3f0e823023fcc.1748500293.git-series.apopple@nvi
dia.com/).

> BUG: Bad page map in process modprobe  pte:20682653 pmd:20f23c01
> page: refcount:1 mapcount:-1 mapping:0000000000000000 index:0x0 pfn:0x81a09
> flags: 0x2004(referenced|reserved|zone=0)
> raw: 0000000000002004 ff1c000000068248 ff1c000000068248 0000000000000000
> raw: 0000000000000000 0000000000000000 00000001fffffffe 0000000000000000
> page dumped because: bad pte
> addr:00007fff84619000 vm_flags:04044411 anon_vma:0000000000000000 
> mapping:0000000000000000 index:0
> file:(null) fault:special_mapping_fault mmap:0x0 mmap_prepare: 0x0 
> read_folio:0x0
> CPU: 1 UID: 0 PID: 58 Comm: modprobe Not tainted 
> 6.16.0-rc1-next-20250610+ #15719 NONE
> Hardware name: riscv-virtio,qemu (DT)
> Call Trace:
> [<ffffffff80016152>] dump_backtrace+0x1c/0x24
> [<ffffffff8000147a>] show_stack+0x28/0x34
> [<ffffffff8000f61e>] dump_stack_lvl+0x5e/0x86
> [<ffffffff8000f65a>] dump_stack+0x14/0x1c
> [<ffffffff80234b7e>] print_bad_pte+0x1b4/0x1ee
> [<ffffffff8023854a>] unmap_page_range+0x4da/0xf74
> [<ffffffff80239042>] unmap_single_vma.constprop.0+0x5e/0x90
> [<ffffffff8023913a>] unmap_vmas+0xc6/0x1c4
> [<ffffffff80244a70>] exit_mmap+0xb6/0x418
> [<ffffffff80021dc4>] mmput+0x56/0xf2
> [<ffffffff8002b84e>] do_exit+0x182/0x80e
> [<ffffffff8002c02a>] do_group_exit+0x24/0x70
> [<ffffffff8002c092>] pid_child_should_wake+0x0/0x54
> [<ffffffff80b66112>] do_trap_ecall_u+0x29c/0x4cc
> [<ffffffff80b747e6>] handle_exception+0x146/0x152
> Disabling lock debugging due to kernel taint
> 
> 
> > diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
> > index 2d9148221e9a..46afa12eb33b 100644
> > --- a/include/linux/pfn_t.h
> > +++ b/include/linux/pfn_t.h
> > @@ -5,26 +5,13 @@
> >   
> >   /*
> >    * PFN_FLAGS_MASK - mask of all the possible valid pfn_t flags
> > - * PFN_SG_CHAIN - pfn is a pointer to the next scatterlist entry
> > - * PFN_SG_LAST - pfn references a page and is the last scatterlist entry
> >    * PFN_DEV - pfn is not covered by system memmap by default
> > - * PFN_MAP - pfn has a dynamic page mapping established by a device driver
> > - * PFN_SPECIAL - for CONFIG_FS_DAX_LIMITED builds to allow XIP, but not
> > - *		 get_user_pages
> >    */
> >   #define PFN_FLAGS_MASK (((u64) (~PAGE_MASK)) << (BITS_PER_LONG_LONG - PAGE_SHIFT))
> > -#define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1))
> > -#define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2))
> >   #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
> > -#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))
> > -#define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5))
> >   
> >   #define PFN_FLAGS_TRACE \
> > -	{ PFN_SPECIAL,	"SPECIAL" }, \
> > -	{ PFN_SG_CHAIN,	"SG_CHAIN" }, \
> > -	{ PFN_SG_LAST,	"SG_LAST" }, \
> > -	{ PFN_DEV,	"DEV" }, \
> > -	{ PFN_MAP,	"MAP" }
> > +	{ PFN_DEV,	"DEV" }
> >   
> >   static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags)
> >   {
> > @@ -46,7 +33,7 @@ static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
> >   
> >   static inline bool pfn_t_has_page(pfn_t pfn)
> >   {
> > -	return (pfn.val & PFN_MAP) == PFN_MAP || (pfn.val & PFN_DEV) == 0;
> > +	return (pfn.val & PFN_DEV) == 0;
> >   }
> >   
> >   static inline unsigned long pfn_t_to_pfn(pfn_t pfn)
> > @@ -100,7 +87,7 @@ static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot)
> >   #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
> >   static inline bool pfn_t_devmap(pfn_t pfn)
> >   {
> > -	const u64 flags = PFN_DEV|PFN_MAP;
> > +	const u64 flags = PFN_DEV;
> >   
> >   	return (pfn.val & flags) == flags;
> >   }
> 
> The above change causes the stability issues on RISC-V based boards. To 
> get them working again with today's linux-next I had to apply the 
> following change:
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 6ff7dd305639..f502860f2a76 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -46,7 +46,6 @@ config RISCV
>          select ARCH_HAS_PREEMPT_LAZY
>          select ARCH_HAS_PREPARE_SYNC_CORE_CMD
>          select ARCH_HAS_PTDUMP if MMU
> -       select ARCH_HAS_PTE_DEVMAP if 64BIT && MMU
>          select ARCH_HAS_PTE_SPECIAL
>          select ARCH_HAS_SET_DIRECT_MAP if MMU
>          select ARCH_HAS_SET_MEMORY if MMU
> 
> I'm not sure if this is really the desired solution and frankly speaking 
> I don't understand the code behind the 'devmap' related bits. I can help 
> testing other patches that will fix this issue properly.
> 
> 
> > @@ -116,16 +103,4 @@ pmd_t pmd_mkdevmap(pmd_t pmd);
> >   pud_t pud_mkdevmap(pud_t pud);
> >   #endif
> >   #endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */
> > -
> > -#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
> > -static inline bool pfn_t_special(pfn_t pfn)
> > -{
> > -	return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL;
> > -}
> > -#else
> > -static inline bool pfn_t_special(pfn_t pfn)
> > -{
> > -	return false;
> > -}
> > -#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */
> >   #endif /* _LINUX_PFN_T_H_ */
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 49199410805c..cc85f814bc1c 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -2569,8 +2569,6 @@ static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn, bool mkwrite)
> >   		return true;
> >   	if (pfn_t_devmap(pfn))
> >   		return true;
> > -	if (pfn_t_special(pfn))
> > -		return true;
> >   	if (is_zero_pfn(pfn_t_to_pfn(pfn)))
> >   		return true;
> >   	return false;
> > diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
> > index e4313726fae3..ddceb04b4a9a 100644
> > --- a/tools/testing/nvdimm/test/iomap.c
> > +++ b/tools/testing/nvdimm/test/iomap.c
> > @@ -137,10 +137,6 @@ EXPORT_SYMBOL_GPL(__wrap_devm_memremap_pages);
> >   
> >   pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
> >   {
> > -	struct nfit_test_resource *nfit_res = get_nfit_res(addr);
> > -
> > -	if (nfit_res)
> > -		flags &= ~PFN_MAP;
> >           return phys_to_pfn_t(addr, flags);
> >   }
> >   EXPORT_SYMBOL(__wrap_phys_to_pfn_t);
> 
> Best regards
> -- 
> Marek Szyprowski, PhD
> Samsung R&D Institute Poland
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: Remove PFN_MAP, PFN_SPECIAL, PFN_SG_CHAIN and PFN_SG_LAST
  2025-06-11  2:38     ` Alistair Popple
@ 2025-06-11  8:03       ` Marek Szyprowski
  2025-06-11  8:23         ` David Hildenbrand
  0 siblings, 1 reply; 7+ messages in thread
From: Marek Szyprowski @ 2025-06-11  8:03 UTC (permalink / raw)
  To: Alistair Popple
  Cc: linux-mm, akpm, linux-riscv, David Hildenbrand,
	Christoph Hellwig, Jason Gunthorpe, gerald.schaefer,
	dan.j.williams, jgg, willy, linux-kernel, nvdimm, jhubbard,
	zhang.lyra, debug, bjorn, balbirs, lorenzo.stoakes, John

On 11.06.2025 04:38, Alistair Popple wrote:
> On Tue, Jun 10, 2025 at 06:18:09PM +0200, Marek Szyprowski wrote:
>> On 04.06.2025 05:21, Alistair Popple wrote:
>>> The PFN_MAP flag is no longer used for anything, so remove it.
>>> The PFN_SG_CHAIN and PFN_SG_LAST flags never appear to have been
>>> used so also remove them. The last user of PFN_SPECIAL was removed
>>> by 653d7825c149 ("dcssblk: mark DAX broken, remove FS_DAX_LIMITED
>>> support").
>>>
>>> Signed-off-by: Alistair Popple <apopple@nvidia.com>
>>> Acked-by: David Hildenbrand <david@redhat.com>
>>> Reviewed-by: Christoph Hellwig <hch@lst.de>
>>> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
>>> Cc: gerald.schaefer@linux.ibm.com
>>> Cc: dan.j.williams@intel.com
>>> Cc: jgg@ziepe.ca
>>> Cc: willy@infradead.org
>>> Cc: david@redhat.com
>>> Cc: linux-kernel@vger.kernel.org
>>> Cc: nvdimm@lists.linux.dev
>>> Cc: jhubbard@nvidia.com
>>> Cc: hch@lst.de
>>> Cc: zhang.lyra@gmail.com
>>> Cc: debug@rivosinc.com
>>> Cc: bjorn@kernel.org
>>> Cc: balbirs@nvidia.com
>>> Cc: lorenzo.stoakes@oracle.com
>>> Cc: John@Groves.net
>>>
>>> ---
>>>
>>> Splitting this off from the rest of my series[1] as a separate clean-up
>>> for consideration for the v6.16 merge window as suggested by Christoph.
>>>
>>> [1] - https://lore.kernel.org/linux-mm/cover.541c2702181b7461b84f1a6967a3f0e823023fcc.1748500293.git-series.apopple@nvidia.com/
>>> ---
>>>    include/linux/pfn_t.h             | 31 +++----------------------------
>>>    mm/memory.c                       |  2 --
>>>    tools/testing/nvdimm/test/iomap.c |  4 ----
>>>    3 files changed, 3 insertions(+), 34 deletions(-)
>> This patch landed in today's linux-next as commit 28be5676b4a3 ("mm:
>> remove PFN_MAP, PFN_SPECIAL, PFN_SG_CHAIN and PFN_SG_LAST"). In my tests
>> I've noticed that it breaks operation of all RISC-V 64bit boards on my
>> test farm (VisionFive2, BananaPiF3 as well as QEMU's Virt machine). I've
>> isolated the changes responsible for this issue, see the inline comments
>> in the patch below. Here is an example of the issues observed in the
>> logs from those machines:
> Thanks for the report. I'm really confused by this because this change should
> just be removal of dead code - nothing sets any of the removed PFN_* flags
> AFAICT.
>
> I don't have access to any RISC-V hardwdare but you say this reproduces under
> qemu - what do you run on the system to cause the error? Is it just a simple
> boot and load a module or are you running selftests or something else?

It fails a simple boot test. Here is a detailed instruction how to 
reproduce this issue with the random Debian rootfs image found on the 
internet (tested on Ubuntu 22.04, with next-20250610
kernel source):

cd linux-src

wget 
http://vimer.7766.org:63015/images/Unmatched-debian/202503/nvme-rootfs.img.xz
xz -d nvme-rootfs.img.xz
make ARCH=riscv CROSS_COMPILE="riscv64-linux-gnu-" defconfig -j16 Image
qemu-system-riscv64 -kernel arch/riscv/boot/Image -append "console=ttyS0 
earlycon no_console_suspend root=/dev/vda1 rw rootwait 
ip=::::target::off" -M virt -smp 2 -m 1024 -device 
virtio-blk-device,drive=virtio-blk0 -drive 
file=nvme-rootfs.img,id=virtio-blk0,if=none,format=raw -netdev 
user,id=user -device virtio-net-device,netdev=user -serial mon:stdio 
-display none

("ctrl-a x" to exit)


> Also if possible it would be good to know if you still see the issue
> after applying the full series (https://lore.kernel.org/linux-mm/
> cover.541c2702181b7461b84f1a6967a3f0e823023fcc.1748500293.git-series.apopple@nvi
> https://protect2.fireeye.com/v1/url?k=9dffef89-c264d6ec-9dfe64c6-000babff32e3-99412cda4ccd1e25&q=1&e=26e3fd2c-72ca-4455-8caf-3257b47262e5&u=http%3A%2F%2Fdia.com%2F%29.

Well, that whole patchset applied on top of v6.15-rc7 seems to be 
working fine. I was not able to rebase it onto v6.16-rc1 or 
next-20250610 without conflicts, so I didn't test it further on newer 
kernels.

However I've checked status after each patch from that patchset. After 
applying the first one (which is $subject), the RISCV issue appears. 
Then it gets fixed by the "[PATCH 04/12] mm: Convert vmf_insert_mixed() 
from using pte_devmap to pte_special" patch. Applying the "[PATCH 04/12] 
mm: Convert vmf_insert_mixed() from using pte_devmap to pte_special" 
patch on top of next-20250610 fixes the RISCV issue too.

I hope this analysis somehow helps.

>> BUG: Bad page map in process modprobe  pte:20682653 pmd:20f23c01
>> page: refcount:1 mapcount:-1 mapping:0000000000000000 index:0x0 pfn:0x81a09
>> flags: 0x2004(referenced|reserved|zone=0)
>> raw: 0000000000002004 ff1c000000068248 ff1c000000068248 0000000000000000
>> raw: 0000000000000000 0000000000000000 00000001fffffffe 0000000000000000
>> page dumped because: bad pte
>> addr:00007fff84619000 vm_flags:04044411 anon_vma:0000000000000000
>> mapping:0000000000000000 index:0
>> file:(null) fault:special_mapping_fault mmap:0x0 mmap_prepare: 0x0
>> read_folio:0x0
>> CPU: 1 UID: 0 PID: 58 Comm: modprobe Not tainted
>> 6.16.0-rc1-next-20250610+ #15719 NONE
>> Hardware name: riscv-virtio,qemu (DT)
>> Call Trace:
>> [<ffffffff80016152>] dump_backtrace+0x1c/0x24
>> [<ffffffff8000147a>] show_stack+0x28/0x34
>> [<ffffffff8000f61e>] dump_stack_lvl+0x5e/0x86
>> [<ffffffff8000f65a>] dump_stack+0x14/0x1c
>> [<ffffffff80234b7e>] print_bad_pte+0x1b4/0x1ee
>> [<ffffffff8023854a>] unmap_page_range+0x4da/0xf74
>> [<ffffffff80239042>] unmap_single_vma.constprop.0+0x5e/0x90
>> [<ffffffff8023913a>] unmap_vmas+0xc6/0x1c4
>> [<ffffffff80244a70>] exit_mmap+0xb6/0x418
>> [<ffffffff80021dc4>] mmput+0x56/0xf2
>> [<ffffffff8002b84e>] do_exit+0x182/0x80e
>> [<ffffffff8002c02a>] do_group_exit+0x24/0x70
>> [<ffffffff8002c092>] pid_child_should_wake+0x0/0x54
>> [<ffffffff80b66112>] do_trap_ecall_u+0x29c/0x4cc
>> [<ffffffff80b747e6>] handle_exception+0x146/0x152
>> Disabling lock debugging due to kernel taint
>>
>>
>>> diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
>>> index 2d9148221e9a..46afa12eb33b 100644
>>> --- a/include/linux/pfn_t.h
>>> +++ b/include/linux/pfn_t.h
>>> @@ -5,26 +5,13 @@
>>>    
>>>    /*
>>>     * PFN_FLAGS_MASK - mask of all the possible valid pfn_t flags
>>> - * PFN_SG_CHAIN - pfn is a pointer to the next scatterlist entry
>>> - * PFN_SG_LAST - pfn references a page and is the last scatterlist entry
>>>     * PFN_DEV - pfn is not covered by system memmap by default
>>> - * PFN_MAP - pfn has a dynamic page mapping established by a device driver
>>> - * PFN_SPECIAL - for CONFIG_FS_DAX_LIMITED builds to allow XIP, but not
>>> - *		 get_user_pages
>>>     */
>>>    #define PFN_FLAGS_MASK (((u64) (~PAGE_MASK)) << (BITS_PER_LONG_LONG - PAGE_SHIFT))
>>> -#define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1))
>>> -#define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2))
>>>    #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
>>> -#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))
>>> -#define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5))
>>>    
>>>    #define PFN_FLAGS_TRACE \
>>> -	{ PFN_SPECIAL,	"SPECIAL" }, \
>>> -	{ PFN_SG_CHAIN,	"SG_CHAIN" }, \
>>> -	{ PFN_SG_LAST,	"SG_LAST" }, \
>>> -	{ PFN_DEV,	"DEV" }, \
>>> -	{ PFN_MAP,	"MAP" }
>>> +	{ PFN_DEV,	"DEV" }
>>>    
>>>    static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags)
>>>    {
>>> @@ -46,7 +33,7 @@ static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
>>>    
>>>    static inline bool pfn_t_has_page(pfn_t pfn)
>>>    {
>>> -	return (pfn.val & PFN_MAP) == PFN_MAP || (pfn.val & PFN_DEV) == 0;
>>> +	return (pfn.val & PFN_DEV) == 0;
>>>    }
>>>    
>>>    static inline unsigned long pfn_t_to_pfn(pfn_t pfn)
>>> @@ -100,7 +87,7 @@ static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot)
>>>    #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
>>>    static inline bool pfn_t_devmap(pfn_t pfn)
>>>    {
>>> -	const u64 flags = PFN_DEV|PFN_MAP;
>>> +	const u64 flags = PFN_DEV;
>>>    
>>>    	return (pfn.val & flags) == flags;
>>>    }
>> The above change causes the stability issues on RISC-V based boards. To
>> get them working again with today's linux-next I had to apply the
>> following change:
>>
>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>> index 6ff7dd305639..f502860f2a76 100644
>> --- a/arch/riscv/Kconfig
>> +++ b/arch/riscv/Kconfig
>> @@ -46,7 +46,6 @@ config RISCV
>>           select ARCH_HAS_PREEMPT_LAZY
>>           select ARCH_HAS_PREPARE_SYNC_CORE_CMD
>>           select ARCH_HAS_PTDUMP if MMU
>> -       select ARCH_HAS_PTE_DEVMAP if 64BIT && MMU
>>           select ARCH_HAS_PTE_SPECIAL
>>           select ARCH_HAS_SET_DIRECT_MAP if MMU
>>           select ARCH_HAS_SET_MEMORY if MMU
>>
>> I'm not sure if this is really the desired solution and frankly speaking
>> I don't understand the code behind the 'devmap' related bits. I can help
>> testing other patches that will fix this issue properly.
>>
>>
>>> @@ -116,16 +103,4 @@ pmd_t pmd_mkdevmap(pmd_t pmd);
>>>    pud_t pud_mkdevmap(pud_t pud);
>>>    #endif
>>>    #endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */
>>> -
>>> -#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
>>> -static inline bool pfn_t_special(pfn_t pfn)
>>> -{
>>> -	return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL;
>>> -}
>>> -#else
>>> -static inline bool pfn_t_special(pfn_t pfn)
>>> -{
>>> -	return false;
>>> -}
>>> -#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */
>>>    #endif /* _LINUX_PFN_T_H_ */
>>> diff --git a/mm/memory.c b/mm/memory.c
>>> index 49199410805c..cc85f814bc1c 100644
>>> --- a/mm/memory.c
>>> +++ b/mm/memory.c
>>> @@ -2569,8 +2569,6 @@ static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn, bool mkwrite)
>>>    		return true;
>>>    	if (pfn_t_devmap(pfn))
>>>    		return true;
>>> -	if (pfn_t_special(pfn))
>>> -		return true;
>>>    	if (is_zero_pfn(pfn_t_to_pfn(pfn)))
>>>    		return true;
>>>    	return false;
>>> diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
>>> index e4313726fae3..ddceb04b4a9a 100644
>>> --- a/tools/testing/nvdimm/test/iomap.c
>>> +++ b/tools/testing/nvdimm/test/iomap.c
>>> @@ -137,10 +137,6 @@ EXPORT_SYMBOL_GPL(__wrap_devm_memremap_pages);
>>>    
>>>    pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
>>>    {
>>> -	struct nfit_test_resource *nfit_res = get_nfit_res(addr);
>>> -
>>> -	if (nfit_res)
>>> -		flags &= ~PFN_MAP;
>>>            return phys_to_pfn_t(addr, flags);
>>>    }
>>>    EXPORT_SYMBOL(__wrap_phys_to_pfn_t);
>>>
>>>
Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: Remove PFN_MAP, PFN_SPECIAL, PFN_SG_CHAIN and PFN_SG_LAST
  2025-06-11  8:03       ` Marek Szyprowski
@ 2025-06-11  8:23         ` David Hildenbrand
  2025-06-11  8:42           ` Marek Szyprowski
  0 siblings, 1 reply; 7+ messages in thread
From: David Hildenbrand @ 2025-06-11  8:23 UTC (permalink / raw)
  To: Marek Szyprowski, Alistair Popple
  Cc: linux-mm, akpm, linux-riscv, Christoph Hellwig, Jason Gunthorpe,
	gerald.schaefer, dan.j.williams, jgg, willy, linux-kernel,
	nvdimm, jhubbard, zhang.lyra, debug, bjorn, balbirs,
	lorenzo.stoakes, John

On 11.06.25 10:03, Marek Szyprowski wrote:
> On 11.06.2025 04:38, Alistair Popple wrote:
>> On Tue, Jun 10, 2025 at 06:18:09PM +0200, Marek Szyprowski wrote:
>>> On 04.06.2025 05:21, Alistair Popple wrote:
>>>> The PFN_MAP flag is no longer used for anything, so remove it.
>>>> The PFN_SG_CHAIN and PFN_SG_LAST flags never appear to have been
>>>> used so also remove them. The last user of PFN_SPECIAL was removed
>>>> by 653d7825c149 ("dcssblk: mark DAX broken, remove FS_DAX_LIMITED
>>>> support").
>>>>
>>>> Signed-off-by: Alistair Popple <apopple@nvidia.com>
>>>> Acked-by: David Hildenbrand <david@redhat.com>
>>>> Reviewed-by: Christoph Hellwig <hch@lst.de>
>>>> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
>>>> Cc: gerald.schaefer@linux.ibm.com
>>>> Cc: dan.j.williams@intel.com
>>>> Cc: jgg@ziepe.ca
>>>> Cc: willy@infradead.org
>>>> Cc: david@redhat.com
>>>> Cc: linux-kernel@vger.kernel.org
>>>> Cc: nvdimm@lists.linux.dev
>>>> Cc: jhubbard@nvidia.com
>>>> Cc: hch@lst.de
>>>> Cc: zhang.lyra@gmail.com
>>>> Cc: debug@rivosinc.com
>>>> Cc: bjorn@kernel.org
>>>> Cc: balbirs@nvidia.com
>>>> Cc: lorenzo.stoakes@oracle.com
>>>> Cc: John@Groves.net
>>>>
>>>> ---
>>>>
>>>> Splitting this off from the rest of my series[1] as a separate clean-up
>>>> for consideration for the v6.16 merge window as suggested by Christoph.
>>>>
>>>> [1] - https://lore.kernel.org/linux-mm/cover.541c2702181b7461b84f1a6967a3f0e823023fcc.1748500293.git-series.apopple@nvidia.com/
>>>> ---
>>>>     include/linux/pfn_t.h             | 31 +++----------------------------
>>>>     mm/memory.c                       |  2 --
>>>>     tools/testing/nvdimm/test/iomap.c |  4 ----
>>>>     3 files changed, 3 insertions(+), 34 deletions(-)
>>> This patch landed in today's linux-next as commit 28be5676b4a3 ("mm:
>>> remove PFN_MAP, PFN_SPECIAL, PFN_SG_CHAIN and PFN_SG_LAST"). In my tests
>>> I've noticed that it breaks operation of all RISC-V 64bit boards on my
>>> test farm (VisionFive2, BananaPiF3 as well as QEMU's Virt machine). I've
>>> isolated the changes responsible for this issue, see the inline comments
>>> in the patch below. Here is an example of the issues observed in the
>>> logs from those machines:
>> Thanks for the report. I'm really confused by this because this change should
>> just be removal of dead code - nothing sets any of the removed PFN_* flags
>> AFAICT.
>>
>> I don't have access to any RISC-V hardwdare but you say this reproduces under
>> qemu - what do you run on the system to cause the error? Is it just a simple
>> boot and load a module or are you running selftests or something else?
> 
> It fails a simple boot test. Here is a detailed instruction how to
> reproduce this issue with the random Debian rootfs image found on the
> internet (tested on Ubuntu 22.04, with next-20250610
> kernel source):

riscv is one of the archs where pte_mkdevmap() will *not* set the pte as special. (I
raised this recently in the original series, it's all a big mess)

So, before this change here, pfn_t_devmap() would have returned "false" if only
PFN_DEV was set, now it would return "true" if only PFN_DEV is set.

Consequently, in insert_pfn() we would have done a pte_mkspecial(), now we do a
pte_mkdevmap() -- again, which does not imply "special" on riscv.

riscv selects CONFIG_ARCH_HAS_PTE_SPECIAL, so if !pte_special(), it's considered as
normal.

Would the following fix your issue?


diff --git a/mm/memory.c b/mm/memory.c
index 8eba595056fe3..0e972c3493692 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -589,6 +589,10 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
  {
         unsigned long pfn = pte_pfn(pte);
  
+       /* TODO: remove this crap and set pte_special() instead. */
+       if (pte_devmap(pte))
+               return NULL;
+
         if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) {
                 if (likely(!pte_special(pte)))
                         goto check_pfn;
@@ -598,16 +602,6 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
                         return NULL;
                 if (is_zero_pfn(pfn))
                         return NULL;
-               if (pte_devmap(pte))
-               /*
-                * NOTE: New users of ZONE_DEVICE will not set pte_devmap()
-                * and will have refcounts incremented on their struct pages
-                * when they are inserted into PTEs, thus they are safe to
-                * return here. Legacy ZONE_DEVICE pages that set pte_devmap()
-                * do not have refcounts. Example of legacy ZONE_DEVICE is
-                * MEMORY_DEVICE_FS_DAX type in pmem or virtio_fs drivers.
-                */
-                       return NULL;
  
                 print_bad_pte(vma, addr, pte, NULL);
                 return NULL;


But, I would have thought the later patches in Alistairs series would sort that out
(where we remove pte_devmap() ... )

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: Remove PFN_MAP, PFN_SPECIAL, PFN_SG_CHAIN and PFN_SG_LAST
  2025-06-11  8:23         ` David Hildenbrand
@ 2025-06-11  8:42           ` Marek Szyprowski
  2025-06-11  8:58             ` Alistair Popple
  0 siblings, 1 reply; 7+ messages in thread
From: Marek Szyprowski @ 2025-06-11  8:42 UTC (permalink / raw)
  To: David Hildenbrand, Alistair Popple
  Cc: linux-mm, akpm, linux-riscv, Christoph Hellwig, Jason Gunthorpe,
	gerald.schaefer, dan.j.williams, jgg, willy, linux-kernel,
	nvdimm, jhubbard, zhang.lyra, debug, bjorn, balbirs,
	lorenzo.stoakes, John

On 11.06.2025 10:23, David Hildenbrand wrote:
> On 11.06.25 10:03, Marek Szyprowski wrote:
>> On 11.06.2025 04:38, Alistair Popple wrote:
>>> On Tue, Jun 10, 2025 at 06:18:09PM +0200, Marek Szyprowski wrote:
>>>> On 04.06.2025 05:21, Alistair Popple wrote:
>>>>> The PFN_MAP flag is no longer used for anything, so remove it.
>>>>> The PFN_SG_CHAIN and PFN_SG_LAST flags never appear to have been
>>>>> used so also remove them. The last user of PFN_SPECIAL was removed
>>>>> by 653d7825c149 ("dcssblk: mark DAX broken, remove FS_DAX_LIMITED
>>>>> support").
>>>>>
>>>>> Signed-off-by: Alistair Popple <apopple@nvidia.com>
>>>>> Acked-by: David Hildenbrand <david@redhat.com>
>>>>> Reviewed-by: Christoph Hellwig <hch@lst.de>
>>>>> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
>>>>> Cc: gerald.schaefer@linux.ibm.com
>>>>> Cc: dan.j.williams@intel.com
>>>>> Cc: jgg@ziepe.ca
>>>>> Cc: willy@infradead.org
>>>>> Cc: david@redhat.com
>>>>> Cc: linux-kernel@vger.kernel.org
>>>>> Cc: nvdimm@lists.linux.dev
>>>>> Cc: jhubbard@nvidia.com
>>>>> Cc: hch@lst.de
>>>>> Cc: zhang.lyra@gmail.com
>>>>> Cc: debug@rivosinc.com
>>>>> Cc: bjorn@kernel.org
>>>>> Cc: balbirs@nvidia.com
>>>>> Cc: lorenzo.stoakes@oracle.com
>>>>> Cc: John@Groves.net
>>>>>
>>>>> ---
>>>>>
>>>>> Splitting this off from the rest of my series[1] as a separate 
>>>>> clean-up
>>>>> for consideration for the v6.16 merge window as suggested by 
>>>>> Christoph.
>>>>>
>>>>> [1] - 
>>>>> https://lore.kernel.org/linux-mm/cover.541c2702181b7461b84f1a6967a3f0e823023fcc.1748500293.git-series.apopple@nvidia.com/
>>>>> ---
>>>>>     include/linux/pfn_t.h             | 31 
>>>>> +++----------------------------
>>>>>     mm/memory.c                       |  2 --
>>>>>     tools/testing/nvdimm/test/iomap.c |  4 ----
>>>>>     3 files changed, 3 insertions(+), 34 deletions(-)
>>>> This patch landed in today's linux-next as commit 28be5676b4a3 ("mm:
>>>> remove PFN_MAP, PFN_SPECIAL, PFN_SG_CHAIN and PFN_SG_LAST"). In my 
>>>> tests
>>>> I've noticed that it breaks operation of all RISC-V 64bit boards on my
>>>> test farm (VisionFive2, BananaPiF3 as well as QEMU's Virt machine). 
>>>> I've
>>>> isolated the changes responsible for this issue, see the inline 
>>>> comments
>>>> in the patch below. Here is an example of the issues observed in the
>>>> logs from those machines:
>>> Thanks for the report. I'm really confused by this because this 
>>> change should
>>> just be removal of dead code - nothing sets any of the removed PFN_* 
>>> flags
>>> AFAICT.
>>>
>>> I don't have access to any RISC-V hardwdare but you say this 
>>> reproduces under
>>> qemu - what do you run on the system to cause the error? Is it just 
>>> a simple
>>> boot and load a module or are you running selftests or something else?
>>
>> It fails a simple boot test. Here is a detailed instruction how to
>> reproduce this issue with the random Debian rootfs image found on the
>> internet (tested on Ubuntu 22.04, with next-20250610
>> kernel source):
>
> riscv is one of the archs where pte_mkdevmap() will *not* set the pte 
> as special. (I
> raised this recently in the original series, it's all a big mess)
>
> So, before this change here, pfn_t_devmap() would have returned 
> "false" if only
> PFN_DEV was set, now it would return "true" if only PFN_DEV is set.
>
> Consequently, in insert_pfn() we would have done a pte_mkspecial(), 
> now we do a
> pte_mkdevmap() -- again, which does not imply "special" on riscv.
>
> riscv selects CONFIG_ARCH_HAS_PTE_SPECIAL, so if !pte_special(), it's 
> considered as
> normal.
>
> Would the following fix your issue?
>
>
> diff --git a/mm/memory.c b/mm/memory.c
> index 8eba595056fe3..0e972c3493692 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -589,6 +589,10 @@ struct page *vm_normal_page(struct vm_area_struct 
> *vma, unsigned long addr,
>  {
>         unsigned long pfn = pte_pfn(pte);
>
> +       /* TODO: remove this crap and set pte_special() instead. */
> +       if (pte_devmap(pte))
> +               return NULL;
> +
>         if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) {
>                 if (likely(!pte_special(pte)))
>                         goto check_pfn;
> @@ -598,16 +602,6 @@ struct page *vm_normal_page(struct vm_area_struct 
> *vma, unsigned long addr,
>                         return NULL;
>                 if (is_zero_pfn(pfn))
>                         return NULL;
> -               if (pte_devmap(pte))
> -               /*
> -                * NOTE: New users of ZONE_DEVICE will not set 
> pte_devmap()
> -                * and will have refcounts incremented on their struct 
> pages
> -                * when they are inserted into PTEs, thus they are 
> safe to
> -                * return here. Legacy ZONE_DEVICE pages that set 
> pte_devmap()
> -                * do not have refcounts. Example of legacy 
> ZONE_DEVICE is
> -                * MEMORY_DEVICE_FS_DAX type in pmem or virtio_fs 
> drivers.
> -                */
> -                       return NULL;
>
>                 print_bad_pte(vma, addr, pte, NULL);
>                 return NULL;
>
>
> But, I would have thought the later patches in Alistairs series would 
> sort that out
> (where we remove pte_devmap() ... )
>
The above change fixes the issues observed on RISCV boards.

Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: Remove PFN_MAP, PFN_SPECIAL, PFN_SG_CHAIN and PFN_SG_LAST
  2025-06-11  8:42           ` Marek Szyprowski
@ 2025-06-11  8:58             ` Alistair Popple
  0 siblings, 0 replies; 7+ messages in thread
From: Alistair Popple @ 2025-06-11  8:58 UTC (permalink / raw)
  To: Marek Szyprowski
  Cc: David Hildenbrand, linux-mm, akpm, linux-riscv,
	Christoph Hellwig, Jason Gunthorpe, gerald.schaefer,
	dan.j.williams, jgg, willy, linux-kernel, nvdimm, jhubbard,
	zhang.lyra, debug, bjorn, balbirs, lorenzo.stoakes, John

On Wed, Jun 11, 2025 at 10:42:16AM +0200, Marek Szyprowski wrote:
> On 11.06.2025 10:23, David Hildenbrand wrote:
> > On 11.06.25 10:03, Marek Szyprowski wrote:
> >> On 11.06.2025 04:38, Alistair Popple wrote:
> >>> On Tue, Jun 10, 2025 at 06:18:09PM +0200, Marek Szyprowski wrote:
> >>>> On 04.06.2025 05:21, Alistair Popple wrote:
> >>>>> The PFN_MAP flag is no longer used for anything, so remove it.
> >>>>> The PFN_SG_CHAIN and PFN_SG_LAST flags never appear to have been
> >>>>> used so also remove them. The last user of PFN_SPECIAL was removed
> >>>>> by 653d7825c149 ("dcssblk: mark DAX broken, remove FS_DAX_LIMITED
> >>>>> support").
> >>>>>
> >>>>> Signed-off-by: Alistair Popple <apopple@nvidia.com>
> >>>>> Acked-by: David Hildenbrand <david@redhat.com>
> >>>>> Reviewed-by: Christoph Hellwig <hch@lst.de>
> >>>>> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> >>>>> Cc: gerald.schaefer@linux.ibm.com
> >>>>> Cc: dan.j.williams@intel.com
> >>>>> Cc: jgg@ziepe.ca
> >>>>> Cc: willy@infradead.org
> >>>>> Cc: david@redhat.com
> >>>>> Cc: linux-kernel@vger.kernel.org
> >>>>> Cc: nvdimm@lists.linux.dev
> >>>>> Cc: jhubbard@nvidia.com
> >>>>> Cc: hch@lst.de
> >>>>> Cc: zhang.lyra@gmail.com
> >>>>> Cc: debug@rivosinc.com
> >>>>> Cc: bjorn@kernel.org
> >>>>> Cc: balbirs@nvidia.com
> >>>>> Cc: lorenzo.stoakes@oracle.com
> >>>>> Cc: John@Groves.net
> >>>>>
> >>>>> ---
> >>>>>
> >>>>> Splitting this off from the rest of my series[1] as a separate 
> >>>>> clean-up
> >>>>> for consideration for the v6.16 merge window as suggested by 
> >>>>> Christoph.
> >>>>>
> >>>>> [1] - 
> >>>>> https://lore.kernel.org/linux-mm/cover.541c2702181b7461b84f1a6967a3f0e823023fcc.1748500293.git-series.apopple@nvidia.com/
> >>>>> ---
> >>>>>     include/linux/pfn_t.h             | 31 
> >>>>> +++----------------------------
> >>>>>     mm/memory.c                       |  2 --
> >>>>>     tools/testing/nvdimm/test/iomap.c |  4 ----
> >>>>>     3 files changed, 3 insertions(+), 34 deletions(-)
> >>>> This patch landed in today's linux-next as commit 28be5676b4a3 ("mm:
> >>>> remove PFN_MAP, PFN_SPECIAL, PFN_SG_CHAIN and PFN_SG_LAST"). In my 
> >>>> tests
> >>>> I've noticed that it breaks operation of all RISC-V 64bit boards on my
> >>>> test farm (VisionFive2, BananaPiF3 as well as QEMU's Virt machine). 
> >>>> I've
> >>>> isolated the changes responsible for this issue, see the inline 
> >>>> comments
> >>>> in the patch below. Here is an example of the issues observed in the
> >>>> logs from those machines:
> >>> Thanks for the report. I'm really confused by this because this 
> >>> change should
> >>> just be removal of dead code - nothing sets any of the removed PFN_* 
> >>> flags
> >>> AFAICT.
> >>>
> >>> I don't have access to any RISC-V hardwdare but you say this 
> >>> reproduces under
> >>> qemu - what do you run on the system to cause the error? Is it just 
> >>> a simple
> >>> boot and load a module or are you running selftests or something else?
> >>
> >> It fails a simple boot test. Here is a detailed instruction how to
> >> reproduce this issue with the random Debian rootfs image found on the
> >> internet (tested on Ubuntu 22.04, with next-20250610
> >> kernel source):
> >
> > riscv is one of the archs where pte_mkdevmap() will *not* set the pte 
> > as special. (I
> > raised this recently in the original series, it's all a big mess)
> >
> > So, before this change here, pfn_t_devmap() would have returned 
> > "false" if only
> > PFN_DEV was set, now it would return "true" if only PFN_DEV is set.

Ugh, what a mess. Thanks for pointing that out (I had seen your earlier response
to the original series but hadn't found the time to look into it more deeply).

> > Consequently, in insert_pfn() we would have done a pte_mkspecial(), 
> > now we do a
> > pte_mkdevmap() -- again, which does not imply "special" on riscv.
> >
> > riscv selects CONFIG_ARCH_HAS_PTE_SPECIAL, so if !pte_special(), it's 
> > considered as
> > normal.
> >
> > Would the following fix your issue?
> >
> >
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 8eba595056fe3..0e972c3493692 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -589,6 +589,10 @@ struct page *vm_normal_page(struct vm_area_struct 
> > *vma, unsigned long addr,
> >  {
> >         unsigned long pfn = pte_pfn(pte);
> >
> > +       /* TODO: remove this crap and set pte_special() instead. */
> > +       if (pte_devmap(pte))
> > +               return NULL;
> > +
> >         if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) {
> >                 if (likely(!pte_special(pte)))
> >                         goto check_pfn;
> > @@ -598,16 +602,6 @@ struct page *vm_normal_page(struct vm_area_struct 
> > *vma, unsigned long addr,
> >                         return NULL;
> >                 if (is_zero_pfn(pfn))
> >                         return NULL;
> > -               if (pte_devmap(pte))
> > -               /*
> > -                * NOTE: New users of ZONE_DEVICE will not set 
> > pte_devmap()
> > -                * and will have refcounts incremented on their struct 
> > pages
> > -                * when they are inserted into PTEs, thus they are 
> > safe to
> > -                * return here. Legacy ZONE_DEVICE pages that set 
> > pte_devmap()
> > -                * do not have refcounts. Example of legacy 
> > ZONE_DEVICE is
> > -                * MEMORY_DEVICE_FS_DAX type in pmem or virtio_fs 
> > drivers.
> > -                */
> > -                       return NULL;
> >
> >                 print_bad_pte(vma, addr, pte, NULL);
> >                 return NULL;
> >
> >
> > But, I would have thought the later patches in Alistairs series would 
> > sort that out
> > (where we remove pte_devmap() ... )
> >

Yes, I think Marek confirmed that it did in his earlier reply.

> The above change fixes the issues observed on RISCV boards.

Thanks for testing. Andrew has already removed this from the -mm tree so I'll
reincorporate this back into the series and see if I can figure something out
when I respin it.

- Alistair

> Best regards
> -- 
> Marek Szyprowski, PhD
> Samsung R&D Institute Poland
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-06-11  8:58 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-06-04  3:21 [PATCH] mm: Remove PFN_MAP, PFN_SPECIAL, PFN_SG_CHAIN and PFN_SG_LAST Alistair Popple
     [not found] ` <CGME20250610161811eucas1p18de4ba7b320b6d6ff7da44786b350b6e@eucas1p1.samsung.com>
2025-06-10 16:18   ` Marek Szyprowski
2025-06-11  2:38     ` Alistair Popple
2025-06-11  8:03       ` Marek Szyprowski
2025-06-11  8:23         ` David Hildenbrand
2025-06-11  8:42           ` Marek Szyprowski
2025-06-11  8:58             ` Alistair Popple

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox