* [PATCH v3 1/3] mm: hugetlb: Add huge page size param to huge_ptep_get_and_clear()
2025-02-26 12:06 [PATCH v3 0/3] Fixes for hugetlb on arm64 Ryan Roberts
@ 2025-02-26 12:06 ` Ryan Roberts
2025-02-26 14:00 ` Alexander Gordeev
2025-02-26 12:06 ` [PATCH v3 2/3] arm64: hugetlb: Fix huge_ptep_get_and_clear() for non-present ptes Ryan Roberts
` (2 subsequent siblings)
3 siblings, 1 reply; 8+ messages in thread
From: Ryan Roberts @ 2025-02-26 12:06 UTC (permalink / raw)
To: Catalin Marinas, Will Deacon, Huacai Chen, WANG Xuerui,
Thomas Bogendoerfer, James E.J. Bottomley, Helge Deller,
Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
Christophe Leroy, Naveen N Rao, Paul Walmsley, Palmer Dabbelt,
Albert Ou, Heiko Carstens, Vasily Gorbik, Alexander Gordeev,
Christian Borntraeger, Sven Schnelle, Gerald Schaefer,
David S. Miller, Andreas Larsson, Arnd Bergmann, Muchun Song,
Andrew Morton, Uladzislau Rezki, Christoph Hellwig,
David Hildenbrand, Matthew Wilcox (Oracle),
Mark Rutland, Anshuman Khandual, Dev Jain, Kevin Brodsky,
Alexandre Ghiti
Cc: Ryan Roberts, linux-arm-kernel, linux-mm, linux-kernel, stable
In order to fix a bug, arm64 needs to be told the size of the huge page
for which the huge_pte is being cleared in huge_ptep_get_and_clear().
Provide for this by adding an `unsigned long sz` parameter to the
function. This follows the same pattern as huge_pte_clear() and
set_huge_pte_at().
This commit makes the required interface modifications to the core mm as
well as all arches that implement this function (arm64, loongarch, mips,
parisc, powerpc, riscv, s390, sparc). The actual arm64 bug will be fixed
in a separate commit.
Cc: stable@vger.kernel.org
Fixes: 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit")
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com> # riscv
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
---
arch/arm64/include/asm/hugetlb.h | 4 ++--
arch/arm64/mm/hugetlbpage.c | 8 +++++---
arch/loongarch/include/asm/hugetlb.h | 6 ++++--
arch/mips/include/asm/hugetlb.h | 6 ++++--
arch/parisc/include/asm/hugetlb.h | 2 +-
arch/parisc/mm/hugetlbpage.c | 2 +-
arch/powerpc/include/asm/hugetlb.h | 6 ++++--
arch/riscv/include/asm/hugetlb.h | 3 ++-
arch/riscv/mm/hugetlbpage.c | 2 +-
arch/s390/include/asm/hugetlb.h | 16 ++++++++++++----
arch/s390/mm/hugetlbpage.c | 4 ++--
arch/sparc/include/asm/hugetlb.h | 2 +-
arch/sparc/mm/hugetlbpage.c | 2 +-
include/asm-generic/hugetlb.h | 2 +-
include/linux/hugetlb.h | 4 +++-
mm/hugetlb.c | 4 ++--
16 files changed, 46 insertions(+), 27 deletions(-)
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index c6dff3e69539..03db9cb21ace 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -42,8 +42,8 @@ extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty);
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
-extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep);
+extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long sz);
#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep);
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 98a2a0e64e25..06db4649af91 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -396,8 +396,8 @@ void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
__pte_clear(mm, addr, ptep);
}
-pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long sz)
{
int ncontig;
size_t pgsize;
@@ -549,6 +549,8 @@ bool __init arch_hugetlb_valid_size(unsigned long size)
pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
+ unsigned long psize = huge_page_size(hstate_vma(vma));
+
if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) {
/*
* Break-before-make (BBM) is required for all user space mappings
@@ -558,7 +560,7 @@ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr
if (pte_user_exec(__ptep_get(ptep)))
return huge_ptep_clear_flush(vma, addr, ptep);
}
- return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize);
}
void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
diff --git a/arch/loongarch/include/asm/hugetlb.h b/arch/loongarch/include/asm/hugetlb.h
index c8e4057734d0..4dc4b3e04225 100644
--- a/arch/loongarch/include/asm/hugetlb.h
+++ b/arch/loongarch/include/asm/hugetlb.h
@@ -36,7 +36,8 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+ unsigned long addr, pte_t *ptep,
+ unsigned long sz)
{
pte_t clear;
pte_t pte = ptep_get(ptep);
@@ -51,8 +52,9 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
pte_t pte;
+ unsigned long sz = huge_page_size(hstate_vma(vma));
- pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz);
flush_tlb_page(vma, addr);
return pte;
}
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index d0a86ce83de9..fbc71ddcf0f6 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -27,7 +27,8 @@ static inline int prepare_hugepage_range(struct file *file,
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+ unsigned long addr, pte_t *ptep,
+ unsigned long sz)
{
pte_t clear;
pte_t pte = *ptep;
@@ -42,13 +43,14 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
pte_t pte;
+ unsigned long sz = huge_page_size(hstate_vma(vma));
/*
* clear the huge pte entry firstly, so that the other smp threads will
* not get old pte entry after finishing flush_tlb_page and before
* setting new huge pte entry
*/
- pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz);
flush_tlb_page(vma, addr);
return pte;
}
diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h
index 5b3a5429f71b..21e9ace17739 100644
--- a/arch/parisc/include/asm/hugetlb.h
+++ b/arch/parisc/include/asm/hugetlb.h
@@ -10,7 +10,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep);
+ pte_t *ptep, unsigned long sz);
#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c
index e9d18cf25b79..a94fe546d434 100644
--- a/arch/parisc/mm/hugetlbpage.c
+++ b/arch/parisc/mm/hugetlbpage.c
@@ -126,7 +126,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
+ pte_t *ptep, unsigned long sz)
{
pte_t entry;
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index dad2e7980f24..86326587e58d 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -45,7 +45,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+ unsigned long addr, pte_t *ptep,
+ unsigned long sz)
{
return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 1));
}
@@ -55,8 +56,9 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
pte_t pte;
+ unsigned long sz = huge_page_size(hstate_vma(vma));
- pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz);
flush_hugetlb_page(vma, addr);
return pte;
}
diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h
index faf3624d8057..446126497768 100644
--- a/arch/riscv/include/asm/hugetlb.h
+++ b/arch/riscv/include/asm/hugetlb.h
@@ -28,7 +28,8 @@ void set_huge_pte_at(struct mm_struct *mm,
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep);
+ unsigned long addr, pte_t *ptep,
+ unsigned long sz);
#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
index 42314f093922..b4a78a4b35cf 100644
--- a/arch/riscv/mm/hugetlbpage.c
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -293,7 +293,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr,
- pte_t *ptep)
+ pte_t *ptep, unsigned long sz)
{
pte_t orig_pte = ptep_get(ptep);
int pte_num;
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 7c52acaf9f82..663e87220e89 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -25,8 +25,16 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
#define __HAVE_ARCH_HUGE_PTEP_GET
pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+pte_t __huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep);
+
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
-pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned long sz)
+{
+ return __huge_ptep_get_and_clear(mm, addr, ptep);
+}
static inline void arch_clear_hugetlb_flags(struct folio *folio)
{
@@ -48,7 +56,7 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
- return huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
+ return __huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
}
#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
@@ -59,7 +67,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
int changed = !pte_same(huge_ptep_get(vma->vm_mm, addr, ptep), pte);
if (changed) {
- huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ __huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
__set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
}
return changed;
@@ -69,7 +77,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
+ pte_t pte = __huge_ptep_get_and_clear(mm, addr, ptep);
__set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
}
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index d9ce199953de..2e568f175cd4 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -188,8 +188,8 @@ pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
return __rste_to_pte(pte_val(*ptep));
}
-pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+pte_t __huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
pte_t pte = huge_ptep_get(mm, addr, ptep);
pmd_t *pmdp = (pmd_t *) ptep;
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index c714ca6a05aa..e7a9cdd498dc 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -20,7 +20,7 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep);
+ pte_t *ptep, unsigned long sz);
#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index eee601a0d2cf..80504148d8a5 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -260,7 +260,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
}
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
+ pte_t *ptep, unsigned long sz)
{
unsigned int i, nptes, orig_shift, shift;
unsigned long size;
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index f42133dae68e..2afc95bf1655 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -90,7 +90,7 @@ static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
#ifndef __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+ unsigned long addr, pte_t *ptep, unsigned long sz)
{
return ptep_get_and_clear(mm, addr, ptep);
}
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ec8c0ccc8f95..bf5f7256bd28 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -1004,7 +1004,9 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
- return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ unsigned long psize = huge_page_size(hstate_vma(vma));
+
+ return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize);
}
#endif
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 65068671e460..de9d49e521c1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5447,7 +5447,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
if (src_ptl != dst_ptl)
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
- pte = huge_ptep_get_and_clear(mm, old_addr, src_pte);
+ pte = huge_ptep_get_and_clear(mm, old_addr, src_pte, sz);
if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
huge_pte_clear(mm, new_addr, dst_pte, sz);
@@ -5622,7 +5622,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
set_vma_resv_flags(vma, HPAGE_RESV_UNMAPPED);
}
- pte = huge_ptep_get_and_clear(mm, address, ptep);
+ pte = huge_ptep_get_and_clear(mm, address, ptep, sz);
tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
if (huge_pte_dirty(pte))
set_page_dirty(page);
--
2.43.0
^ permalink raw reply [flat|nested] 8+ messages in thread* [PATCH v3 2/3] arm64: hugetlb: Fix huge_ptep_get_and_clear() for non-present ptes
2025-02-26 12:06 [PATCH v3 0/3] Fixes for hugetlb on arm64 Ryan Roberts
2025-02-26 12:06 ` [PATCH v3 1/3] mm: hugetlb: Add huge page size param to huge_ptep_get_and_clear() Ryan Roberts
@ 2025-02-26 12:06 ` Ryan Roberts
2025-02-26 17:41 ` Catalin Marinas
2025-02-26 12:06 ` [PATCH v3 3/3] arm64: hugetlb: Fix flush_hugetlb_tlb_range() invalidation level Ryan Roberts
2025-02-27 18:50 ` [PATCH v3 0/3] Fixes for hugetlb on arm64 Will Deacon
3 siblings, 1 reply; 8+ messages in thread
From: Ryan Roberts @ 2025-02-26 12:06 UTC (permalink / raw)
To: Catalin Marinas, Will Deacon, Huacai Chen, WANG Xuerui,
Thomas Bogendoerfer, James E.J. Bottomley, Helge Deller,
Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
Christophe Leroy, Naveen N Rao, Paul Walmsley, Palmer Dabbelt,
Albert Ou, Heiko Carstens, Vasily Gorbik, Alexander Gordeev,
Christian Borntraeger, Sven Schnelle, Gerald Schaefer,
David S. Miller, Andreas Larsson, Arnd Bergmann, Muchun Song,
Andrew Morton, Uladzislau Rezki, Christoph Hellwig,
David Hildenbrand, Matthew Wilcox (Oracle),
Mark Rutland, Anshuman Khandual, Dev Jain, Kevin Brodsky,
Alexandre Ghiti
Cc: Ryan Roberts, linux-arm-kernel, linux-mm, linux-kernel, stable
arm64 supports multiple huge_pte sizes. Some of the sizes are covered by
a single pte entry at a particular level (PMD_SIZE, PUD_SIZE), and some
are covered by multiple ptes at a particular level (CONT_PTE_SIZE,
CONT_PMD_SIZE). So the function has to figure out the size from the
huge_pte pointer. This was previously done by walking the pgtable to
determine the level and by using the PTE_CONT bit to determine the
number of ptes at the level.
But the PTE_CONT bit is only valid when the pte is present. For
non-present pte values (e.g. markers, migration entries), the previous
implementation was therefore erroneously determining the size. There is
at least one known caller in core-mm, move_huge_pte(), which may call
huge_ptep_get_and_clear() for a non-present pte. So we must be robust to
this case. Additionally the "regular" ptep_get_and_clear() is robust to
being called for non-present ptes so it makes sense to follow the
behavior.
Fix this by using the new sz parameter which is now provided to the
function. Additionally when clearing each pte in a contig range, don't
gather the access and dirty bits if the pte is not present.
An alternative approach that would not require API changes would be to
store the PTE_CONT bit in a spare bit in the swap entry pte for the
non-present case. But it felt cleaner to follow other APIs' lead and
just pass in the size.
As an aside, PTE_CONT is bit 52, which corresponds to bit 40 in the swap
entry offset field (layout of non-present pte). Since hugetlb is never
swapped to disk, this field will only be populated for markers, which
always set this bit to 0 and hwpoison swap entries, which set the offset
field to a PFN; So it would only ever be 1 for a 52-bit PVA system where
memory in that high half was poisoned (I think!). So in practice, this
bit would almost always be zero for non-present ptes and we would only
clear the first entry if it was actually a contiguous block. That's
probably a less severe symptom than if it was always interpreted as 1
and cleared out potentially-present neighboring PTEs.
Cc: stable@vger.kernel.org
Fixes: 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit")
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
tmp
---
arch/arm64/mm/hugetlbpage.c | 53 ++++++++++++++-----------------------
1 file changed, 20 insertions(+), 33 deletions(-)
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 06db4649af91..b3a7fafe8892 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -100,20 +100,11 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr,
static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
{
- int contig_ptes = 0;
+ int contig_ptes = 1;
*pgsize = size;
switch (size) {
-#ifndef __PAGETABLE_PMD_FOLDED
- case PUD_SIZE:
- if (pud_sect_supported())
- contig_ptes = 1;
- break;
-#endif
- case PMD_SIZE:
- contig_ptes = 1;
- break;
case CONT_PMD_SIZE:
*pgsize = PMD_SIZE;
contig_ptes = CONT_PMDS;
@@ -122,6 +113,8 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
*pgsize = PAGE_SIZE;
contig_ptes = CONT_PTES;
break;
+ default:
+ WARN_ON(!__hugetlb_valid_size(size));
}
return contig_ptes;
@@ -163,24 +156,23 @@ static pte_t get_clear_contig(struct mm_struct *mm,
unsigned long pgsize,
unsigned long ncontig)
{
- pte_t orig_pte = __ptep_get(ptep);
- unsigned long i;
-
- for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
- pte_t pte = __ptep_get_and_clear(mm, addr, ptep);
-
- /*
- * If HW_AFDBM is enabled, then the HW could turn on
- * the dirty or accessed bit for any page in the set,
- * so check them all.
- */
- if (pte_dirty(pte))
- orig_pte = pte_mkdirty(orig_pte);
-
- if (pte_young(pte))
- orig_pte = pte_mkyoung(orig_pte);
+ pte_t pte, tmp_pte;
+ bool present;
+
+ pte = __ptep_get_and_clear(mm, addr, ptep);
+ present = pte_present(pte);
+ while (--ncontig) {
+ ptep++;
+ addr += pgsize;
+ tmp_pte = __ptep_get_and_clear(mm, addr, ptep);
+ if (present) {
+ if (pte_dirty(tmp_pte))
+ pte = pte_mkdirty(pte);
+ if (pte_young(tmp_pte))
+ pte = pte_mkyoung(pte);
+ }
}
- return orig_pte;
+ return pte;
}
static pte_t get_clear_contig_flush(struct mm_struct *mm,
@@ -401,13 +393,8 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
{
int ncontig;
size_t pgsize;
- pte_t orig_pte = __ptep_get(ptep);
-
- if (!pte_cont(orig_pte))
- return __ptep_get_and_clear(mm, addr, ptep);
-
- ncontig = find_num_contig(mm, addr, ptep, &pgsize);
+ ncontig = num_contig_ptes(sz, &pgsize);
return get_clear_contig(mm, addr, ptep, pgsize, ncontig);
}
--
2.43.0
^ permalink raw reply [flat|nested] 8+ messages in thread* [PATCH v3 3/3] arm64: hugetlb: Fix flush_hugetlb_tlb_range() invalidation level
2025-02-26 12:06 [PATCH v3 0/3] Fixes for hugetlb on arm64 Ryan Roberts
2025-02-26 12:06 ` [PATCH v3 1/3] mm: hugetlb: Add huge page size param to huge_ptep_get_and_clear() Ryan Roberts
2025-02-26 12:06 ` [PATCH v3 2/3] arm64: hugetlb: Fix huge_ptep_get_and_clear() for non-present ptes Ryan Roberts
@ 2025-02-26 12:06 ` Ryan Roberts
2025-02-27 18:50 ` [PATCH v3 0/3] Fixes for hugetlb on arm64 Will Deacon
3 siblings, 0 replies; 8+ messages in thread
From: Ryan Roberts @ 2025-02-26 12:06 UTC (permalink / raw)
To: Catalin Marinas, Will Deacon, Huacai Chen, WANG Xuerui,
Thomas Bogendoerfer, James E.J. Bottomley, Helge Deller,
Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
Christophe Leroy, Naveen N Rao, Paul Walmsley, Palmer Dabbelt,
Albert Ou, Heiko Carstens, Vasily Gorbik, Alexander Gordeev,
Christian Borntraeger, Sven Schnelle, Gerald Schaefer,
David S. Miller, Andreas Larsson, Arnd Bergmann, Muchun Song,
Andrew Morton, Uladzislau Rezki, Christoph Hellwig,
David Hildenbrand, Matthew Wilcox (Oracle),
Mark Rutland, Anshuman Khandual, Dev Jain, Kevin Brodsky,
Alexandre Ghiti
Cc: Ryan Roberts, linux-arm-kernel, linux-mm, linux-kernel, stable
commit c910f2b65518 ("arm64/mm: Update tlb invalidation routines for
FEAT_LPA2") changed the "invalidation level unknown" hint from 0 to
TLBI_TTL_UNKNOWN (INT_MAX). But the fallback "unknown level" path in
flush_hugetlb_tlb_range() was not updated. So as it stands, when trying
to invalidate CONT_PMD_SIZE or CONT_PTE_SIZE hugetlb mappings, we will
spuriously try to invalidate at level 0 on LPA2-enabled systems.
Fix this so that the fallback passes TLBI_TTL_UNKNOWN, and while we are
at it, explicitly use the correct stride and level for CONT_PMD_SIZE and
CONT_PTE_SIZE, which should provide a minor optimization.
Cc: stable@vger.kernel.org
Fixes: c910f2b65518 ("arm64/mm: Update tlb invalidation routines for FEAT_LPA2")
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
---
arch/arm64/include/asm/hugetlb.h | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 03db9cb21ace..07fbf5bf85a7 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -76,12 +76,22 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
{
unsigned long stride = huge_page_size(hstate_vma(vma));
- if (stride == PMD_SIZE)
- __flush_tlb_range(vma, start, end, stride, false, 2);
- else if (stride == PUD_SIZE)
- __flush_tlb_range(vma, start, end, stride, false, 1);
- else
- __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
+ switch (stride) {
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ __flush_tlb_range(vma, start, end, PUD_SIZE, false, 1);
+ break;
+#endif
+ case CONT_PMD_SIZE:
+ case PMD_SIZE:
+ __flush_tlb_range(vma, start, end, PMD_SIZE, false, 2);
+ break;
+ case CONT_PTE_SIZE:
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 3);
+ break;
+ default:
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN);
+ }
}
#endif /* __ASM_HUGETLB_H */
--
2.43.0
^ permalink raw reply [flat|nested] 8+ messages in thread