linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Christophe Leroy <christophe.leroy@csgroup.eu>
To: Nicholas Piggin <npiggin@gmail.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Jason Gunthorpe <jgg@nvidia.com>, Peter Xu <peterx@redhat.com>,
	Oscar Salvador <osalvador@suse.de>,
	Michael Ellerman <mpe@ellerman.id.au>
Cc: "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"linuxppc-dev@lists.ozlabs.org" <linuxppc-dev@lists.ozlabs.org>
Subject: Re: [RFC PATCH v2 18/20] powerpc/64s: Use contiguous PMD/PUD instead of HUGEPD
Date: Mon, 20 May 2024 16:43:19 +0000	[thread overview]
Message-ID: <99575c2c-7840-4fa4-b84e-aaddc7fef4cb@csgroup.eu> (raw)
In-Reply-To: <D1EHK0STZ19E.3CTOAWG7LVBPK@gmail.com>



Le 20/05/2024 à 14:54, Nicholas Piggin a écrit :
> On Sat May 18, 2024 at 5:00 AM AEST, Christophe Leroy wrote:
>> On book3s/64, the only user of hugepd is hash in 4k mode.
>>
>> All other setups (hash-64, radix-4, radix-64) use leaf PMD/PUD.
>>
>> Rework hash-4k to use contiguous PMD and PUD instead.
>>
>> In that setup there are only two huge page sizes: 16M and 16G.
>>
>> 16M sits at PMD level and 16G at PUD level.
>>
>> pte_update doesn't know page size, lets use the same trick as
>> hpte_need_flush() to get page size from segment properties. That's
>> not the most efficient way but let's do that until callers of
>> pte_update() provide page size instead of just a huge flag.
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>> ---
>>   arch/powerpc/include/asm/book3s/64/hash-4k.h  | 15 --------
>>   arch/powerpc/include/asm/book3s/64/hash.h     | 38 +++++++++++++++----
>>   arch/powerpc/include/asm/book3s/64/hugetlb.h  | 38 -------------------
>>   .../include/asm/book3s/64/pgtable-4k.h        | 34 -----------------
>>   .../include/asm/book3s/64/pgtable-64k.h       | 20 ----------
>>   arch/powerpc/include/asm/hugetlb.h            |  4 ++
>>   .../include/asm/nohash/32/hugetlb-8xx.h       |  4 --
>>   .../powerpc/include/asm/nohash/hugetlb-e500.h |  4 --
>>   arch/powerpc/include/asm/page.h               |  8 ----
>>   arch/powerpc/mm/book3s64/hash_utils.c         | 11 ++++--
>>   arch/powerpc/mm/book3s64/pgtable.c            | 12 ------
>>   arch/powerpc/mm/hugetlbpage.c                 | 19 ----------
>>   arch/powerpc/mm/pgtable.c                     |  2 +-
>>   arch/powerpc/platforms/Kconfig.cputype        |  1 -
>>   14 files changed, 43 insertions(+), 167 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
>> index 6472b08fa1b0..c654c376ef8b 100644
>> --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
>> +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
>> @@ -74,21 +74,6 @@
>>   #define remap_4k_pfn(vma, addr, pfn, prot)	\
>>   	remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
>>   
>> -#ifdef CONFIG_HUGETLB_PAGE
>> -static inline int hash__hugepd_ok(hugepd_t hpd)
>> -{
>> -	unsigned long hpdval = hpd_val(hpd);
>> -	/*
>> -	 * if it is not a pte and have hugepd shift mask
>> -	 * set, then it is a hugepd directory pointer
>> -	 */
>> -	if (!(hpdval & _PAGE_PTE) && (hpdval & _PAGE_PRESENT) &&
>> -	    ((hpdval & HUGEPD_SHIFT_MASK) != 0))
>> -		return true;
>> -	return false;
>> -}
>> -#endif
>> -
>>   /*
>>    * 4K PTE format is different from 64K PTE format. Saving the hash_slot is just
>>    * a matter of returning the PTE bits that need to be modified. On 64K PTE,
>> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
>> index faf3e3b4e4b2..509811ca7695 100644
>> --- a/arch/powerpc/include/asm/book3s/64/hash.h
>> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
>> @@ -4,6 +4,7 @@
>>   #ifdef __KERNEL__
>>   
>>   #include <asm/asm-const.h>
>> +#include <asm/book3s/64/slice.h>
>>   
>>   /*
>>    * Common bits between 4K and 64K pages in a linux-style PTE.
>> @@ -161,14 +162,10 @@ extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
>>   			    pte_t *ptep, unsigned long pte, int huge);
>>   unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags);
>>   /* Atomic PTE updates */
>> -static inline unsigned long hash__pte_update(struct mm_struct *mm,
>> -					 unsigned long addr,
>> -					 pte_t *ptep, unsigned long clr,
>> -					 unsigned long set,
>> -					 int huge)
>> +static inline unsigned long hash__pte_update_one(pte_t *ptep, unsigned long clr,
>> +						 unsigned long set)
>>   {
>>   	__be64 old_be, tmp_be;
>> -	unsigned long old;
>>   
>>   	__asm__ __volatile__(
>>   	"1:	ldarx	%0,0,%3		# pte_update\n\
>> @@ -182,11 +179,38 @@ static inline unsigned long hash__pte_update(struct mm_struct *mm,
>>   	: "r" (ptep), "r" (cpu_to_be64(clr)), "m" (*ptep),
>>   	  "r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set))
>>   	: "cc" );
>> +
>> +	return be64_to_cpu(old_be);
>> +}
>> +
>> +static inline unsigned long hash__pte_update(struct mm_struct *mm,
>> +					 unsigned long addr,
>> +					 pte_t *ptep, unsigned long clr,
>> +					 unsigned long set,
>> +					 int huge)
>> +{
>> +	unsigned long old;
>> +
>> +	old = hash__pte_update_one(ptep, clr, set);
>> +
>> +	if (huge && IS_ENABLED(CONFIG_PPC_4K_PAGES)) {
>> +		unsigned int psize = get_slice_psize(mm, addr);
>> +		int nb, i;
>> +
>> +		if (psize == MMU_PAGE_16M)
>> +			nb = SZ_16M / PMD_SIZE;
>> +		else if (psize == MMU_PAGE_16G)
>> +			nb = SZ_16G / PUD_SIZE;
>> +		else
>> +			nb = 1;
>> +
>> +		for (i = 1; i < nb; i++)
>> +			hash__pte_update_one(ptep + i, clr, set);
>> +	}
>>   	/* huge pages use the old page table lock */
>>   	if (!huge)
>>   		assert_pte_locked(mm, addr);
>>   
>> -	old = be64_to_cpu(old_be);
>>   	if (old & H_PAGE_HASHPTE)
>>   		hpte_need_flush(mm, addr, ptep, old, huge);
>>   
> 
> Nice series, I don't know this hugepd code very well but I'll try.
> Why do you have to replicate the PTE entry here? The hash table refill
> should always be working on the first PTE of the page otherwise we have
> bigger problems.

I don't know how book3s/64 works exactly, but on nohash, when you get a 
TLB miss exception the only thing you have is the address and you don't 
know yes it is a hugepage so you get the PTE as if it was a 4k page and 
it is only when you read that PTE that you know it is a hugepage.

Ok, on book3s/64 the page size seems to be encoded inside the segment so 
maybe it is a bit different but anyway the TLB miss exception (or DSI ?) 
can happen at any address.

> 
> What paths look at the N > 0 PTEs of a contiguous page entry?
> 

pte_offset_kernel() or pte_offset_map_lock() will land on any contiguous 
PTE based on the address handed to pte_index(), as if it was a standard 
(4k or 64k) page.

pte_index() doesn't know it is a hugepage, that's the reason why we need 
to duplicate the entry.

  reply	other threads:[~2024-05-20 16:43 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-17 18:59 [RFC PATCH v2 00/20] Reimplement huge pages without hugepd on powerpc (8xx, e500, book3s/64) Christophe Leroy
2024-05-17 18:59 ` [RFC PATCH v2 01/20] mm: Provide pagesize to pmd_populate() Christophe Leroy
2024-05-20  9:01   ` Oscar Salvador
2024-05-20 16:24     ` Christophe Leroy
2024-05-21 11:57       ` Oscar Salvador
2024-05-22  8:37         ` Christophe Leroy
2024-05-17 18:59 ` [RFC PATCH v2 02/20] mm: Provide page size to pte_alloc_huge() Christophe Leroy
2024-05-17 18:59 ` [RFC PATCH v2 03/20] mm: Provide pmd to pte_leaf_size() Christophe Leroy
2024-05-21  9:39   ` Oscar Salvador
2024-05-22 10:22     ` Christophe Leroy
2024-05-17 18:59 ` [RFC PATCH v2 04/20] mm: Provide mm_struct and address to huge_ptep_get() Christophe Leroy
2024-05-17 18:59 ` [RFC PATCH v2 05/20] powerpc/mm: Allow hugepages without hugepd Christophe Leroy
2024-05-17 19:00 ` [RFC PATCH v2 06/20] powerpc/8xx: Fix size given to set_huge_pte_at() Christophe Leroy
2024-05-20  9:14   ` Oscar Salvador
2024-05-20 16:31     ` Christophe Leroy
2024-05-20 17:42       ` Oscar Salvador
2024-05-22  8:45         ` Christophe Leroy
2024-05-21  0:48       ` Michael Ellerman
2024-05-21  9:26         ` Oscar Salvador
2024-05-22  8:32           ` Christophe Leroy
2024-05-22 12:18             ` Christophe Leroy
2024-05-17 19:00 ` [RFC PATCH v2 07/20] powerpc/8xx: Rework support for 8M pages using contiguous PTE entries Christophe Leroy
2024-05-24 10:02   ` Oscar Salvador
2024-05-24 11:47     ` Christophe Leroy
2024-05-17 19:00 ` [RFC PATCH v2 08/20] powerpc/8xx: Simplify struct mmu_psize_def Christophe Leroy
2024-05-25  3:36   ` Oscar Salvador
2024-05-17 19:00 ` [RFC PATCH v2 09/20] powerpc/mm: Remove _PAGE_PSIZE Christophe Leroy
2024-05-25  3:40   ` Oscar Salvador
2024-05-17 19:00 ` [RFC PATCH v2 10/20] powerpc/mm: Fix __find_linux_pte() on 32 bits with PMD leaf entries Christophe Leroy
2024-05-25  4:12   ` Oscar Salvador
2024-05-25  6:41     ` Christophe Leroy
2024-05-17 19:00 ` [RFC PATCH v2 11/20] powerpc/mm: Complement huge_pte_alloc() for all non HUGEPD setups Christophe Leroy
2024-05-25  4:29   ` Oscar Salvador
2024-05-25  6:44     ` Christophe Leroy
2024-05-25 10:33       ` Oscar Salvador
2024-05-17 19:00 ` [RFC PATCH v2 12/20] powerpc/64e: Remove unneeded #ifdef CONFIG_PPC_E500 Christophe Leroy
2024-05-24  7:31   ` Michael Ellerman
2024-05-24  8:45     ` Christophe Leroy
2024-05-17 19:00 ` [RFC PATCH v2 13/20] powerpc/64e: Clean up impossible setups Christophe Leroy
2024-05-17 19:00 ` [RFC PATCH v2 14/20] powerpc/e500: Remove enc field from struct mmu_psize_def Christophe Leroy
2024-05-25  4:35   ` Oscar Salvador
2024-05-17 19:00 ` [RFC PATCH v2 15/20] powerpc/85xx: Switch to 64 bits PGD Christophe Leroy
2024-05-25  4:54   ` Oscar Salvador
2024-05-25  9:02     ` Christophe Leroy
2024-05-17 19:00 ` [RFC PATCH v2 16/20] powerpc/e500: Encode hugepage size in PTE bits Christophe Leroy
2024-05-17 19:00 ` [RFC PATCH v2 17/20] powerpc/e500: Use contiguous PMD instead of hugepd Christophe Leroy
2024-05-17 19:00 ` [RFC PATCH v2 18/20] powerpc/64s: Use contiguous PMD/PUD instead of HUGEPD Christophe Leroy
2024-05-20 12:54   ` Nicholas Piggin
2024-05-20 16:43     ` Christophe Leroy [this message]
2024-05-22  1:13       ` Nicholas Piggin
2024-05-22  9:32         ` Christophe Leroy
2024-05-22 12:23         ` Jason Gunthorpe
2024-05-17 19:00 ` [RFC PATCH v2 19/20] powerpc/mm: Remove hugepd leftovers Christophe Leroy
2024-05-17 19:00 ` [RFC PATCH v2 20/20] mm: Remove CONFIG_ARCH_HAS_HUGEPD Christophe Leroy
2024-05-17 19:06 ` [RFC PATCH v2 00/20] Reimplement huge pages without hugepd on powerpc (8xx, e500, book3s/64) Jason Gunthorpe
2024-05-18  6:28   ` Christophe Leroy
2024-05-23 19:40 ` Peter Xu
2024-05-24  4:46   ` Michael Ellerman
2024-05-27 14:14     ` Peter Xu
2024-05-24  6:31   ` Oscar Salvador

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=99575c2c-7840-4fa4-b84e-aaddc7fef4cb@csgroup.eu \
    --to=christophe.leroy@csgroup.eu \
    --cc=akpm@linux-foundation.org \
    --cc=jgg@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mpe@ellerman.id.au \
    --cc=npiggin@gmail.com \
    --cc=osalvador@suse.de \
    --cc=peterx@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox