linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Christophe Leroy <christophe.leroy@csgroup.eu>
To: Kefeng Wang <wangkefeng.wang@huawei.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>
Cc: "surenb@google.com" <surenb@google.com>,
	"willy@infradead.org" <willy@infradead.org>,
	Russell King <linux@armlinux.org.uk>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Will Deacon <will@kernel.org>,
	Huacai Chen <chenhuacai@kernel.org>,
	WANG Xuerui <kernel@xen0n.name>,
	Michael Ellerman <mpe@ellerman.id.au>,
	Nicholas Piggin <npiggin@gmail.com>,
	Paul Walmsley <paul.walmsley@sifive.com>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	Albert Ou <aou@eecs.berkeley.edu>,
	Alexander Gordeev <agordeev@linux.ibm.com>,
	Gerald Schaefer <gerald.schaefer@linux.ibm.com>,
	Heiko Carstens <hca@linux.ibm.com>,
	Vasily Gorbik <gor@linux.ibm.com>,
	Christian Borntraeger <borntraeger@linux.ibm.com>,
	Sven Schnelle <svens@linux.ibm.com>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Andy Lutomirski <luto@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	"x86@kernel.org" <x86@kernel.org>,
	"H . Peter Anvin" <hpa@zytor.com>,
	"linux-arm-kernel@lists.infradead.org"
	<linux-arm-kernel@lists.infradead.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"loongarch@lists.linux.dev" <loongarch@lists.linux.dev>,
	"linuxppc-dev@lists.ozlabs.org" <linuxppc-dev@lists.ozlabs.org>,
	"linux-riscv@lists.infradead.org"
	<linux-riscv@lists.infradead.org>,
	"linux-s390@vger.kernel.org" <linux-s390@vger.kernel.org>
Subject: Re: [PATCH rfc v2 05/10] powerpc: mm: use try_vma_locked_page_fault()
Date: Tue, 22 Aug 2023 09:38:47 +0000	[thread overview]
Message-ID: <7eeed961-c2c0-2aeb-ff8c-3717de09d605@csgroup.eu> (raw)
In-Reply-To: <20230821123056.2109942-6-wangkefeng.wang@huawei.com>



Le 21/08/2023 à 14:30, Kefeng Wang a écrit :
> Use new try_vma_locked_page_fault() helper to simplify code.
> No functional change intended.

Does it really simplify the code? It's 32 insertions versus 34 deletions, 
so it only removes 2 lines.

I don't like the struct vm_fault you are adding because when these were four 
independent variables they were handled through local registers. Now that 
it is a struct it has to go via the stack, leading to unnecessary memory 
reads and writes. And going back and forth between architecture code and 
generic code may also hurt performance.

Did you do any performance analysis? Page faults are really a hot 
path when dealing with minor faults.

Thanks
Christophe

> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>   arch/powerpc/mm/fault.c | 66 ++++++++++++++++++++---------------------
>   1 file changed, 32 insertions(+), 34 deletions(-)
> 
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index b1723094d464..52f9546e020e 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -391,6 +391,22 @@ static int page_fault_is_bad(unsigned long err)
>   #define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_32S)
>   #endif
>   
> +#ifdef CONFIG_PER_VMA_LOCK
> +bool arch_vma_access_error(struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> +	int is_exec = TRAP(vmf->regs) == INTERRUPT_INST_STORAGE;
> +	int is_write = page_fault_is_write(vmf->fault_code);
> +
> +	if (unlikely(access_pkey_error(is_write, is_exec,
> +				(vmf->fault_code & DSISR_KEYFAULT), vma)))
> +		return true;
> +
> +	if (unlikely(access_error(is_write, is_exec, vma)))
> +		return true;
> +	return false;
> +}
> +#endif
> +
>   /*
>    * For 600- and 800-family processors, the error_code parameter is DSISR
>    * for a data fault, SRR1 for an instruction fault.
> @@ -407,12 +423,18 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
>   {
>   	struct vm_area_struct * vma;
>   	struct mm_struct *mm = current->mm;
> -	unsigned int flags = FAULT_FLAG_DEFAULT;
>   	int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
>   	int is_user = user_mode(regs);
>   	int is_write = page_fault_is_write(error_code);
>   	vm_fault_t fault, major = 0;
>   	bool kprobe_fault = kprobe_page_fault(regs, 11);
> +	struct vm_fault vmf = {
> +		.real_address = address,
> +		.fault_code = error_code,
> +		.regs = regs,
> +		.flags = FAULT_FLAG_DEFAULT,
> +	};
> +
>   
>   	if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
>   		return 0;
> @@ -463,45 +485,21 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
>   	 * mmap_lock held
>   	 */
>   	if (is_user)
> -		flags |= FAULT_FLAG_USER;
> +		vmf.flags |= FAULT_FLAG_USER;
>   	if (is_write)
> -		flags |= FAULT_FLAG_WRITE;
> +		vmf.flags |= FAULT_FLAG_WRITE;
>   	if (is_exec)
> -		flags |= FAULT_FLAG_INSTRUCTION;
> +		vmf.flags |= FAULT_FLAG_INSTRUCTION;
>   
> -	if (!(flags & FAULT_FLAG_USER))
> -		goto lock_mmap;
> -
> -	vma = lock_vma_under_rcu(mm, address);
> -	if (!vma)
> -		goto lock_mmap;
> -
> -	if (unlikely(access_pkey_error(is_write, is_exec,
> -				       (error_code & DSISR_KEYFAULT), vma))) {
> -		vma_end_read(vma);
> -		goto lock_mmap;
> -	}
> -
> -	if (unlikely(access_error(is_write, is_exec, vma))) {
> -		vma_end_read(vma);
> -		goto lock_mmap;
> -	}
> -
> -	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
> -	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
> -		vma_end_read(vma);
> -
> -	if (!(fault & VM_FAULT_RETRY)) {
> -		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> +	fault = try_vma_locked_page_fault(&vmf);
> +	if (fault == VM_FAULT_NONE)
> +		goto retry;
> +	if (!(fault & VM_FAULT_RETRY))
>   		goto done;
> -	}
> -	count_vm_vma_lock_event(VMA_LOCK_RETRY);
>   
>   	if (fault_signal_pending(fault, regs))
>   		return user_mode(regs) ? 0 : SIGBUS;
>   
> -lock_mmap:
> -
>   	/* When running in the kernel we expect faults to occur only to
>   	 * addresses in user space.  All other faults represent errors in the
>   	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
> @@ -528,7 +526,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
>   	 * make sure we exit gracefully rather than endlessly redo
>   	 * the fault.
>   	 */
> -	fault = handle_mm_fault(vma, address, flags, regs);
> +	fault = handle_mm_fault(vma, address, vmf.flags, regs);
>   
>   	major |= fault & VM_FAULT_MAJOR;
>   
> @@ -544,7 +542,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
>   	 * case.
>   	 */
>   	if (unlikely(fault & VM_FAULT_RETRY)) {
> -		flags |= FAULT_FLAG_TRIED;
> +		vmf.flags |= FAULT_FLAG_TRIED;
>   		goto retry;
>   	}
>   

  reply	other threads:[~2023-08-22  9:38 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-21 12:30 [PATCH rfc -next v2 00/10] mm: convert to generic VMA lock-based page fault Kefeng Wang
2023-08-21 12:30 ` [PATCH rfc v2 01/10] mm: add a generic VMA lock-based page fault handler Kefeng Wang
2023-08-21 15:13   ` kernel test robot
2023-08-22  2:33     ` Kefeng Wang
2023-08-24  7:12   ` Alexander Gordeev
2023-08-26  0:56     ` Kefeng Wang
2023-08-21 12:30 ` [PATCH rfc v2 02/10] arm64: mm: use try_vma_locked_page_fault() Kefeng Wang
2023-08-21 12:30 ` [PATCH rfc v2 03/10] x86: " Kefeng Wang
2023-08-21 12:30 ` [PATCH rfc v2 04/10] s390: " Kefeng Wang
2023-08-24  8:16   ` Alexander Gordeev
     [not found]     ` <20230824083225.10112-A-hca@linux.ibm.com>
2023-08-26  1:07       ` Kefeng Wang
2023-08-21 12:30 ` [PATCH rfc v2 05/10] powerpc: " Kefeng Wang
2023-08-22  9:38   ` Christophe Leroy [this message]
2023-08-22 12:12     ` Kefeng Wang
2023-08-21 12:30 ` [PATCH rfc v2 06/10] riscv: " Kefeng Wang
2023-08-21 12:30 ` [PATCH rfc v2 07/10] ARM: mm: try VMA lock-based page fault handling first Kefeng Wang
2023-08-21 12:30 ` [PATCH rfc v2 08/10] loongarch: mm: cleanup __do_page_fault() Kefeng Wang
2023-08-21 12:30 ` [PATCH rfc v2 09/10] loongarch: mm: add access_error() helper Kefeng Wang
2023-08-21 12:30 ` [PATCH rfc v2 10/10] loongarch: mm: try VMA lock-based page fault handling first Kefeng Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7eeed961-c2c0-2aeb-ff8c-3717de09d605@csgroup.eu \
    --to=christophe.leroy@csgroup.eu \
    --cc=agordeev@linux.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=aou@eecs.berkeley.edu \
    --cc=borntraeger@linux.ibm.com \
    --cc=bp@alien8.de \
    --cc=catalin.marinas@arm.com \
    --cc=chenhuacai@kernel.org \
    --cc=dave.hansen@linux.intel.com \
    --cc=gerald.schaefer@linux.ibm.com \
    --cc=gor@linux.ibm.com \
    --cc=hca@linux.ibm.com \
    --cc=hpa@zytor.com \
    --cc=kernel@xen0n.name \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=linux@armlinux.org.uk \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=loongarch@lists.linux.dev \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=mpe@ellerman.id.au \
    --cc=npiggin@gmail.com \
    --cc=palmer@dabbelt.com \
    --cc=paul.walmsley@sifive.com \
    --cc=peterz@infradead.org \
    --cc=surenb@google.com \
    --cc=svens@linux.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=wangkefeng.wang@huawei.com \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox