linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Borislav Petkov <bp@alien8.de>
To: Tony Luck <tony.luck@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Andy Lutomirski <luto@kernel.org>,
	Dan Williams <dan.j.williams@intel.com>,
	Elliott@pd.tnic, Robert <elliott@hpe.com>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-nvdimm@ml01.01.org, x86@kernel.org
Subject: Re: [PATCHV3 3/3] x86, ras: Add mcsafe_memcpy() function to recover from machine checks
Date: Tue, 22 Dec 2015 12:13:49 +0100	[thread overview]
Message-ID: <20151222111349.GB3728@pd.tnic> (raw)
In-Reply-To: <d560d03663b6fd7a5bbeae9842934f329a7dcbdf.1450283985.git.tony.luck@intel.com>

On Tue, Dec 15, 2015 at 05:30:49PM -0800, Tony Luck wrote:
> Using __copy_user_nocache() as inspiration create a memory copy
> routine for use by kernel code with annotations to allow for
> recovery from machine checks.
> 
> Notes:
> 1) We align the source address rather than the destination. This
>    means we never have to deal with a memory read that spans two
>    cache lines ... so we can provide a precise indication of
>    where the error occurred without having to re-execute at
>    a byte-by-byte level to find the exact spot like the original
>    did.
> 2) We 'or' BIT(63) into the return because this is the first
>    in a series of machine check safe functions. Some will copy
>    from user addresses, so may need to indicate an invalid user
>    address instead of a machine check.
> 3) This code doesn't play any cache games. Future functions can
>    use non-temporal loads/stores to meet needs of different callers.
> 4) Provide helpful macros to decode the return value.
> 
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>  arch/x86/include/asm/mcsafe_copy.h |  11 +++
>  arch/x86/kernel/x8664_ksyms_64.c   |   5 ++
>  arch/x86/lib/Makefile              |   1 +
>  arch/x86/lib/mcsafe_copy.S         | 142 +++++++++++++++++++++++++++++++++++++
>  4 files changed, 159 insertions(+)
>  create mode 100644 arch/x86/include/asm/mcsafe_copy.h
>  create mode 100644 arch/x86/lib/mcsafe_copy.S
> 
> diff --git a/arch/x86/include/asm/mcsafe_copy.h b/arch/x86/include/asm/mcsafe_copy.h
> new file mode 100644
> index 000000000000..d4dbd5a667a3
> --- /dev/null
> +++ b/arch/x86/include/asm/mcsafe_copy.h
> @@ -0,0 +1,11 @@
> +#ifndef _ASM_X86_MCSAFE_COPY_H
> +#define _ASM_X86_MCSAFE_COPY_H
> +
> +u64 mcsafe_memcpy(void *dst, const void *src, unsigned size);
> +
> +#define	COPY_MCHECK_ERRBIT	BIT(63)

What happened to the landing pads Andy was talking about? They sound
like cleaner design than that bit 63...

> +#define COPY_HAD_MCHECK(ret)	((ret) & COPY_MCHECK_ERRBIT)
> +#define COPY_MCHECK_REMAIN(ret)	((ret) & ~COPY_MCHECK_ERRBIT)
> +
> +#endif /* _ASM_MCSAFE_COPY_H */

This should all be in arch/x86/include/asm/string_64.h I guess. You can
save yourself the #include-ing.

> diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
> index a0695be19864..afab8b25dbc0 100644
> --- a/arch/x86/kernel/x8664_ksyms_64.c
> +++ b/arch/x86/kernel/x8664_ksyms_64.c
> @@ -37,6 +37,11 @@ EXPORT_SYMBOL(__copy_user_nocache);
>  EXPORT_SYMBOL(_copy_from_user);
>  EXPORT_SYMBOL(_copy_to_user);
>  
> +#ifdef CONFIG_MCE_KERNEL_RECOVERY
> +#include <asm/mcsafe_copy.h>
> +EXPORT_SYMBOL(mcsafe_memcpy);
> +#endif
> +
>  EXPORT_SYMBOL(copy_page);
>  EXPORT_SYMBOL(clear_page);
>  
> diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
> index f2587888d987..82bb0bf46b6b 100644
> --- a/arch/x86/lib/Makefile
> +++ b/arch/x86/lib/Makefile
> @@ -21,6 +21,7 @@ lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
>  lib-y += memcpy_$(BITS).o
>  lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
>  lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
> +lib-$(CONFIG_MCE_KERNEL_RECOVERY) += mcsafe_copy.o
>  
>  obj-y += msr.o msr-reg.o msr-reg-export.o
>  
> diff --git a/arch/x86/lib/mcsafe_copy.S b/arch/x86/lib/mcsafe_copy.S
> new file mode 100644
> index 000000000000..059b3a9642eb
> --- /dev/null
> +++ b/arch/x86/lib/mcsafe_copy.S

You probably should add that function to arch/x86/lib/memcpy_64.S where
we keep all those memcpy variants instead of a separate file.

> @@ -0,0 +1,142 @@
> +/*
> + * Copyright (C) 2015 Intel Corporation
> + * Author: Tony Luck
> + *
> + * This software may be redistributed and/or modified under the terms of
> + * the GNU General Public License ("GPL") version 2 only as published by the
> + * Free Software Foundation.
> + */
> +
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +
> +/*
> + * mcsafe_memcpy - memory copy with machine check exception handling
> + * Note that we only catch machine checks when reading the source addresses.
> + * Writes to target are posted and don't generate machine checks.
> + */
> +ENTRY(mcsafe_memcpy)
> +	cmpl $8,%edx
> +	jb 20f		/* less then 8 bytes, go to byte copy loop */
> +
> +	/* check for bad alignment of source */
> +	movl %esi,%ecx
> +	andl $7,%ecx
> +	jz 102f				/* already aligned */
> +	subl $8,%ecx
> +	negl %ecx
> +	subl %ecx,%edx
> +0:	movb (%rsi),%al
> +	movb %al,(%rdi)
> +	incq %rsi
> +	incq %rdi
> +	decl %ecx
> +	jnz 0b
> +102:
> +	movl %edx,%ecx
> +	andl $63,%edx
> +	shrl $6,%ecx
> +	jz 17f
> +1:	movq (%rsi),%r8
> +2:	movq 1*8(%rsi),%r9
> +3:	movq 2*8(%rsi),%r10
> +4:	movq 3*8(%rsi),%r11
> +	mov %r8,(%rdi)
> +	mov %r9,1*8(%rdi)
> +	mov %r10,2*8(%rdi)
> +	mov %r11,3*8(%rdi)
> +9:	movq 4*8(%rsi),%r8
> +10:	movq 5*8(%rsi),%r9
> +11:	movq 6*8(%rsi),%r10
> +12:	movq 7*8(%rsi),%r11
> +	mov %r8,4*8(%rdi)
> +	mov %r9,5*8(%rdi)
> +	mov %r10,6*8(%rdi)
> +	mov %r11,7*8(%rdi)
> +	leaq 64(%rsi),%rsi
> +	leaq 64(%rdi),%rdi
> +	decl %ecx
> +	jnz 1b
> +17:	movl %edx,%ecx
> +	andl $7,%edx
> +	shrl $3,%ecx
> +	jz 20f
> +18:	movq (%rsi),%r8
> +	mov %r8,(%rdi)
> +	leaq 8(%rsi),%rsi
> +	leaq 8(%rdi),%rdi
> +	decl %ecx
> +	jnz 18b
> +20:	andl %edx,%edx
> +	jz 23f
> +	movl %edx,%ecx
> +21:	movb (%rsi),%al
> +	movb %al,(%rdi)
> +	incq %rsi
> +	incq %rdi
> +	decl %ecx
> +	jnz 21b
> +23:	xorl %eax,%eax
> +	sfence
> +	ret
> +
> +	.section .fixup,"ax"
> +30:
> +	addl %ecx,%edx
> +	jmp 100f
> +31:
> +	shll $6,%ecx
> +	addl %ecx,%edx
> +	jmp 100f
> +32:
> +	shll $6,%ecx
> +	leal -8(%ecx,%edx),%edx
> +	jmp 100f
> +33:
> +	shll $6,%ecx
> +	leal -16(%ecx,%edx),%edx
> +	jmp 100f
> +34:
> +	shll $6,%ecx
> +	leal -24(%ecx,%edx),%edx
> +	jmp 100f
> +35:
> +	shll $6,%ecx
> +	leal -32(%ecx,%edx),%edx
> +	jmp 100f
> +36:
> +	shll $6,%ecx
> +	leal -40(%ecx,%edx),%edx
> +	jmp 100f
> +37:
> +	shll $6,%ecx
> +	leal -48(%ecx,%edx),%edx
> +	jmp 100f
> +38:
> +	shll $6,%ecx
> +	leal -56(%ecx,%edx),%edx
> +	jmp 100f
> +39:
> +	lea (%rdx,%rcx,8),%rdx
> +	jmp 100f
> +40:
> +	movl %ecx,%edx
> +100:
> +	sfence
> +	movabsq	$0x8000000000000000, %rax
> +	orq %rdx,%rax

I think you want to write this as:

	mov %rdx, %rax
	bts $63, %rax

It cuts down instruction bytes by almost half and it is a bit more
readable:

  5c:   48 b8 00 00 00 00 00    movabs $0x8000000000000000,%rax
  63:   00 00 80
  66:   48 09 d0                or     %rdx,%rax

  5c:   48 89 d0                mov    %rdx,%rax
  5f:   48 0f ba e8 3f          bts    $0x3f,%rax


> +	ret

Also, you can drop the "l" suffix - default operand size is 32-bit in
long mode:

        .section .fixup,"ax"
30:
        add %ecx,%edx
        jmp 100f
31:
        shl $6,%ecx
        add %ecx,%edx
        jmp 100f
32:
        shl $6,%ecx
        lea -8(%ecx,%edx),%edx
        jmp 100f
33:
        shl $6,%ecx
        lea -16(%ecx,%edx),%edx
        jmp 100f
34:
        shl $6,%ecx
        lea -24(%ecx,%edx),%edx
        jmp 100f
35:
        shl $6,%ecx
        lea -32(%ecx,%edx),%edx
        jmp 100f
36:
        shl $6,%ecx
        lea -40(%ecx,%edx),%edx
        jmp 100f
37:
        shl $6,%ecx
        lea -48(%ecx,%edx),%edx
        jmp 100f
38:
        shl $6,%ecx
        lea -56(%ecx,%edx),%edx
        jmp 100f
39:
        lea (%rdx,%rcx,8),%rdx
        jmp 100f
40:
        mov %ecx,%edx
100:

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2015-12-22 11:13 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-12-16 16:39 [PATCHV3 0/3] Machine check recovery when kernel accesses poison Tony Luck
2015-12-16  1:29 ` [PATCHV3 1/3] x86, ras: Add new infrastructure for machine check fixup tables Tony Luck
2015-12-16 17:55   ` Andy Lutomirski
2015-12-16 22:51     ` Luck, Tony
2015-12-17 16:22       ` Andy Lutomirski
2015-12-21 18:18   ` Borislav Petkov
2015-12-21 19:16     ` Dan Williams
2015-12-21 20:15       ` Borislav Petkov
2015-12-22 11:13   ` Borislav Petkov
2015-12-16  1:29 ` [PATCHV3 2/3] x86, ras: Extend machine check recovery code to annotated ring0 areas Tony Luck
2015-12-22 11:14   ` Borislav Petkov
2015-12-16  1:30 ` [PATCHV3 3/3] x86, ras: Add mcsafe_memcpy() function to recover from machine checks Tony Luck
2015-12-22 11:13   ` Borislav Petkov [this message]
2015-12-22 19:38     ` Tony Luck
2015-12-23 12:58       ` Borislav Petkov
2015-12-23 19:31         ` Dan Williams
2015-12-23 20:46           ` Tony Luck
2015-12-24 13:37             ` Borislav Petkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151222111349.GB3728@pd.tnic \
    --to=bp@alien8.de \
    --cc=Elliott@pd.tnic \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=elliott@hpe.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@ml01.01.org \
    --cc=luto@kernel.org \
    --cc=mingo@kernel.org \
    --cc=tony.luck@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox