linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	Jonas Oberhauser <jonas.oberhauser@huaweicloud.com>
Cc: Boqun Feng <boqun.feng@gmail.com>,
	linux-kernel@vger.kernel.org, rcu@vger.kernel.org,
	linux-mm@kvack.org, lkmm@lists.linux.dev,
	"Paul E. McKenney" <paulmck@kernel.org>,
	Frederic Weisbecker <frederic@kernel.org>,
	Neeraj Upadhyay <neeraj.upadhyay@kernel.org>,
	Joel Fernandes <joel@joelfernandes.org>,
	Josh Triplett <josh@joshtriplett.org>,
	Uladzislau Rezki <urezki@gmail.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Lai Jiangshan <jiangshanlai@gmail.com>,
	Zqiang <qiang.zhang1211@gmail.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>, Will Deacon <will@kernel.org>,
	Waiman Long <longman@redhat.com>,
	Mark Rutland <mark.rutland@arm.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Kent Overstreet <kent.overstreet@gmail.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	maged.michael@gmail.com,
	Neeraj Upadhyay <neeraj.upadhyay@amd.com>
Subject: Re: [RFC PATCH 1/4] hazptr: Add initial implementation of hazard pointers
Date: Fri, 27 Sep 2024 03:20:40 +0200	[thread overview]
Message-ID: <bba2e656-4c3b-46db-b308-483de440b922@efficios.com> (raw)
In-Reply-To: <CAHk-=wjL803+FxtAPSGrWqThGQP5cCHzzwZJFq+-fkgt5DQ3VQ@mail.gmail.com>

On 2024-09-26 18:12, Linus Torvalds wrote:
> On Thu, 26 Sept 2024 at 08:54, Jonas Oberhauser
> <jonas.oberhauser@huaweicloud.com> wrote:
>>
>> No, the issue introduced by the compiler optimization (or by your
>> original patch) is that the CPU can speculatively load from the first
>> pointer as soon as it has completed the load of that pointer:
> 
> You mean the compiler can do it. The inline asm has no impact on what
> the CPU does. The conditional isn't a barrier for the actual hardware.
> But once the compiler doesn't try to do it, the data dependency on the
> address does end up being an ordering constraint on the hardware too
> (I'm happy to say that I haven't heard from the crazies that want
> value prediction in a long time).
> 
> Just use a barrier.  Or make sure to use the proper ordered memory
> accesses when possible. Don't use an inline asm for the compare - we
> don't even have anything insane like that as a portable helper, and we
> shouldn't have it.

How does the compiler barrier help in any way here ?

I am concerned about the compiler SSA GVN (Global Value Numbering)
optimizations, and I don't think a compiler barrier solves anything.
(or I'm missing something obvious)

I was concerned about the suggestion from Jonas to use "node2"
rather than "node" after the equality check as a way to ensure
the intended register is used to return the pointer, because after
the SSA GVN optimization pass, AFAIU this won't help in any way.
I have a set of examples below that show gcc use the result of the
first load, and clang use the result of the second load (on
both x86-64 and aarch64). Likewise when a load-acquire is used as
second load, which I find odd. Hopefully mixing this optimization
from gcc with speculation still abide by the memory model.

Only the asm goto approach ensures that gcc uses the result from
the second load.

#include <stdbool.h>

#define READ_ONCE(x)   \
     (*(__volatile__  __typeof__(x) *)&(x))

static inline
bool same_ptr(void *a, void *b)
{
     asm goto (
#ifdef __x86_64__
         "cmpq %[a], %[b]\n\t"
         "jne %l[ne]\n\t"
#elif defined(__aarch64__)
         "cmp %[a], %[b]\n\t"
         "bne %l[ne]\n\t"
#else
# error "unimplemented"
#endif
         : : [a] "r" (a), [b] "r" (b)
         : : ne);
     return true;
ne:
     return false;
}

int *p;

int fct_2_volatile(void)
{
     int *a, *b;

     do {
         a = READ_ONCE(p);
         asm volatile ("" : : : "memory");
         b = READ_ONCE(p);
     } while (a != b);
     return *b;
}

int fct_volatile_acquire(void)
{
     int *a, *b;

     do {
         a = READ_ONCE(p);
         asm volatile ("" : : : "memory");
         b = __atomic_load_n(&p, __ATOMIC_ACQUIRE);
     } while (a != b);
     return *b;
}

int fct_asm_compare(void)
{
     int *a, *b;

     do {
         a = READ_ONCE(p);
         asm volatile ("" : : : "memory");
         b = READ_ONCE(p);
     } while (!same_ptr(a, b));
     return *b;
}

x86-64 gcc 14.2:

fct_2_volatile:
  mov    rax,QWORD PTR [rip+0x0]        # 7 <fct_2_volatile+0x7>
  mov    rdx,QWORD PTR [rip+0x0]        # e <fct_2_volatile+0xe>
  cmp    rax,rdx
  jne    0 <fct_2_volatile>
  mov    eax,DWORD PTR [rax]
  ret
  cs nop WORD PTR [rax+rax*1+0x0]
fct_volatile_acquire:
  mov    rax,QWORD PTR [rip+0x0]        # 27 <fct_volatile_acquire+0x7>
  mov    rdx,QWORD PTR [rip+0x0]        # 2e <fct_volatile_acquire+0xe>
  cmp    rax,rdx
  jne    20 <fct_volatile_acquire>
  mov    eax,DWORD PTR [rax]
  ret
  cs nop WORD PTR [rax+rax*1+0x0]
fct_asm_compare:
  mov    rdx,QWORD PTR [rip+0x0]        # 47 <fct_asm_compare+0x7>
  mov    rax,QWORD PTR [rip+0x0]        # 4e <fct_asm_compare+0xe>
  cmp    rax,rdx
  jne    40 <fct_asm_compare>
  mov    eax,DWORD PTR [rax]
  ret
main:
  xor    eax,eax
  ret

x86-64 clang 19.1.0:

fct_2_volatile:
  mov    rcx,QWORD PTR [rip+0x0]        # 7 <fct_2_volatile+0x7>
  mov    rax,QWORD PTR [rip+0x0]        # e <fct_2_volatile+0xe>
  cmp    rcx,rax
  jne    0 <fct_2_volatile>
  mov    eax,DWORD PTR [rax]
  ret
  cs nop WORD PTR [rax+rax*1+0x0]
fct_volatile_acquire:
  mov    rcx,QWORD PTR [rip+0x0]        # 27 <fct_volatile_acquire+0x7>
  mov    rax,QWORD PTR [rip+0x0]        # 2e <fct_volatile_acquire+0xe>
  cmp    rcx,rax
  jne    20 <fct_volatile_acquire>
  mov    eax,DWORD PTR [rax]
  ret
  cs nop WORD PTR [rax+rax*1+0x0]
fct_asm_compare:
  mov    rcx,QWORD PTR [rip+0x0]        # 47 <fct_asm_compare+0x7>
  mov    rax,QWORD PTR [rip+0x0]        # 4e <fct_asm_compare+0xe>
  cmp    rax,rcx
  jne    40 <fct_asm_compare>
  mov    eax,DWORD PTR [rax]
  ret
  cs nop WORD PTR [rax+rax*1+0x0]
main:
  xor    eax,eax
  ret

ARM64 gcc 14.2.0:

fct_2_volatile:
         adrp    x0, .LANCHOR0
         add     x0, x0, :lo12:.LANCHOR0
.L2:
         ldr     x1, [x0]
         ldr     x2, [x0]
         cmp     x1, x2
         bne     .L2
         ldr     w0, [x1]
         ret
fct_volatile_acquire:
         adrp    x0, .LANCHOR0
         add     x0, x0, :lo12:.LANCHOR0
.L6:
         ldr     x1, [x0]
         ldar    x2, [x0]
         cmp     x1, x2
         bne     .L6
         ldr     w0, [x1]
         ret
fct_asm_compare:
         adrp    x1, .LANCHOR0
         add     x1, x1, :lo12:.LANCHOR0
.L9:
         ldr     x2, [x1]
         ldr     x0, [x1]
         cmp x2, x0
         bne .L9

         ldr     w0, [x0]
         ret
p:
         .zero   8

armv8-a clang (trunk):

fct_2_volatile:
  adrp	x8, 0 <fct_2_volatile>
  ldr	x10, [x8]
  ldr	x9, [x8]
  cmp	x10, x9
  b.ne	4 <fct_2_volatile+0x4>  // b.any
  ldr	w0, [x9]
  ret
fct_volatile_acquire:
  adrp	x8, 0 <fct_2_volatile>
  add	x8, x8, #0x0
  ldr	x10, [x8]
  ldar	x9, [x8]
  cmp	x10, x9
  b.ne	24 <fct_volatile_acquire+0x8>  // b.any
  ldr	w0, [x9]
  ret
fct_asm_compare:
  adrp	x8, 0 <fct_2_volatile>
  ldr	x9, [x8]
  ldr	x8, [x8]
  cmp	x9, x8
  b.ne	3c <fct_asm_compare>  // b.any
  ldr	w0, [x8]
  ret
main:
  mov	w0, wzr

-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com



  parent reply	other threads:[~2024-09-27  1:22 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-17 14:33 [RFC PATCH 0/4] Add hazard pointers to kernel Boqun Feng
2024-09-17 14:33 ` [RFC PATCH 1/4] hazptr: Add initial implementation of hazard pointers Boqun Feng
2024-09-18  8:27   ` Mathieu Desnoyers
2024-09-18 15:17   ` Alan Huang
2024-09-19  6:56     ` Boqun Feng
2024-09-19 18:07       ` Jonas Oberhauser
2024-09-19  0:12   ` Jann Horn
2024-09-19 20:30     ` Jonas Oberhauser
2024-09-20  7:43       ` Jonas Oberhauser
2024-09-19  6:39   ` Lai Jiangshan
2024-09-19  7:10     ` Boqun Feng
2024-09-19 12:33       ` Alan Huang
2024-09-19 13:57       ` Alan Huang
2024-09-19 18:58         ` Boqun Feng
2024-09-19 19:53           ` Alan Huang
2024-09-19 16:10       ` Alan Huang
2024-09-19 14:00   ` Jonas Oberhauser
2024-09-20  7:41   ` Jonas Oberhauser
2024-09-25 10:02     ` Boqun Feng
2024-09-25 10:11       ` Jonas Oberhauser
2024-09-25 10:45         ` Boqun Feng
2024-09-25 11:59           ` Mathieu Desnoyers
2024-09-25 12:16             ` Boqun Feng
2024-09-25 12:47               ` Mathieu Desnoyers
2024-09-25 13:10                 ` Mathieu Desnoyers
2024-09-25 13:20                   ` Mathieu Desnoyers
2024-09-26  6:16                     ` Mathieu Desnoyers
2024-09-26 15:53                       ` Jonas Oberhauser
2024-09-26 16:12                         ` Linus Torvalds
2024-09-26 16:40                           ` Jonas Oberhauser
2024-09-26 16:54                             ` Linus Torvalds
2024-09-27  0:01                               ` Boqun Feng
2024-09-27  1:30                                 ` Mathieu Desnoyers
2024-09-27  1:37                                   ` Boqun Feng
2024-09-27  4:28                                     ` Boqun Feng
2024-09-27 10:59                                       ` Mathieu Desnoyers
2024-09-27 14:43                                         ` Mathieu Desnoyers
2024-09-27 15:22                                           ` Mathieu Desnoyers
2024-09-27 16:06                                       ` Alan Huang
2024-09-27 16:44                                     ` Linus Torvalds
2024-09-27 17:15                                       ` Mathieu Desnoyers
2024-09-27 17:23                                         ` Linus Torvalds
2024-09-27 17:51                                           ` Mathieu Desnoyers
2024-09-27 18:13                                             ` Linus Torvalds
2024-09-27 19:12                                               ` Jonas Oberhauser
2024-09-27 19:28                                                 ` Linus Torvalds
2024-09-27 20:24                                                   ` Linus Torvalds
2024-09-27 20:02                                               ` Mathieu Desnoyers
2024-09-27  1:20                           ` Mathieu Desnoyers [this message]
2024-09-27  4:38                             ` Boqun Feng
2024-09-27 19:23                               ` Jonas Oberhauser
2024-09-27 20:10                                 ` Mathieu Desnoyers
2024-09-27 22:18                                   ` Jonas Oberhauser
2024-09-28 22:10                                     ` Alan Huang
2024-09-28 23:12                                       ` Alan Huang
2024-09-25 12:19             ` Jonas Oberhauser
2024-09-17 14:34 ` [RFC PATCH 2/4] refscale: Add benchmarks for hazptr Boqun Feng
2024-09-17 14:34 ` [RFC PATCH 3/4] refscale: Add benchmarks for percpu_ref Boqun Feng
2024-09-17 14:34 ` [RFC PATCH 4/4] WIP: hazptr: Add hazptr test sample Boqun Feng
2024-09-18  7:18 ` [RFC PATCH 0/4] Add hazard pointers to kernel Linus Torvalds
2024-09-18 22:44   ` Neeraj Upadhyay
2024-09-19  6:46     ` Linus Torvalds
2024-09-20  5:00       ` Neeraj Upadhyay
2024-09-19 14:30     ` Mateusz Guzik
2024-09-19 14:14   ` Christoph Hellwig
2024-09-19 14:21     ` Linus Torvalds

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bba2e656-4c3b-46db-b308-483de440b922@efficios.com \
    --to=mathieu.desnoyers@efficios.com \
    --cc=boqun.feng@gmail.com \
    --cc=frederic@kernel.org \
    --cc=jiangshanlai@gmail.com \
    --cc=joel@joelfernandes.org \
    --cc=jonas.oberhauser@huaweicloud.com \
    --cc=josh@joshtriplett.org \
    --cc=kent.overstreet@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lkmm@lists.linux.dev \
    --cc=longman@redhat.com \
    --cc=maged.michael@gmail.com \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=neeraj.upadhyay@amd.com \
    --cc=neeraj.upadhyay@kernel.org \
    --cc=paulmck@kernel.org \
    --cc=peterz@infradead.org \
    --cc=qiang.zhang1211@gmail.com \
    --cc=rcu@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=urezki@gmail.com \
    --cc=vbabka@suse.cz \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox