From: Alexander Potapenko <glider@google.com>
To: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>,
Andy Lutomirski <luto@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
"the arch/x86 maintainers" <x86@kernel.org>,
Kostya Serebryany <kcc@google.com>,
Andrey Ryabinin <ryabinin.a.a@gmail.com>,
Andrey Konovalov <andreyknvl@gmail.com>,
Dmitry Vyukov <dvyukov@google.com>,
"H . J . Lu" <hjl.tools@gmail.com>,
Andi Kleen <ak@linux.intel.com>,
Rick Edgecombe <rick.p.edgecombe@intel.com>,
Linux Memory Management List <linux-mm@kvack.org>,
LKML <linux-kernel@vger.kernel.org>
Subject: Re: [PATCHv4 4/8] x86/mm: Handle LAM on context switch
Date: Thu, 30 Jun 2022 10:36:19 +0200
Message-ID: <CAG_fn=UhOYXHqYjzytjp9vx3w9rN=tfh1sY_=bLRYmj_fAJ+Hw@mail.gmail.com>
In-Reply-To: <20220622162230.83474-5-kirill.shutemov@linux.intel.com>
On Wed, Jun 22, 2022 at 6:22 PM Kirill A. Shutemov
<kirill.shutemov@linux.intel.com> wrote:
>
> Linear Address Masking mode for userspace pointers is encoded in CR3 bits.
> The mode is selected per-thread. Add new thread features to indicate that
> the thread has Linear Address Masking enabled.
>
> switch_mm_irqs_off() now respects these flags and constructs CR3
> accordingly.
>
> The active LAM mode gets recorded in the tlb_state.
>
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
> arch/x86/include/asm/mmu.h | 1 +
> arch/x86/include/asm/mmu_context.h | 24 +++++++++++
> arch/x86/include/asm/tlbflush.h | 4 ++
> arch/x86/mm/tlb.c | 68 +++++++++++++++++++++++-------
> 4 files changed, 82 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> index 5d7494631ea9..d150e92163b6 100644
> --- a/arch/x86/include/asm/mmu.h
> +++ b/arch/x86/include/asm/mmu.h
> @@ -40,6 +40,7 @@ typedef struct {
>
> #ifdef CONFIG_X86_64
> unsigned short flags;
> + u64 lam_cr3_mask;
> #endif
Can you please add a comment for this field?
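Something along these lines maybe (exact wording up to you, just to make
it clear what the mask holds):

	/*
	 * CR3 LAM enable bits (e.g. X86_CR3_LAM_U57) for userspace
	 * pointers in this mm, or 0 if LAM is disabled.
	 */
	u64 lam_cr3_mask;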
>
> struct mutex lock;
> diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> index b8d40ddeab00..e6eac047c728 100644
> --- a/arch/x86/include/asm/mmu_context.h
> +++ b/arch/x86/include/asm/mmu_context.h
> @@ -91,6 +91,29 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
> }
> #endif
>
> +#ifdef CONFIG_X86_64
> +static inline u64 mm_cr3_lam_mask(struct mm_struct *mm)
> +{
> + return mm->context.lam_cr3_mask;
> +}
For the sake of uniformity, can it be either lam_cr3_mask or
cr3_lam_mask everywhere?
>
> diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
> index 4af5579c7ef7..2d70d75e207f 100644
> --- a/arch/x86/include/asm/tlbflush.h
> +++ b/arch/x86/include/asm/tlbflush.h
> @@ -100,6 +100,10 @@ struct tlb_state {
> */
> bool invalidate_other;
>
> +#ifdef CONFIG_X86_64
> + u8 lam;
> +#endif
> +
Comment here as well, please.
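E.g. something like this, if I read set_tlbstate_lam_cr3_mask()
correctly:

	/*
	 * LAM mode of the loaded mm: the CR3 LAM bits shifted right
	 * by X86_CR3_LAM_U57_BIT (see tlbstate_lam_cr3_mask()).
	 */
	u8 lam;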
> /*
> * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
> * the corresponding user PCID needs a flush next time we
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index d400b6d9d246..c5c4f76329c2 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -154,17 +154,17 @@ static inline u16 user_pcid(u16 asid)
> return ret;
> }
>
> -static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
> +static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, u64 lam)
I think it's more natural to make `lam` an unsigned long, because cr3
is treated as unsigned long everywhere.
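I.e. just a sketch of what I mean:

	static inline unsigned long build_cr3(pgd_t *pgd, u16 asid,
					      unsigned long lam)

(and the same for build_cr3_noflush() and load_new_mm_cr3() below).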
> {
> if (static_cpu_has(X86_FEATURE_PCID)) {
> - return __sme_pa(pgd) | kern_pcid(asid);
> + return __sme_pa(pgd) | kern_pcid(asid) | lam;
> } else {
> VM_WARN_ON_ONCE(asid != 0);
> - return __sme_pa(pgd);
> + return __sme_pa(pgd) | lam;
> }
> }
>
> -static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
> +static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid, u64 lam)
ditto
> {
> VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
> /*
> @@ -173,7 +173,7 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
> * boot because all CPU's the have same capabilities:
> */
> VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
> - return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
> + return __sme_pa(pgd) | kern_pcid(asid) | lam | CR3_NOFLUSH;
> }
>
> /*
> @@ -274,15 +274,15 @@ static inline void invalidate_user_asid(u16 asid)
> (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
> }
>
> -static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
> +static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, u64 lam, bool need_flush)
> {
> unsigned long new_mm_cr3;
>
> if (need_flush) {
> invalidate_user_asid(new_asid);
> - new_mm_cr3 = build_cr3(pgdir, new_asid);
> + new_mm_cr3 = build_cr3(pgdir, new_asid, lam);
> } else {
> - new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
> + new_mm_cr3 = build_cr3_noflush(pgdir, new_asid, lam);
> }
>
> /*
> @@ -486,11 +486,38 @@ void cr4_update_pce(void *ignored)
> static inline void cr4_update_pce_mm(struct mm_struct *mm) { }
> #endif
>
> +#ifdef CONFIG_X86_64
> +static inline u64 tlbstate_lam_cr3_mask(void)
Please add comments for these helpers as well — a possible wording is
sketched below, after set_tlbstate_lam_cr3_mask().
> +{
> + u64 lam = this_cpu_read(cpu_tlbstate.lam);
> +
> + return lam << X86_CR3_LAM_U57_BIT;
> +}
> +
> +static inline void set_tlbstate_lam_cr3_mask(u64 mask)
> +{
> + this_cpu_write(cpu_tlbstate.lam, mask >> X86_CR3_LAM_U57_BIT);
> +}
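Purely as a suggestion, something like:

	/* CR3 LAM bits recorded for the mm loaded on this CPU. */
	static inline u64 tlbstate_lam_cr3_mask(void)

	/* Record the CR3 LAM bits of the mm being loaded on this CPU. */
	static inline void set_tlbstate_lam_cr3_mask(u64 mask)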
> +
> +#else
> +
> +static inline u64 tlbstate_lam_cr3_mask(void)
> +{
> + return 0;
> +}
> +
> +static inline void set_tlbstate_lam_cr3_mask(u64 mask)
> +{
> +}
> +#endif
> +
> void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
> struct task_struct *tsk)
> {
> struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
> u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
> + u64 prev_lam = tlbstate_lam_cr3_mask();
> + u64 new_lam = mm_cr3_lam_mask(next);
Ditto.
> bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
> unsigned cpu = smp_processor_id();
> u64 next_tlb_gen;
> @@ -504,6 +531,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
> * cpu_tlbstate.loaded_mm) matches next.
> *
> * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
> + *
> + * NB: Initial LAM enabling calls us with prev == next. We must update
> + * CR3 if prev_lam doesn't match the new one.
> */
>
> /* We don't want flush_tlb_func() to run concurrently with us. */
> @@ -520,7 +550,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
> * isn't free.
> */
> #ifdef CONFIG_DEBUG_VM
> - if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
> + if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid, prev_lam))) {
> /*
> * If we were to BUG here, we'd be very likely to kill
> * the system so hard that we don't see the call trace.
> @@ -551,7 +581,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
> * provides that full memory barrier and core serializing
> * instruction.
> */
> - if (real_prev == next) {
> + if (real_prev == next && prev_lam == new_lam) {
Do we want the warning checks below to only happen if prev_lam == new_lam?
> VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
> next->context.ctx_id);
>
> @@ -622,15 +652,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
> barrier();
> }
>
> + set_tlbstate_lam_cr3_mask(new_lam);
> if (need_flush) {
> this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
> this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
> - load_new_mm_cr3(next->pgd, new_asid, true);
> + load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
>
> trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
> } else {
> /* The new ASID is already up to date. */
> - load_new_mm_cr3(next->pgd, new_asid, false);
> + load_new_mm_cr3(next->pgd, new_asid, new_lam, false);
>
> trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
> }
> @@ -691,6 +722,10 @@ void initialize_tlbstate_and_flush(void)
> /* Assert that CR3 already references the right mm. */
> WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
>
> + /* LAM expected to be disabled in CR3 and init_mm */
> + WARN_ON(cr3 & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57));
> + WARN_ON(mm_cr3_lam_mask(&init_mm));
> +
> /*
> * Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
> * doesn't work like other CR4 bits because it can only be set from
> @@ -700,7 +735,7 @@ void initialize_tlbstate_and_flush(void)
> !(cr4_read_shadow() & X86_CR4_PCIDE));
>
> /* Force ASID 0 and force a TLB flush. */
Please update this comment to mention LAM, e.g. "Force ASID 0, LAM
disabled, and force a TLB flush."
> - write_cr3(build_cr3(mm->pgd, 0));
> + write_cr3(build_cr3(mm->pgd, 0, 0));
>
> /* Reinitialize tlbstate. */
> this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
> @@ -708,6 +743,7 @@ void initialize_tlbstate_and_flush(void)
> this_cpu_write(cpu_tlbstate.next_asid, 1);
> this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
> this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
> + set_tlbstate_lam_cr3_mask(0);
>
> for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
> this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
> @@ -1047,8 +1083,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> */
> unsigned long __get_current_cr3_fast(void)
> {
> - unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
> - this_cpu_read(cpu_tlbstate.loaded_mm_asid));
> + unsigned long cr3 =
> + build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
> + this_cpu_read(cpu_tlbstate.loaded_mm_asid),
> + tlbstate_lam_cr3_mask());
>
> /* For now, be very restrictive about when this can be called. */
> VM_WARN_ON(in_nmi() || preemptible());
> --
> 2.35.1
>
--
Alexander Potapenko
Software Engineer
Google Germany GmbH
Erika-Mann-Straße, 33
80636 München