From: Alexandru Elisei <alexandru.elisei@arm.com>
To: catalin.marinas@arm.com, will@kernel.org, oliver.upton@linux.dev,
maz@kernel.org, james.morse@arm.com, suzuki.poulose@arm.com,
yuzenghui@huawei.com, arnd@arndb.de, akpm@linux-foundation.org,
mingo@redhat.com, peterz@infradead.org, juri.lelli@redhat.com,
vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
bristot@redhat.com, vschneid@redhat.com, mhiramat@kernel.org,
rppt@kernel.org, hughd@google.com
Cc: pcc@google.com, steven.price@arm.com, anshuman.khandual@arm.com,
vincenzo.frascino@arm.com, david@redhat.com, eugenis@google.com,
kcc@google.com, hyesoo.yu@samsung.com,
linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org, kvmarm@lists.linux.dev,
linux-fsdevel@vger.kernel.org, linux-arch@vger.kernel.org,
linux-mm@kvack.org, linux-trace-kernel@vger.kernel.org
Subject: [PATCH RFC v3 33/35] KVM: arm64: mte: Introduce VM_MTE_KVM VMA flag
Date: Thu, 25 Jan 2024 16:42:54 +0000 [thread overview]
Message-ID: <20240125164256.4147-34-alexandru.elisei@arm.com> (raw)
In-Reply-To: <20240125164256.4147-1-alexandru.elisei@arm.com>
Tag storage pages mapped by the host in a VM with MTE enabled are migrated
when they are first accessed by the guest. This introduces latency spikes
for memory accesses made by the guest.
Tag storage pages can be mapped in the guest memory when the VM_MTE VMA
flag is not set. Introduce a new VMA flag, VM_MTE_KVM, to stop tag storage
pages from being mapped in a VM with MTE enabled.
The flag is different from VM_MTE, because the pages from the VMA won't be
mapped as tagged in the host, and host's userspace can continue to access
the guest memory as Untagged. The flag's only function is to instruct the
page allocator to treat the allocation as tagged, so tag storage pages
aren't used. The page allocator will also try to reserve tag storage for
the new page, which can speed up stage 2 aborts further if the VMM has
accessed the memory before the guest. For example, qemu and kvmtool will
benefit from this change because the guest image is copied after the
memslot is created.
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
---
Changes since rfc v2:
* New patch.
arch/arm64/kvm/mmu.c | 77 ++++++++++++++++++++++++++++++++++++++++++-
arch/arm64/mm/fault.c | 2 +-
include/linux/mm.h | 2 ++
3 files changed, 79 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 986a9544228d..45c57c4b9fe2 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1420,7 +1420,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
unsigned long mmu_seq;
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma, *old_vma;
short vma_shift;
gfn_t gfn;
kvm_pfn_t pfn;
@@ -1428,6 +1428,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
long vma_pagesize, fault_granule;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt;
+ bool vma_has_kvm_mte = false;
if (fault_is_perm)
fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
@@ -1506,6 +1507,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
gfn = fault_ipa >> PAGE_SHIFT;
mte_allowed = kvm_vma_mte_allowed(vma);
+ vma_has_kvm_mte = !!(vma->vm_flags & VM_MTE_KVM);
+ old_vma = vma;
/* Don't use the VMA after the unlock -- it may have vanished */
vma = NULL;
@@ -1521,6 +1524,27 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
mmu_seq = vcpu->kvm->mmu_invalidate_seq;
mmap_read_unlock(current->mm);
+ /*
+ * If the VMA was created after the memslot, it doesn't have the
+ * VM_MTE_KVM flag set.
+ */
+ if (unlikely(tag_storage_enabled() && !fault_is_perm &&
+ kvm_has_mte(kvm) && mte_allowed && !vma_has_kvm_mte)) {
+ mmap_write_lock(current->mm);
+ vma = vma_lookup(current->mm, hva);
+ /* The VMA was changed, replay the fault. */
+ if (vma != old_vma) {
+ mmap_write_unlock(current->mm);
+ return 0;
+ }
+ if (!(vma->vm_flags & VM_MTE_KVM)) {
+ vma_start_write(vma);
+ vm_flags_reset(vma, vma->vm_flags | VM_MTE_KVM);
+ }
+ vma = NULL;
+ mmap_write_unlock(current->mm);
+ }
+
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
write_fault, &writable, NULL);
@@ -1986,6 +2010,40 @@ int __init kvm_mmu_init(u32 *hyp_va_bits)
return err;
}
+/*
+ * Set or clear VM_MTE_KVM on every MTE-allowed VMA that intersects the
+ * memslot's userspace range. Walks the VMAs under mmap_write_lock so the
+ * flag update cannot race with concurrent VMA changes.
+ *
+ * Returns 0 (ret is kept for future error propagation).
+ */
+static int kvm_set_clear_kvm_mte_vma(const struct kvm_memory_slot *memslot, bool set)
+{
+	struct vm_area_struct *vma;
+	hva_t hva, memslot_end;
+	int ret = 0;
+
+	hva = memslot->userspace_addr;
+	memslot_end = hva + (memslot->npages << PAGE_SHIFT);
+
+	mmap_write_lock(current->mm);
+
+	do {
+		vma = find_vma_intersection(current->mm, hva, memslot_end);
+		if (!vma)
+			break;
+		/*
+		 * Advance the cursor before any 'continue': in a do-while,
+		 * 'continue' jumps to the condition check, so leaving the
+		 * advance at the bottom of the loop made a !mte_allowed VMA
+		 * re-found by find_vma_intersection() forever (infinite loop
+		 * while holding mmap_write_lock).
+		 */
+		hva = min(memslot_end, vma->vm_end);
+		if (!kvm_vma_mte_allowed(vma))
+			continue;
+		if (set) {
+			if (!(vma->vm_flags & VM_MTE_KVM)) {
+				vma_start_write(vma);
+				vm_flags_reset(vma, vma->vm_flags | VM_MTE_KVM);
+			}
+		} else if (vma->vm_flags & VM_MTE_KVM) {
+			vma_start_write(vma);
+			vm_flags_reset(vma, vma->vm_flags & ~VM_MTE_KVM);
+		}
+	} while (hva < memslot_end);
+
+	mmap_write_unlock(current->mm);
+
+	return ret;
+}
+
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
@@ -1993,6 +2051,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
bool log_dirty_pages = new && new->flags & KVM_MEM_LOG_DIRTY_PAGES;
+ if (kvm_has_mte(kvm) && change != KVM_MR_FLAGS_ONLY) {
+ switch (change) {
+ case KVM_MR_CREATE:
+ kvm_set_clear_kvm_mte_vma(new, true);
+ break;
+ case KVM_MR_DELETE:
+ kvm_set_clear_kvm_mte_vma(old, false);
+ break;
+ case KVM_MR_MOVE:
+ kvm_set_clear_kvm_mte_vma(old, false);
+ kvm_set_clear_kvm_mte_vma(new, true);
+ break;
+ default:
+ WARN(true, "Unknown memslot change");
+ }
+ }
+
/*
* At this point memslot has been committed and there is an
* allocated dirty_bitmap[], dirty pages will be tracked while the
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 5c12232bdf0b..f4ca3ba8dde7 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -947,7 +947,7 @@ NOKPROBE_SYMBOL(do_debug_exception);
*/
gfp_t arch_calc_vma_gfp(struct vm_area_struct *vma, gfp_t gfp)
{
- if (vma->vm_flags & VM_MTE)
+	if (vma->vm_flags & (VM_MTE | VM_MTE_KVM))
return __GFP_TAGGED;
return 0;
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f5a97dec5169..924aa7c26ec9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -375,9 +375,11 @@ extern unsigned int kobjsize(const void *objp);
#if defined(CONFIG_ARM64_MTE)
# define VM_MTE VM_HIGH_ARCH_0 /* Use Tagged memory for access control */
# define VM_MTE_ALLOWED VM_HIGH_ARCH_1 /* Tagged memory permitted */
+# define VM_MTE_KVM VM_HIGH_ARCH_2 /* VMA is mapped in a virtual machine with MTE */
#else
# define VM_MTE VM_NONE
# define VM_MTE_ALLOWED VM_NONE
+# define VM_MTE_KVM VM_NONE
#endif
#ifndef VM_GROWSUP
--
2.43.0
next prev parent reply other threads:[~2024-01-25 16:46 UTC|newest]
Thread overview: 95+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-25 16:42 [PATCH RFC v3 00/35] Add support for arm64 MTE dynamic tag storage reuse Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 01/35] mm: page_alloc: Add gfp_flags parameter to arch_alloc_page() Alexandru Elisei
2024-01-29 5:48 ` Anshuman Khandual
2024-01-29 11:41 ` Alexandru Elisei
2024-01-30 4:26 ` Anshuman Khandual
2024-01-30 11:56 ` Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 02/35] mm: page_alloc: Add an arch hook early in free_pages_prepare() Alexandru Elisei
2024-01-29 8:19 ` Anshuman Khandual
2024-01-29 11:42 ` Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 03/35] mm: page_alloc: Add an arch hook to filter MIGRATE_CMA allocations Alexandru Elisei
2024-01-29 8:44 ` Anshuman Khandual
2024-01-29 11:45 ` Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 04/35] mm: page_alloc: Partially revert "mm: page_alloc: remove stale CMA guard code" Alexandru Elisei
2024-01-29 9:01 ` Anshuman Khandual
2024-01-29 11:46 ` Alexandru Elisei
2024-01-30 4:34 ` Anshuman Khandual
2024-01-30 11:57 ` Alexandru Elisei
2024-01-31 3:27 ` Anshuman Khandual
2024-01-25 16:42 ` [PATCH RFC v3 05/35] mm: cma: Don't append newline when generating CMA area name Alexandru Elisei
2024-01-29 9:13 ` Anshuman Khandual
2024-01-29 11:46 ` Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 06/35] mm: cma: Make CMA_ALLOC_SUCCESS/FAIL count the number of pages Alexandru Elisei
2024-01-29 9:24 ` Anshuman Khandual
2024-01-29 11:51 ` Alexandru Elisei
2024-01-30 4:52 ` Anshuman Khandual
2024-01-30 11:58 ` Alexandru Elisei
2024-01-31 4:40 ` Anshuman Khandual
2024-01-31 13:27 ` Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 07/35] mm: cma: Add CMA_RELEASE_{SUCCESS,FAIL} events Alexandru Elisei
2024-01-29 9:31 ` Anshuman Khandual
2024-01-29 11:53 ` Alexandru Elisei
2024-01-31 5:59 ` Anshuman Khandual
2024-01-25 16:42 ` [PATCH RFC v3 08/35] mm: cma: Introduce cma_alloc_range() Alexandru Elisei
2024-01-30 5:20 ` Anshuman Khandual
2024-01-30 11:35 ` Alexandru Elisei
2024-01-31 6:24 ` Anshuman Khandual
2024-01-31 14:18 ` Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 09/35] mm: cma: Introduce cma_remove_mem() Alexandru Elisei
2024-01-30 5:50 ` Anshuman Khandual
2024-01-30 11:33 ` Alexandru Elisei
2024-01-31 13:19 ` Anshuman Khandual
2024-01-31 13:48 ` Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 10/35] mm: cma: Fast track allocating memory when the pages are free Alexandru Elisei
2024-01-30 9:18 ` Anshuman Khandual
2024-01-30 11:34 ` Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 11/35] mm: Allow an arch to hook into folio allocation when VMA is known Alexandru Elisei
2024-01-26 20:00 ` Peter Collingbourne
2024-01-29 11:59 ` Alexandru Elisei
2024-01-30 9:55 ` Anshuman Khandual
2024-01-30 11:34 ` Alexandru Elisei
2024-01-31 6:53 ` Anshuman Khandual
2024-01-31 12:22 ` Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 12/35] mm: Call arch_swap_prepare_to_restore() before arch_swap_restore() Alexandru Elisei
2024-02-01 3:30 ` Anshuman Khandual
2024-02-01 17:32 ` Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 13/35] mm: memory: Introduce fault-on-access mechanism for pages Alexandru Elisei
2024-02-01 5:52 ` Anshuman Khandual
2024-02-01 17:36 ` Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 14/35] of: fdt: Return the region size in of_flat_dt_translate_address() Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 15/35] of: fdt: Add of_flat_read_u32() Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 16/35] KVM: arm64: Don't deny VM_PFNMAP VMAs when kvm_has_mte() Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 17/35] arm64: mte: Rework naming for tag manipulation functions Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 18/35] arm64: mte: Rename __GFP_ZEROTAGS to __GFP_TAGGED Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 19/35] arm64: mte: Discover tag storage memory Alexandru Elisei
2024-01-26 8:50 ` Krzysztof Kozlowski
2024-01-26 17:01 ` Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 20/35] arm64: mte: Add tag storage memory to CMA Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 21/35] arm64: mte: Disable dynamic tag storage management if HW KASAN is enabled Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 22/35] arm64: mte: Enable tag storage if CMA areas have been activated Alexandru Elisei
2024-02-02 22:30 ` Evgenii Stepanov
2024-02-05 16:30 ` Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 23/35] arm64: mte: Try to reserve tag storage in arch_alloc_page() Alexandru Elisei
2024-01-30 0:04 ` Peter Collingbourne
2024-01-30 11:38 ` Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 24/35] arm64: mte: Perform CMOs for tag blocks Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 25/35] arm64: mte: Reserve tag block for the zero page Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 26/35] arm64: mte: Use fault-on-access to reserve missing tag storage Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 27/35] arm64: mte: Handle tag storage pages mapped in an MTE VMA Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 28/35] arm64: mte: swap: Handle tag restoring when missing tag storage Alexandru Elisei
2024-02-02 4:02 ` Peter Collingbourne
2024-02-02 14:56 ` Alexandru Elisei
2024-02-03 1:32 ` Evgenii Stepanov
2024-02-03 1:52 ` Peter Collingbourne
2024-01-25 16:42 ` [PATCH RFC v3 29/35] arm64: mte: copypage: " Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 30/35] arm64: mte: ptrace: Handle pages with " Alexandru Elisei
2024-02-01 9:21 ` Anshuman Khandual
2024-02-01 17:38 ` Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 31/35] khugepaged: arm64: Don't collapse MTE enabled VMAs Alexandru Elisei
2024-02-01 8:12 ` Anshuman Khandual
2024-02-01 17:38 ` Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 32/35] KVM: arm64: mte: Reserve tag storage for virtual machines with MTE Alexandru Elisei
2024-01-25 16:42 ` Alexandru Elisei [this message]
2024-01-25 16:42 ` [PATCH RFC v3 34/35] arm64: mte: Enable dynamic tag storage management Alexandru Elisei
2024-01-25 16:42 ` [PATCH RFC v3 35/35] HACK! arm64: dts: Add fake tag storage to fvp-base-revc.dts Alexandru Elisei
2024-01-25 17:01 ` [PATCH RFC v3 00/35] Add support for arm64 MTE dynamic tag storage reuse Steven Rostedt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240125164256.4147-34-alexandru.elisei@arm.com \
--to=alexandru.elisei@arm.com \
--cc=akpm@linux-foundation.org \
--cc=anshuman.khandual@arm.com \
--cc=arnd@arndb.de \
--cc=bristot@redhat.com \
--cc=bsegall@google.com \
--cc=catalin.marinas@arm.com \
--cc=david@redhat.com \
--cc=dietmar.eggemann@arm.com \
--cc=eugenis@google.com \
--cc=hughd@google.com \
--cc=hyesoo.yu@samsung.com \
--cc=james.morse@arm.com \
--cc=juri.lelli@redhat.com \
--cc=kcc@google.com \
--cc=kvmarm@lists.linux.dev \
--cc=linux-arch@vger.kernel.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=maz@kernel.org \
--cc=mgorman@suse.de \
--cc=mhiramat@kernel.org \
--cc=mingo@redhat.com \
--cc=oliver.upton@linux.dev \
--cc=pcc@google.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=rppt@kernel.org \
--cc=steven.price@arm.com \
--cc=suzuki.poulose@arm.com \
--cc=vincent.guittot@linaro.org \
--cc=vincenzo.frascino@arm.com \
--cc=vschneid@redhat.com \
--cc=will@kernel.org \
--cc=yuzenghui@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox