* [PATCH 3.16 131/136] x86/vdso: Get pvclock data from the vvar VMA instead of the fixmap
[not found] <lsq.1518323469.348919605@decadent.org.uk>
2018-02-11 4:31 ` [PATCH 3.16 134/136] x86/vdso: Remove pvclock fixmap machinery Ben Hutchings
2018-02-11 4:31 ` [PATCH 3.16 130/136] x86, vdso, pvclock: Simplify and speed up the vdso pvclock reader Ben Hutchings
@ 2018-02-11 4:31 ` Ben Hutchings
2 siblings, 0 replies; 3+ messages in thread
From: Ben Hutchings @ 2018-02-11 4:31 UTC (permalink / raw)
To: linux-kernel, stable
Cc: akpm, Brian Gerst, Thomas Gleixner, Linus Torvalds,
Denys Vlasenko, Ingo Molnar, Andy Lutomirski, linux-mm,
Andy Lutomirski, Paolo Bonzini, Borislav Petkov, Peter Zijlstra,
H. Peter Anvin
3.16.54-rc1 review patch. If anyone has any objections, please let me know.
------------------
From: Andy Lutomirski <luto@kernel.org>
commit dac16fba6fc590fa7239676b35ed75dae4c4cd2b upstream.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/9d37826fdc7e2d2809efe31d5345f97186859284.1449702533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
[bwh: Backported to 3.16: adjust filenames]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
arch/x86/vdso/vclock_gettime.c | 20 ++++++++------------
arch/x86/vdso/vdso-layout.lds.S | 3 ++-
arch/x86/vdso/vdso2c.c | 3 +++
arch/x86/vdso/vma.c | 13 +++++++++++++
arch/x86/include/asm/pvclock.h | 9 +++++++++
arch/x86/include/asm/vdso.h | 1 +
arch/x86/kernel/kvmclock.c | 5 +++++
7 files changed, 41 insertions(+), 13 deletions(-)
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -36,6 +36,11 @@ static notrace cycle_t vread_hpet(void)
}
#endif
+#ifdef CONFIG_PARAVIRT_CLOCK
+extern u8 pvclock_page
+ __attribute__((visibility("hidden")));
+#endif
+
#ifndef BUILD_VDSO32
#include <linux/kernel.h>
@@ -62,23 +67,14 @@ notrace static long vdso_fallback_gtod(s
#ifdef CONFIG_PARAVIRT_CLOCK
-static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
+static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
{
- const struct pvclock_vsyscall_time_info *pvti_base;
- int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
- int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
-
- BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
-
- pvti_base = (struct pvclock_vsyscall_time_info *)
- __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
-
- return &pvti_base[offset];
+ return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
}
static notrace cycle_t vread_pvclock(int *mode)
{
- const struct pvclock_vcpu_time_info *pvti = &get_pvti(0)->pvti;
+ const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
cycle_t ret;
u64 tsc, pvti_tsc;
u64 last, delta, pvti_system_time;
--- a/arch/x86/vdso/vdso-layout.lds.S
+++ b/arch/x86/vdso/vdso-layout.lds.S
@@ -25,7 +25,7 @@ SECTIONS
* segment.
*/
- vvar_start = . - 2 * PAGE_SIZE;
+ vvar_start = . - 3 * PAGE_SIZE;
vvar_page = vvar_start;
/* Place all vvars at the offsets in asm/vvar.h. */
@@ -36,6 +36,7 @@ SECTIONS
#undef EMIT_VVAR
hpet_page = vvar_start + PAGE_SIZE;
+ pvclock_page = vvar_start + 2 * PAGE_SIZE;
. = SIZEOF_HEADERS;
--- a/arch/x86/vdso/vdso2c.c
+++ b/arch/x86/vdso/vdso2c.c
@@ -23,6 +23,7 @@ enum {
sym_vvar_start,
sym_vvar_page,
sym_hpet_page,
+ sym_pvclock_page,
sym_VDSO_FAKE_SECTION_TABLE_START,
sym_VDSO_FAKE_SECTION_TABLE_END,
};
@@ -30,6 +31,7 @@ enum {
const int special_pages[] = {
sym_vvar_page,
sym_hpet_page,
+ sym_pvclock_page,
};
struct vdso_sym {
@@ -41,6 +43,7 @@ struct vdso_sym required_syms[] = {
[sym_vvar_start] = {"vvar_start", true},
[sym_vvar_page] = {"vvar_page", true},
[sym_hpet_page] = {"hpet_page", true},
+ [sym_pvclock_page] = {"pvclock_page", true},
[sym_VDSO_FAKE_SECTION_TABLE_START] = {
"VDSO_FAKE_SECTION_TABLE_START", false
},
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -113,6 +113,7 @@ static int map_vdso(const struct vdso_im
.name = "[vvar]",
.pages = no_pages,
};
+ struct pvclock_vsyscall_time_info *pvti;
if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack,
@@ -182,6 +183,18 @@ static int map_vdso(const struct vdso_im
}
#endif
+ pvti = pvclock_pvti_cpu0_va();
+ if (pvti && image->sym_pvclock_page) {
+ ret = remap_pfn_range(vma,
+ text_start + image->sym_pvclock_page,
+ __pa(pvti) >> PAGE_SHIFT,
+ PAGE_SIZE,
+ PAGE_READONLY);
+
+ if (ret)
+ goto up_fail;
+ }
+
up_fail:
if (ret)
current->mm->context.vdso = NULL;
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -4,6 +4,15 @@
#include <linux/clocksource.h>
#include <asm/pvclock-abi.h>
+#ifdef CONFIG_PARAVIRT_CLOCK
+extern struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void);
+#else
+static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
+{
+ return NULL;
+}
+#endif
+
/* some helper functions for xen and kvm pv clock sources */
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -22,6 +22,7 @@ struct vdso_image {
long sym_vvar_page;
long sym_hpet_page;
+ long sym_pvclock_page;
long sym_VDSO32_NOTE_MASK;
long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn;
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -44,6 +44,11 @@ early_param("no-kvmclock", parse_no_kvmc
static struct pvclock_vsyscall_time_info *hv_clock;
static struct pvclock_wall_clock wall_clock;
+struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
+{
+ return hv_clock;
+}
+
/*
* The wallclock is the time of day when we booted. Since then, some time may
* have elapsed since the hypervisor wrote the data. So we try to account for
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH 3.16 130/136] x86, vdso, pvclock: Simplify and speed up the vdso pvclock reader
[not found] <lsq.1518323469.348919605@decadent.org.uk>
2018-02-11 4:31 ` [PATCH 3.16 134/136] x86/vdso: Remove pvclock fixmap machinery Ben Hutchings
@ 2018-02-11 4:31 ` Ben Hutchings
2018-02-11 4:31 ` [PATCH 3.16 131/136] x86/vdso: Get pvclock data from the vvar VMA instead of the fixmap Ben Hutchings
2 siblings, 0 replies; 3+ messages in thread
From: Ben Hutchings @ 2018-02-11 4:31 UTC (permalink / raw)
To: linux-kernel, stable
Cc: akpm, H. Peter Anvin, Peter Zijlstra, Borislav Petkov,
Paolo Bonzini, Andy Lutomirski, linux-mm, Ingo Molnar,
Denys Vlasenko, Linus Torvalds, Thomas Gleixner, Brian Gerst
3.16.54-rc1 review patch. If anyone has any objections, please let me know.
------------------
From: Andy Lutomirski <luto@amacapital.net>
commit 6b078f5de7fc0851af4102493c7b5bb07e49c4cb upstream.
The pvclock vdso code was too abstracted to understand easily
and excessively paranoid. Simplify it for a huge speedup.
This opens the door for additional simplifications, as the vdso
no longer accesses the pvti for any vcpu other than vcpu 0.
Before, vclock_gettime using kvm-clock took about 45ns on my
machine. With this change, it takes 29ns, which is almost as
fast as the pure TSC implementation.
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/6b51dcc41f1b101f963945c5ec7093d72bdac429.1449702533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
[bwh: Backported to 3.16:
- Open-code rdtsc_ordered()
- Adjust filename]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -78,47 +78,59 @@ static notrace const struct pvclock_vsys
static notrace cycle_t vread_pvclock(int *mode)
{
- const struct pvclock_vsyscall_time_info *pvti;
+ const struct pvclock_vcpu_time_info *pvti = &get_pvti(0)->pvti;
cycle_t ret;
- u64 last;
- u32 version;
- u8 flags;
- unsigned cpu, cpu1;
-
+ u64 tsc, pvti_tsc;
+ u64 last, delta, pvti_system_time;
+ u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;
/*
- * Note: hypervisor must guarantee that:
- * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
- * 2. that per-CPU pvclock time info is updated if the
- * underlying CPU changes.
- * 3. that version is increased whenever underlying CPU
- * changes.
+ * Note: The kernel and hypervisor must guarantee that cpu ID
+ * number maps 1:1 to per-CPU pvclock time info.
+ *
+ * Because the hypervisor is entirely unaware of guest userspace
+ * preemption, it cannot guarantee that per-CPU pvclock time
+ * info is updated if the underlying CPU changes or that that
+ * version is increased whenever underlying CPU changes.
*
+ * On KVM, we are guaranteed that pvti updates for any vCPU are
+ * atomic as seen by *all* vCPUs. This is an even stronger
+ * guarantee than we get with a normal seqlock.
+ *
+ * On Xen, we don't appear to have that guarantee, but Xen still
+ * supplies a valid seqlock using the version field.
+
+ * We only do pvclock vdso timing at all if
+ * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
+ * mean that all vCPUs have matching pvti and that the TSC is
+ * synced, so we can just look at vCPU 0's pvti.
*/
- do {
- cpu = __getcpu() & VGETCPU_CPU_MASK;
- /* TODO: We can put vcpu id into higher bits of pvti.version.
- * This will save a couple of cycles by getting rid of
- * __getcpu() calls (Gleb).
- */
-
- pvti = get_pvti(cpu);
-
- version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
-
- /*
- * Test we're still on the cpu as well as the version.
- * We could have been migrated just after the first
- * vgetcpu but before fetching the version, so we
- * wouldn't notice a version change.
- */
- cpu1 = __getcpu() & VGETCPU_CPU_MASK;
- } while (unlikely(cpu != cpu1 ||
- (pvti->pvti.version & 1) ||
- pvti->pvti.version != version));
- if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
+ if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
*mode = VCLOCK_NONE;
+ return 0;
+ }
+
+ do {
+ version = pvti->version;
+
+ /* This is also a read barrier, so we'll read version first. */
+ rdtsc_barrier();
+ tsc = __native_read_tsc();
+
+ pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
+ pvti_tsc_shift = pvti->tsc_shift;
+ pvti_system_time = pvti->system_time;
+ pvti_tsc = pvti->tsc_timestamp;
+
+ /* Make sure that the version double-check is last. */
+ smp_rmb();
+ } while (unlikely((version & 1) || version != pvti->version));
+
+ delta = tsc - pvti_tsc;
+ ret = pvti_system_time +
+ pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
+ pvti_tsc_shift);
/* refer to tsc.c read_tsc() comment for rationale */
last = gtod->cycle_last;
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH 3.16 134/136] x86/vdso: Remove pvclock fixmap machinery
[not found] <lsq.1518323469.348919605@decadent.org.uk>
@ 2018-02-11 4:31 ` Ben Hutchings
2018-02-11 4:31 ` [PATCH 3.16 130/136] x86, vdso, pvclock: Simplify and speed up the vdso pvclock reader Ben Hutchings
2018-02-11 4:31 ` [PATCH 3.16 131/136] x86/vdso: Get pvclock data from the vvar VMA instead of the fixmap Ben Hutchings
2 siblings, 0 replies; 3+ messages in thread
From: Ben Hutchings @ 2018-02-11 4:31 UTC (permalink / raw)
To: linux-kernel, stable
Cc: akpm, Andy Lutomirski, Paolo Bonzini, Borislav Petkov,
Peter Zijlstra, H. Peter Anvin, Brian Gerst, Thomas Gleixner,
Linus Torvalds, Denys Vlasenko, Andy Lutomirski, Ingo Molnar,
linux-mm
3.16.54-rc1 review patch. If anyone has any objections, please let me know.
------------------
From: Andy Lutomirski <luto@kernel.org>
commit cc1e24fdb064d3126a494716f22ad4fc39306742 upstream.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/4933029991103ae44672c82b97a20035f5c1fe4f.1449702533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
[bwh: Backported to 3.16: adjust filenames, context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
arch/x86/include/asm/fixmap.h | 5 -----
arch/x86/include/asm/pvclock.h | 5 -----
arch/x86/kernel/kvmclock.c | 6 ------
arch/x86/kernel/pvclock.c | 24 ------------------------
arch/x86/vdso/vclock_gettime.c | 1 -
arch/x86/vdso/vma.c | 1 +
6 files changed, 1 insertion(+), 41 deletions(-)
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -19,7 +19,6 @@
#include <asm/acpi.h>
#include <asm/apicdef.h>
#include <asm/page.h>
-#include <asm/pvclock.h>
#ifdef CONFIG_X86_32
#include <linux/threads.h>
#include <asm/kmap_types.h>
@@ -70,10 +69,6 @@ enum fixed_addresses {
FIX_HOLE,
#else
VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT,
-#ifdef CONFIG_PARAVIRT_CLOCK
- PVCLOCK_FIXMAP_BEGIN,
- PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
-#endif
#endif
FIX_DBGP_BASE,
FIX_EARLYCON_MEM_BASE,
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -107,10 +107,5 @@ struct pvclock_vsyscall_time_info {
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
-#define PVCLOCK_VSYSCALL_NR_PAGES (((NR_CPUS-1)/(PAGE_SIZE/PVTI_SIZE))+1)
-
-int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
- int size);
-struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu);
#endif /* _ASM_X86_PVCLOCK_H */
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -278,7 +278,6 @@ int __init kvm_setup_vsyscall_timeinfo(v
{
#ifdef CONFIG_X86_64
int cpu;
- int ret;
u8 flags;
struct pvclock_vcpu_time_info *vcpu_time;
unsigned int size;
@@ -299,11 +298,6 @@ int __init kvm_setup_vsyscall_timeinfo(v
return 1;
}
- if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
- preempt_enable();
- return ret;
- }
-
preempt_enable();
kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -140,27 +140,3 @@ void pvclock_read_wallclock(struct pvclo
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}
-
-#ifdef CONFIG_X86_64
-/*
- * Initialize the generic pvclock vsyscall state. This will allocate
- * a/some page(s) for the per-vcpu pvclock information, set up a
- * fixmap mapping for the page(s)
- */
-
-int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
- int size)
-{
- int idx;
-
- WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
-
- for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
- __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
- __pa(i) + (idx*PAGE_SIZE),
- PAGE_KERNEL_VVAR);
- }
-
- return 0;
-}
-#endif
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -45,7 +45,6 @@ extern u8 pvclock_page
#include <linux/kernel.h>
#include <asm/vsyscall.h>
-#include <asm/fixmap.h>
#include <asm/pvclock.h>
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -11,6 +11,7 @@
#include <linux/random.h>
#include <linux/elf.h>
#include <asm/vsyscall.h>
+#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2018-02-11 4:34 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <lsq.1518323469.348919605@decadent.org.uk>
2018-02-11 4:31 ` [PATCH 3.16 134/136] x86/vdso: Remove pvclock fixmap machinery Ben Hutchings
2018-02-11 4:31 ` [PATCH 3.16 130/136] x86, vdso, pvclock: Simplify and speed up the vdso pvclock reader Ben Hutchings
2018-02-11 4:31 ` [PATCH 3.16 131/136] x86/vdso: Get pvclock data from the vvar VMA instead of the fixmap Ben Hutchings
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox