From: Gleb Natapov <gleb@redhat.com>
To: kvm@vger.kernel.org
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, avi@redhat.com,
mingo@elte.hu, a.p.zijlstra@chello.nl, tglx@linutronix.de,
hpa@zytor.com, riel@redhat.com
Subject: [PATCH v2 09/12] Retry fault before vmentry
Date: Mon, 23 Nov 2009 16:06:04 +0200
Message-ID: <1258985167-29178-10-git-send-email-gleb@redhat.com>
In-Reply-To: <1258985167-29178-1-git-send-email-gleb@redhat.com>
When a page is swapped in, it is mapped into guest memory only after
the guest tries to access it again and generates another fault. To
avoid this extra fault we can map the page immediately, since we know
the guest is going to access it.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
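[ Note: the sketch below shows, in simplified form, where the new
  kvm_arch_async_page_ready() hook sits in the completion path. It is
  pseudocode, not the literal kvm_main.c code, and
  for_each_completed_work() is a made-up stand-in for the real
  spinlock-protected list walk. ]

	/*
	 * Simplified view of kvm_check_async_pf_completion() after this
	 * patch. for_each_completed_work() is a hypothetical helper
	 * standing in for the locked list traversal in kvm_main.c.
	 */
	void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
	{
		struct kvm_async_pf *work;

		for_each_completed_work(vcpu, work) {
			/*
			 * New in this patch: replay the original fault
			 * with the cr3 and error_code saved when the
			 * async PF was set up, so the swapped-in page
			 * is mapped before the guest re-executes the
			 * faulting instruction.
			 */
			kvm_arch_async_page_ready(vcpu, work);

			/* Then notify the PV guest the page is present. */
			kvm_arch_inject_async_page_present(vcpu, work);
			put_page(work->page);
		}
	}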
arch/x86/include/asm/kvm_host.h | 6 +++++-
arch/x86/kvm/mmu.c | 15 +++++++++------
arch/x86/kvm/paging_tmpl.h | 38 +++++++++++++++++++++++++++++++++++---
arch/x86/kvm/x86.c | 7 +++++++
virt/kvm/kvm_main.c | 2 ++
5 files changed, 58 insertions(+), 10 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ad177a4..39009a4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -254,7 +254,7 @@ struct kvm_pio_request {
*/
struct kvm_mmu {
void (*new_cr3)(struct kvm_vcpu *vcpu);
- int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
+ int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr3, gva_t gva, u32 err);
void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
void (*prefetch_page)(struct kvm_vcpu *vcpu,
@@ -542,6 +542,8 @@ struct kvm_x86_ops {
struct kvm_arch_async_pf {
u32 token;
+ gpa_t cr3;
+ u32 error_code;
};
extern struct kvm_x86_ops *kvm_x86_ops;
@@ -828,6 +830,8 @@ void kvm_arch_inject_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_inject_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work);
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2cdf3e3..1225c31 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2172,7 +2172,7 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
return vaddr;
}
-static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
+static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t cr3, gva_t gva,
u32 error_code)
{
gfn_t gfn;
@@ -2192,10 +2192,13 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
error_code & PFERR_WRITE_MASK, gfn);
}
-int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
+int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr3, gva_t gva,
+ gfn_t gfn, u32 error_code)
{
struct kvm_arch_async_pf arch;
arch.token = (vcpu->arch.async_pf_id++ << 12) | vcpu->vcpu_id;
+ arch.cr3 = cr3;
+ arch.error_code = error_code;
return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
}
@@ -2207,7 +2210,7 @@ static bool can_do_async_pf(struct kvm_vcpu *vcpu)
return !!kvm_x86_ops->get_cpl(vcpu);
}
-static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
+static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t cr3, gva_t gpa,
u32 error_code)
{
pfn_t pfn;
@@ -2230,7 +2233,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- if (can_do_async_pf(vcpu)) {
+ if (cr3 == vcpu->arch.cr3 && can_do_async_pf(vcpu)) {
r = gfn_to_pfn_async(vcpu->kvm, gfn, &pfn);
trace_kvm_try_async_get_page(r, pfn);
} else {
@@ -2240,7 +2243,7 @@ do_sync:
}
if (!r) {
- if (!kvm_arch_setup_async_pf(vcpu, gpa, gfn))
+ if (!kvm_arch_setup_async_pf(vcpu, cr3, gpa, gfn, error_code))
goto do_sync;
return 0;
}
@@ -2810,7 +2813,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
int r;
enum emulation_result er;
- r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
+ r = vcpu->arch.mmu.page_fault(vcpu, vcpu->arch.cr3, cr2, error_code);
if (r < 0)
goto out;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 44d19dc..702893c 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -375,7 +375,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
* Returns: 1 if we need to emulate the instruction, 0 otherwise, or
* a negative value on error.
*/
-static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
+static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t cr3, gva_t addr,
u32 error_code)
{
int write_fault = error_code & PFERR_WRITE_MASK;
@@ -388,6 +388,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
pfn_t pfn;
int level = PT_PAGE_TABLE_LEVEL;
unsigned long mmu_seq;
+ gpa_t curr_cr3 = vcpu->arch.cr3;
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
kvm_mmu_audit(vcpu, "pre page fault");
@@ -396,6 +397,19 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
if (r)
return r;
+ if (curr_cr3 != cr3) {
+ /*
+	 * We handle the page fault on behalf of a process that is
+	 * sleeping because of an async PF. A PV guest shouldn't kill
+	 * a process while it waits for the host to swap in the page,
+	 * so cr3 has to be valid here.
+ */
+ vcpu->arch.cr3 = cr3;
+ paging_new_cr3(vcpu);
+ if (kvm_mmu_reload(vcpu))
+ goto switch_cr3;
+ }
+
/*
* Look up the guest pte for the faulting address.
*/
@@ -406,6 +420,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
* The page is not mapped by the guest. Let the guest handle it.
*/
if (!r) {
+ if (curr_cr3 != vcpu->arch.cr3)
+ goto switch_cr3;
pgprintk("%s: guest page fault\n", __func__);
inject_page_fault(vcpu, addr, walker.error_code);
vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
@@ -420,7 +436,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- if (can_do_async_pf(vcpu)) {
+ if (curr_cr3 == vcpu->arch.cr3 && can_do_async_pf(vcpu)) {
r = gfn_to_pfn_async(vcpu->kvm, walker.gfn, &pfn);
trace_kvm_try_async_get_page(r, pfn);
} else {
@@ -430,13 +446,18 @@ do_sync:
}
if (!r) {
- if (!kvm_arch_setup_async_pf(vcpu, addr, walker.gfn))
+ if (!kvm_arch_setup_async_pf(vcpu, cr3, addr, walker.gfn,
+ error_code))
goto do_sync;
+ if (curr_cr3 != vcpu->arch.cr3)
+ goto switch_cr3;
return 0;
}
/* mmio */
if (is_error_pfn(pfn)) {
+ if (curr_cr3 != vcpu->arch.cr3)
+ goto switch_cr3;
pgprintk("gfn %lx is mmio\n", walker.gfn);
kvm_release_pfn_clean(pfn);
return 1;
@@ -458,12 +479,23 @@ do_sync:
kvm_mmu_audit(vcpu, "post page fault (fixed)");
spin_unlock(&vcpu->kvm->mmu_lock);
+ if (curr_cr3 != vcpu->arch.cr3)
+ goto switch_cr3;
+
return write_pt;
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
return 0;
+switch_cr3:
+ if (curr_cr3 != vcpu->arch.cr3) {
+ vcpu->arch.cr3 = curr_cr3;
+ paging_new_cr3(vcpu);
+ kvm_mmu_reload(vcpu);
+ }
+
+ return write_pt;
}
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cbbe5fd..c29af1d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5306,6 +5306,13 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
}
EXPORT_SYMBOL_GPL(kvm_set_rflags);
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+ vcpu->arch.mmu.page_fault(vcpu, work->arch.cr3, work->gva,
+ work->arch.error_code);
+}
+
void kvm_arch_inject_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 14ac02a..6e6769f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1212,6 +1212,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->async_pf_lock);
list_del(&work->link);
spin_unlock(&vcpu->async_pf_lock);
+ kvm_arch_async_page_ready(vcpu, work);
put_page(work->page);
async_pf_work_free(work);
}
@@ -1226,6 +1227,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
list_del(&work->link);
spin_unlock(&vcpu->async_pf_lock);
+ kvm_arch_async_page_ready(vcpu, work);
kvm_arch_inject_async_page_present(vcpu, work);
put_page(work->page);
--
1.6.5