linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Anthony Liguori <aliguori@us.ibm.com>
To: Andrea Arcangeli <andrea@qumranet.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>, Robin Holt <holt@sgi.com>,
	Jack Steiner <steiner@sgi.com>,
	Christoph Lameter <clameter@sgi.com>,
	akpm@linux-foundation.org, Nick Piggin <npiggin@suse.de>,
	Steve Wise <swise@opengridcomputing.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	linux-mm@kvack.org, Kanoj Sarcar <kanojsarcar@yahoo.com>,
	Roland Dreier <rdreier@cisco.com>,
	linux-kernel@vger.kernel.org, Avi Kivity <avi@qumranet.com>,
	kvm-devel@lists.sourceforge.net, general@lists.openfabrics.org,
	Hugh Dickins <hugh@veritas.com>, Chris Wright <chrisw@redhat.com>,
	Marcelo Tosatti <marcelo@kvack.org>
Subject: Re: mmu notifier #v14
Date: Sat, 26 Apr 2008 13:59:23 -0500	[thread overview]
Message-ID: <48137B8B.7010202@us.ibm.com> (raw)
In-Reply-To: <20080426164511.GJ9514@duo.random>

Andrea Arcangeli wrote:
> Hello everyone,
>
> here it is the mmu notifier #v14.
>
> 	http://www.kernel.org/pub/linux/kernel/people/andrea/patches/v2.6/2.6.25/mmu-notifier-v14/
>
> Please everyone involved review and (hopefully ;) ack that this is
> safe to go in 2.6.26, the most important is to verify that this is a
> noop when disarmed regardless of MMU_NOTIFIER=y or =n.
>
> 	http://www.kernel.org/pub/linux/kernel/people/andrea/patches/v2.6/2.6.25/mmu-notifier-v14/mmu-notifier-core
>
> I'll be sending that patch to Andrew inbox.
>
> Signed-off-by: Andrea Arcangeli <andrea@qumranet.com>
>
> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
> index 8d45fab..ce3251c 100644
> --- a/arch/x86/kvm/Kconfig
> +++ b/arch/x86/kvm/Kconfig
> @@ -21,6 +21,7 @@ config KVM
>  	tristate "Kernel-based Virtual Machine (KVM) support"
>  	depends on HAVE_KVM
>  	select PREEMPT_NOTIFIERS
> +	select MMU_NOTIFIER
>  	select ANON_INODES
>  	---help---
>  	  Support hosting fully virtualized guest machines using hardware
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 2ad6f54..853087a 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -663,6 +663,108 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
>  	account_shadowed(kvm, gfn);
>  }
>
> +static void kvm_unmap_spte(struct kvm *kvm, u64 *spte)
> +{
> +	struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
> +	get_page(page);
>   

You should not assume a struct page exists for any given spte. Instead, 
use kvm_get_pfn() and kvm_release_pfn_clean().

>  static void nonpaging_free(struct kvm_vcpu *vcpu)
> @@ -1643,11 +1771,11 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>  	int r;
>  	u64 gpte = 0;
>  	pfn_t pfn;
> -
> -	vcpu->arch.update_pte.largepage = 0;
> +	int mmu_seq;
> +	int largepage;
>
>  	if (bytes != 4 && bytes != 8)
> -		return;
> +		goto out_lock;
>
>  	/*
>  	 * Assume that the pte write on a page table of the same type
> @@ -1660,7 +1788,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>  		if ((bytes == 4) && (gpa % 4 == 0)) {
>  			r = kvm_read_guest(vcpu->kvm, gpa & ~(u64)7, &gpte, 8);
>  			if (r)
> -				return;
> +				goto out_lock;
>  			memcpy((void *)&gpte + (gpa % 8), new, 4);
>  		} else if ((bytes == 8) && (gpa % 8 == 0)) {
>  			memcpy((void *)&gpte, new, 8);
> @@ -1670,23 +1798,35 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>  			memcpy((void *)&gpte, new, 4);
>  	}
>  	if (!is_present_pte(gpte))
> -		return;
> +		goto out_lock;
>  	gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
>
> +	largepage = 0;
>  	down_read(&current->mm->mmap_sem);
>  	if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) {
>  		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
> -		vcpu->arch.update_pte.largepage = 1;
> +		largepage = 1;
>  	}
> +	mmu_seq = atomic_read(&vcpu->kvm->arch.mmu_notifier_seq);
> +	/* implicit mb(), we'll read before PT lock is unlocked */
>  	pfn = gfn_to_pfn(vcpu->kvm, gfn);
>  	up_read(&current->mm->mmap_sem);
>
> -	if (is_error_pfn(pfn)) {
> -		kvm_release_pfn_clean(pfn);
> -		return;
> -	}
> +	if (is_error_pfn(pfn))
> +		goto out_release_and_lock;
> +
> +	spin_lock(&vcpu->kvm->mmu_lock);
> +	BUG_ON(!is_error_pfn(vcpu->arch.update_pte.pfn));
>  	vcpu->arch.update_pte.gfn = gfn;
>  	vcpu->arch.update_pte.pfn = pfn;
> +	vcpu->arch.update_pte.largepage = largepage;
> +	vcpu->arch.update_pte.mmu_seq = mmu_seq;
> +	return;
> +
> +out_release_and_lock:
> +	kvm_release_pfn_clean(pfn);
> +out_lock:
> +	spin_lock(&vcpu->kvm->mmu_lock);
>  }
>   

Perhaps I just have a weak stomach but I am uneasy having a function 
that takes a lock on exit. I walked through the logic and it doesn't 
appear to be wrong but it also is pretty clear that you could defer the 
acquisition of the lock to the caller (in this case, kvm_mmu_pte_write) 
by moving the update_pte assignment into kvm_mmu_pte_write.

>  void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
> @@ -1711,7 +1851,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>
>  	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
>  	mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
>   

Worst case, you pass 4 more pointer arguments here and, take the spin 
lock, and then depending on the result of mmu_guess_page_from_pte_write, 
update vcpu->arch.update_pte.

> @@ -3899,13 +4037,12 @@ static void kvm_free_vcpus(struct kvm *kvm)
>
>  void kvm_arch_destroy_vm(struct kvm *kvm)
>  {
> -	kvm_free_pit(kvm);
> -	kfree(kvm->arch.vpic);
> -	kfree(kvm->arch.vioapic);
> -	kvm_free_vcpus(kvm);
> -	kvm_free_physmem(kvm);
> -	if (kvm->arch.apic_access_page)
> -		put_page(kvm->arch.apic_access_page);
> +	/*
> +	 * kvm_mmu_notifier_release() will be called before
> +	 * mmu_notifier_unregister returns, if it didn't run
> +	 * already.
> +	 */
> +	mmu_notifier_unregister(&kvm->arch.mmu_notifier, kvm->mm);
>  	kfree(kvm);
>  }
>   

Why move the destruction of the vm to the MMU notifier unregister hook? 
Does anything else ever call mmu_notifier_unregister that would 
implicitly destroy the VM?

Regards,

Anthony Liguori

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2008-04-26 18:59 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-26 16:46 Andrea Arcangeli
2008-04-26 18:59 ` Anthony Liguori [this message]
2008-04-27  0:20   ` Andrea Arcangeli
2008-04-27  1:54     ` [kvm-devel] " Anthony Liguori
2008-04-27  3:05       ` Andrea Arcangeli
2008-04-28 14:48         ` Marcelo Tosatti
2008-05-01 18:12 ` mmu notifier-core v14->v15 diff for review Andrea Arcangeli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48137B8B.7010202@us.ibm.com \
    --to=aliguori@us.ibm.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=andrea@qumranet.com \
    --cc=avi@qumranet.com \
    --cc=chrisw@redhat.com \
    --cc=clameter@sgi.com \
    --cc=general@lists.openfabrics.org \
    --cc=holt@sgi.com \
    --cc=hugh@veritas.com \
    --cc=kanojsarcar@yahoo.com \
    --cc=kvm-devel@lists.sourceforge.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=marcelo@kvack.org \
    --cc=npiggin@suse.de \
    --cc=rdreier@cisco.com \
    --cc=rusty@rustcorp.com.au \
    --cc=steiner@sgi.com \
    --cc=swise@opengridcomputing.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox