From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from psmtp.com (na3sys010amx129.postini.com [74.125.245.129]) by kanga.kvack.org (Postfix) with SMTP id 661616B0032 for ; Tue, 2 Jul 2013 11:17:15 -0400 (EDT) Message-ID: <51D2EEF7.8000607@suse.de> Date: Tue, 02 Jul 2013 17:17:11 +0200 From: Alexander Graf MIME-Version: 1.0 Subject: Re: [PATCH -V3 3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation References: <1372743918-12293-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com> <1372743918-12293-3-git-send-email-aneesh.kumar@linux.vnet.ibm.com> In-Reply-To: <1372743918-12293-3-git-send-email-aneesh.kumar@linux.vnet.ibm.com> Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: "Aneesh Kumar K.V" Cc: benh@kernel.crashing.org, paulus@samba.org, m.szyprowski@samsung.com, mina86@mina86.com, linux-mm@kvack.org, linuxppc-dev@lists.ozlabs.org, kvm-ppc@vger.kernel.org, kvm@vger.kernel.org On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote: > From: "Aneesh Kumar K.V" > > Older version of power architecture use Real Mode Offset register and Real Mode Limit > Selector for mapping guest Real Mode Area. The guest RMA should be physically > contigous since we use the range when address translation is not enabled. > > This patch switch RMA allocation code to use contigous memory allocator. The patch > also remove the the linear allocator which not used any more > > Acked-by: Paul Mackerras > Signed-off-by: Aneesh Kumar K.V > --- > arch/powerpc/include/asm/kvm_book3s_64.h | 1 + > arch/powerpc/include/asm/kvm_host.h | 12 +-- > arch/powerpc/include/asm/kvm_ppc.h | 8 +- > arch/powerpc/kernel/setup_64.c | 2 - > arch/powerpc/kvm/book3s_hv.c | 27 +++-- > arch/powerpc/kvm/book3s_hv_builtin.c | 167 ++++++++----------------------- > 6 files changed, 65 insertions(+), 152 deletions(-) > > diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h > index f8355a9..76ff0b5 100644 > --- a/arch/powerpc/include/asm/kvm_book3s_64.h > +++ b/arch/powerpc/include/asm/kvm_book3s_64.h > @@ -37,6 +37,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) > > #ifdef CONFIG_KVM_BOOK3S_64_HV > #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ > +extern unsigned long kvm_rma_pages; > #endif > > #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ > diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h > index 0097dab..3328353 100644 > --- a/arch/powerpc/include/asm/kvm_host.h > +++ b/arch/powerpc/include/asm/kvm_host.h > @@ -183,13 +183,9 @@ struct kvmppc_spapr_tce_table { > struct page *pages[0]; > }; > > -struct kvmppc_linear_info { > - void *base_virt; > - unsigned long base_pfn; > - unsigned long npages; > - struct list_head list; > - atomic_t use_count; > - int type; > +struct kvm_rma_info { > + atomic_t use_count; > + unsigned long base_pfn; > }; > > /* XICS components, defined in book3s_xics.c */ > @@ -246,7 +242,7 @@ struct kvm_arch { > int tlbie_lock; > unsigned long lpcr; > unsigned long rmor; > - struct kvmppc_linear_info *rma; > + struct kvm_rma_info *rma; > unsigned long vrma_slb_v; > int rma_setup_done; > int using_mmu_notifiers; > diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h > index b5ef7a3..5a26bfc 100644 > --- a/arch/powerpc/include/asm/kvm_ppc.h > +++ b/arch/powerpc/include/asm/kvm_ppc.h > @@ -137,8 +137,8 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, > unsigned long ioba, unsigned long tce); > extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, > struct kvm_allocate_rma *rma); > -extern struct kvmppc_linear_info *kvm_alloc_rma(void); > -extern void kvm_release_rma(struct kvmppc_linear_info *ri); > +extern struct kvm_rma_info *kvm_alloc_rma(void); > +extern void kvm_release_rma(struct kvm_rma_info *ri); > extern struct page *kvm_alloc_hpt(unsigned long nr_pages); > extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); > extern int kvmppc_core_init_vm(struct kvm *kvm); > @@ -282,7 +282,6 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) > } > > extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); > -extern void kvm_linear_init(void); > > #else > static inline void __init kvm_cma_reserve(void) > @@ -291,9 +290,6 @@ static inline void __init kvm_cma_reserve(void) > static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) > {} > > -static inline void kvm_linear_init(void) > -{} > - > static inline u32 kvmppc_get_xics_latch(void) > { > return 0; > diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c > index ee28d1f..8a022f5 100644 > --- a/arch/powerpc/kernel/setup_64.c > +++ b/arch/powerpc/kernel/setup_64.c > @@ -611,8 +611,6 @@ void __init setup_arch(char **cmdline_p) > /* Initialize the MMU context management stuff */ > mmu_context_init(); > > - kvm_linear_init(); > - > /* Interrupt code needs to be 64K-aligned */ > if ((unsigned long)_stext& 0xffff) > panic("Kernelbase not 64K-aligned (0x%lx)!\n", > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c > index 550f592..55c8519 100644 > --- a/arch/powerpc/kvm/book3s_hv.c > +++ b/arch/powerpc/kvm/book3s_hv.c > @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size) > > static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) > { > - struct kvmppc_linear_info *ri = vma->vm_file->private_data; > struct page *page; > + struct kvm_rma_info *ri = vma->vm_file->private_data; > > - if (vmf->pgoff>= ri->npages) > + if (vmf->pgoff>= kvm_rma_pages) > return VM_FAULT_SIGBUS; > > page = pfn_to_page(ri->base_pfn + vmf->pgoff); > @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) > > static int kvm_rma_release(struct inode *inode, struct file *filp) > { > - struct kvmppc_linear_info *ri = filp->private_data; > + struct kvm_rma_info *ri = filp->private_data; > > kvm_release_rma(ri); > return 0; > @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = { > > long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) > { > - struct kvmppc_linear_info *ri; > long fd; > + struct kvm_rma_info *ri; > + /* > + * Only do this on PPC970 in HV mode > + */ > + if (!cpu_has_feature(CPU_FTR_HVMODE) || > + !cpu_has_feature(CPU_FTR_ARCH_201)) > + return -EINVAL; Is this really what we want? User space may want to use an RMA on POWER7 systems, no? > + > + if (!kvm_rma_pages) > + return -EINVAL; > > ri = kvm_alloc_rma(); > if (!ri) > @@ -1560,7 +1569,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) > if (fd< 0) > kvm_release_rma(ri); > > - ret->rma_size = ri->npages<< PAGE_SHIFT; > + ret->rma_size = kvm_rma_pages<< PAGE_SHIFT; > return fd; > } > > @@ -1725,7 +1734,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) > { > int err = 0; > struct kvm *kvm = vcpu->kvm; > - struct kvmppc_linear_info *ri = NULL; > + struct kvm_rma_info *ri = NULL; > unsigned long hva; > struct kvm_memory_slot *memslot; > struct vm_area_struct *vma; > @@ -1803,7 +1812,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) > > } else { > /* Set up to use an RMO region */ > - rma_size = ri->npages; > + rma_size = kvm_rma_pages; > if (rma_size> memslot->npages) > rma_size = memslot->npages; > rma_size<<= PAGE_SHIFT; > @@ -1831,14 +1840,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) > /* POWER7 */ > lpcr&= ~(LPCR_VPM0 | LPCR_VRMA_L); > lpcr |= rmls<< LPCR_RMLS_SH; > - kvm->arch.rmor = kvm->arch.rma->base_pfn<< PAGE_SHIFT; > + kvm->arch.rmor = ri->base_pfn<< PAGE_SHIFT; > } > kvm->arch.lpcr = lpcr; > pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", > ri->base_pfn<< PAGE_SHIFT, rma_size, lpcr); > > /* Initialize phys addrs of pages in RMO */ > - npages = ri->npages; > + npages = kvm_rma_pages; > porder = __ilog2(npages); > physp = memslot->arch.slot_phys; > if (physp) { > diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c > index 4b865c5..8cd0dae 100644 > --- a/arch/powerpc/kvm/book3s_hv_builtin.c > +++ b/arch/powerpc/kvm/book3s_hv_builtin.c > @@ -21,13 +21,6 @@ > #include > > #include "book3s_hv_cma.h" > - > -#define KVM_LINEAR_RMA 0 > -#define KVM_LINEAR_HPT 1 > - > -static void __init kvm_linear_init_one(ulong size, int count, int type); > -static struct kvmppc_linear_info *kvm_alloc_linear(int type); > -static void kvm_release_linear(struct kvmppc_linear_info *ri); > /* > * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206) > * should be power of 2. > @@ -37,19 +30,17 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri); > * By default we reserve 5% of memory for hash pagetable allocation. > */ > static unsigned long kvm_cma_resv_ratio = 5; > - > -/*************** RMA *************/ > - > /* > - * This maintains a list of RMAs (real mode areas) for KVM guests to use. > + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area. > * Each RMA has to be physically contiguous and of a size that the > * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB, > * and other larger sizes. Since we are unlikely to be allocate that > * much physically contiguous memory after the system is up and running, > - * we preallocate a set of RMAs in early boot for KVM to use. > + * we preallocate a set of RMAs in early boot using CMA. > + * should be power of 2. > */ > -static unsigned long kvm_rma_size = 64<< 20; /* 64MB */ > -static unsigned long kvm_rma_count; > +unsigned long kvm_rma_pages = (1<< 27)>> PAGE_SHIFT; /* 128MB */ > +EXPORT_SYMBOL_GPL(kvm_rma_pages); > > /* Work out RMLS (real mode limit selector) field value for a given RMA size. > Assumes POWER7 or PPC970. */ > @@ -79,35 +70,50 @@ static inline int lpcr_rmls(unsigned long rma_size) > > static int __init early_parse_rma_size(char *p) > { > - if (!p) > - return 1; > + unsigned long kvm_rma_size; > > + pr_debug("%s(%s)\n", __func__, p); > + if (!p) > + return -EINVAL; > kvm_rma_size = memparse(p,&p); > - > + /* > + * Check that the requested size is one supported in hardware > + */ > + if (lpcr_rmls(kvm_rma_size)< 0) { > + pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); > + return -EINVAL; > + } > + kvm_rma_pages = kvm_rma_size>> PAGE_SHIFT; > return 0; > } > early_param("kvm_rma_size", early_parse_rma_size); > > -static int __init early_parse_rma_count(char *p) > +struct kvm_rma_info *kvm_alloc_rma() > { > - if (!p) > - return 1; > - > - kvm_rma_count = simple_strtoul(p, NULL, 0); > - > - return 0; > -} > -early_param("kvm_rma_count", early_parse_rma_count); > - > -struct kvmppc_linear_info *kvm_alloc_rma(void) > -{ > - return kvm_alloc_linear(KVM_LINEAR_RMA); > + struct page *page; > + struct kvm_rma_info *ri; > + > + ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL); > + if (!ri) > + return NULL; > + page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages); > + if (!page) > + goto err_out; > + atomic_set(&ri->use_count, 1); > + ri->base_pfn = page_to_pfn(page); > + return ri; > +err_out: > + kfree(ri); > + return NULL; > } > EXPORT_SYMBOL_GPL(kvm_alloc_rma); > > -void kvm_release_rma(struct kvmppc_linear_info *ri) > +void kvm_release_rma(struct kvm_rma_info *ri) > { > - kvm_release_linear(ri); > + if (atomic_dec_and_test(&ri->use_count)) { > + kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages); > + kfree(ri); > + } > } > EXPORT_SYMBOL_GPL(kvm_release_rma); > > @@ -137,101 +143,6 @@ void kvm_release_hpt(struct page *page, unsigned long nr_pages) > } > EXPORT_SYMBOL_GPL(kvm_release_hpt); > > -/*************** generic *************/ > - > -static LIST_HEAD(free_linears); > -static DEFINE_SPINLOCK(linear_lock); > - > -static void __init kvm_linear_init_one(ulong size, int count, int type) Please split the linar removal bits out into a separate patch :). Alex > -{ > - unsigned long i; > - unsigned long j, npages; > - void *linear; > - struct page *pg; > - const char *typestr; > - struct kvmppc_linear_info *linear_info; > - > - if (!count) > - return; > - > - typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT"; > - > - npages = size>> PAGE_SHIFT; > - linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info)); > - for (i = 0; i< count; ++i) { > - linear = alloc_bootmem_align(size, size); > - pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, > - size>> 20); > - linear_info[i].base_virt = linear; > - linear_info[i].base_pfn = __pa(linear)>> PAGE_SHIFT; > - linear_info[i].npages = npages; > - linear_info[i].type = type; > - list_add_tail(&linear_info[i].list,&free_linears); > - atomic_set(&linear_info[i].use_count, 0); > - > - pg = pfn_to_page(linear_info[i].base_pfn); > - for (j = 0; j< npages; ++j) { > - atomic_inc(&pg->_count); > - ++pg; > - } > - } > -} > - > -static struct kvmppc_linear_info *kvm_alloc_linear(int type) > -{ > - struct kvmppc_linear_info *ri, *ret; > - > - ret = NULL; > - spin_lock(&linear_lock); > - list_for_each_entry(ri,&free_linears, list) { > - if (ri->type != type) > - continue; > - > - list_del(&ri->list); > - atomic_inc(&ri->use_count); > - memset(ri->base_virt, 0, ri->npages<< PAGE_SHIFT); > - ret = ri; > - break; > - } > - spin_unlock(&linear_lock); > - return ret; > -} > - > -static void kvm_release_linear(struct kvmppc_linear_info *ri) > -{ > - if (atomic_dec_and_test(&ri->use_count)) { > - spin_lock(&linear_lock); > - list_add_tail(&ri->list,&free_linears); > - spin_unlock(&linear_lock); > - > - } > -} > - > -/* > - * Called at boot time while the bootmem allocator is active, > - * to allocate contiguous physical memory for the hash page > - * tables for guests. > - */ > -void __init kvm_linear_init(void) > -{ > - /* RMA */ > - /* Only do this on PPC970 in HV mode */ > - if (!cpu_has_feature(CPU_FTR_HVMODE) || > - !cpu_has_feature(CPU_FTR_ARCH_201)) > - return; > - > - if (!kvm_rma_size || !kvm_rma_count) > - return; > - > - /* Check that the requested size is one supported in hardware */ > - if (lpcr_rmls(kvm_rma_size)< 0) { > - pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); > - return; > - } > - > - kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA); > -} > - > /** > * kvm_cma_reserve() - reserve area for kvm hash pagetable > * > @@ -265,6 +176,8 @@ void __init kvm_cma_reserve(void) > align_size = __rounddown_pow_of_two(selected_size); > else > align_size = HPT_ALIGN_PAGES<< PAGE_SHIFT; > + > + align_size = max(kvm_rma_pages<< PAGE_SHIFT, align_size); > kvm_cma_declare_contiguous(selected_size, align_size); > } > } -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org