From mboxrd@z Thu Jan 1 00:00:00 1970 From: Ian Wienand Date: Wed, 10 May 2006 13:42:33 +1000 Message-Id: <20060510034233.17792.67377.sendpatchset@wagner.orchestra.cse.unsw.EDU.AU> In-Reply-To: <20060510034206.17792.82504.sendpatchset@wagner.orchestra.cse.unsw.EDU.AU> References: <20060510034206.17792.82504.sendpatchset@wagner.orchestra.cse.unsw.EDU.AU> Subject: [RFC 5/6] LVHPT - setup infrastructure Sender: owner-linux-mm@kvack.org Return-Path: To: linux-ia64@vger.kernel.org Cc: linux-mm@kvack.org, Ian Wienand List-ID: LVHPT setup The following patch sets up the LVHPT on boot. For the initial boot processor, we allocate the VHPT in ia64_mmu_init. Other CPUs get the LVHPT allocated from do_boot_cpu before they are woken up. The logic is per CPU, but it attempts to choose a reasonable size that can be pinned in the TLB. There are facilities to clamp it to a specific size. Signed-Off-By: Ian Wienand --- Documentation/kernel-parameters.txt | 14 +++ arch/ia64/kernel/setup.c | 31 +++++++ arch/ia64/kernel/smpboot.c | 12 ++ arch/ia64/mm/Makefile | 1 arch/ia64/mm/init.c | 55 ++++++++++--- arch/ia64/mm/lvhpt.c | 150 ++++++++++++++++++++++++++++++++++++ include/asm-ia64/kregs.h | 1 include/asm-ia64/lvhpt.h | 24 +++++ include/asm-ia64/pgtable.h | 10 ++ 9 files changed, 286 insertions(+), 12 deletions(-) Index: linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/kernel/setup.c =================================================================== --- linux-2.6.17-rc3-lvhpt-v2-fresh.orig/arch/ia64/kernel/setup.c 2006-05-10 10:00:50.000000000 +1000 +++ linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/kernel/setup.c 2006-05-10 10:01:49.000000000 +1000 @@ -62,6 +62,10 @@ #include #include +#if defined(CONFIG_IA64_LONG_FORMAT_VHPT) +#include +#endif + #if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE) # error "struct cpuinfo_ia64 too big!" #endif @@ -284,6 +288,18 @@ #endif } +static void __init parse_cmdline_early (char ** cmdline_p) +{ +#ifdef CONFIG_IA64_LONG_FORMAT_VHPT + char *p; + extern int lvhpt_bits_clamp_setup(char *s); + + strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE); + if ((p = strstr(*cmdline_p, "lvhpt_bits_clamp="))) + lvhpt_bits_clamp_setup(p + 17); +#endif +} + static void __init io_port_init (void) { @@ -400,12 +416,14 @@ void __init setup_arch (char **cmdline_p) { + extern void __devinit ia64_tlb_early_init(void); + unw_init(); ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist); *cmdline_p = __va(ia64_boot_param->command_line); - strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE); + parse_cmdline_early(cmdline_p); efi_init(); io_port_init(); @@ -438,6 +456,17 @@ ia64_setup_printk_clock(); + /* Setup some information about the TLBS */ + ia64_tlb_early_init(); + +#ifdef CONFIG_IA64_LONG_FORMAT_VHPT + /* + * put this after all the ACPI walking so we can get the size + * of memory on nodes + */ + compute_vhpt_size(); +#endif + #ifdef CONFIG_SMP cpu_physical_id(0) = hard_smp_processor_id(); Index: linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/kernel/smpboot.c =================================================================== --- linux-2.6.17-rc3-lvhpt-v2-fresh.orig/arch/ia64/kernel/smpboot.c 2006-05-10 10:00:50.000000000 +1000 +++ linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/kernel/smpboot.c 2006-05-10 10:01:49.000000000 +1000 @@ -61,6 +61,11 @@ #include #include +#ifdef CONFIG_IA64_LONG_FORMAT_VHPT +#include +#endif + + #define SMP_DEBUG 0 #if SMP_DEBUG @@ -512,6 +517,13 @@ do_rest: task_for_booting_cpu = c_idle.idle; +#ifdef CONFIG_IA64_LONG_FORMAT_VHPT + if (alloc_vhpt(cpu)) { + panic("Couldn't allocate VHPT on CPU %d\n", cpu); + } + Dprintk("Allocated long format VHPT for CPU %d at: 0x%lx, size: 0x%lx\n", cpu, vhpt_base[cpu], long_vhpt_size(cpu)); +#endif /* CONFIG_IA64_LONG_FORMAT_VHPT */ + Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid); set_brendez_area(cpu); Index: linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/mm/init.c =================================================================== --- linux-2.6.17-rc3-lvhpt-v2-fresh.orig/arch/ia64/mm/init.c 2006-05-10 10:00:50.000000000 +1000 +++ linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/mm/init.c 2006-05-10 10:01:49.000000000 +1000 @@ -37,6 +37,10 @@ #include #include +#ifdef CONFIG_IA64_LONG_FORMAT_VHPT +#include +#endif + DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist); @@ -338,7 +342,7 @@ void __devinit ia64_mmu_init (void *my_cpu_data) { - unsigned long psr, pta, impl_va_bits; + unsigned long psr, pta; extern void __devinit tlb_init (void); #ifdef CONFIG_DISABLE_VHPT @@ -347,16 +351,47 @@ # define VHPT_ENABLE_BIT 1 #endif +#ifdef CONFIG_IA64_LONG_FORMAT_VHPT + int cpu = smp_processor_id(); + + /* Allocate the VHPT for the boot processor. VHPT for other + * processors is allocated in smpboot.c:do_boot_cpu() as they + * are bought online. + */ + if (cpu == 0) + { + unsigned long size = lvhpt_size(cpu); + lvhpt_per_cpu_info[0].base = (unsigned long)__alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS)); + if (lvhpt_per_cpu_info[0].base == 0) + panic("Couldn't allocate VHPT on CPU %d, size: 0x%lx!\n", + cpu, size); + printk(KERN_INFO "Allocated long format VHPT for boot processor (CPU %d) at: 0x%lx, size: 0x%lx\n", + cpu, lvhpt_per_cpu_info[0].base, size); + } +#else /* !CONFIG_IA64_LONG_FORMAT_VHPT */ + unsigned long impl_va_bits; +#endif /* Pin mapping for percpu area into TLB */ psr = ia64_clear_ic(); ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR, pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)), PERCPU_PAGE_SHIFT); +#ifdef CONFIG_IA64_LONG_FORMAT_VHPT + /* Insert the permanent translation for the VHPT */ + ia64_itr(0x2, IA64_TR_LONG_VHPT, LONG_VHPT_BASE, + pte_val(pfn_pte(__pa(lvhpt_per_cpu_info[cpu].base) >> PAGE_SHIFT, PAGE_KERNEL)), lvhpt_per_cpu_info[cpu].bits); +#endif ia64_set_psr(psr); ia64_srlz_i(); - +#ifdef CONFIG_IA64_LONG_FORMAT_VHPT +# define VHPT_FORMAT_BIT 1 +# define vhpt_bits lvhpt_per_cpu_info[cpu].bits + pta = LONG_VHPT_BASE; +#else /* + * SHORT FORMAT VHPT (virtually mapped linear pagetable) + * * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped * address space. The IA-64 architecture guarantees that at least 50 bits of * virtual address space are implemented but if we pick a large enough page size @@ -367,6 +402,7 @@ * address space to not permit mappings that would overlap with the VMLPT. * --davidm 00/12/06 */ +# define VHPT_FORMAT_BIT 0 # define pte_bits 3 # define mapped_space_bits (3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT) /* @@ -376,28 +412,27 @@ * non-speculative accesses to the virtual page table, so the address range of the * virtual page table itself needs to be covered by virtual page table. */ -# define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits) +# define vhpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits) # define POW2(n) (1ULL << (n)) - impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61))); if (impl_va_bits < 51 || impl_va_bits > 61) panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1); /* * mapped_space_bits - PAGE_SHIFT is the total number of ptes we need, - * which must fit into "vmlpt_bits - pte_bits" slots. Second half of + * which must fit into "vhpt_bits - pte_bits" slots. Second half of * the test makes sure that our mapped space doesn't overlap the * unimplemented hole in the middle of the region. */ - if ((mapped_space_bits - PAGE_SHIFT > vmlpt_bits - pte_bits) || + if ((mapped_space_bits - PAGE_SHIFT > vhpt_bits - pte_bits) || (mapped_space_bits > impl_va_bits - 1)) panic("Cannot build a big enough virtual-linear page table" " to cover mapped address space.\n" " Try using a smaller page size.\n"); - /* place the VMLPT at the end of each page-table mapped region: */ - pta = POW2(61) - POW2(vmlpt_bits); + pta = POW2(61) - POW2(vhpt_bits); +#endif /* * Set the (virtually mapped linear) page table address. Bit @@ -405,10 +440,8 @@ * size of the table, and bit 0 whether the VHPT walker is * enabled. */ - ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT); - + ia64_set_pta(pta | (VHPT_FORMAT_BIT << 8) | (vhpt_bits << 2) | VHPT_ENABLE_BIT); ia64_tlb_init(); - #ifdef CONFIG_HUGETLB_PAGE ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2); ia64_srlz_d(); Index: linux-2.6.17-rc3-lvhpt-v2-fresh/include/asm-ia64/pgtable.h =================================================================== --- linux-2.6.17-rc3-lvhpt-v2-fresh.orig/include/asm-ia64/pgtable.h 2006-05-10 10:00:50.000000000 +1000 +++ linux-2.6.17-rc3-lvhpt-v2-fresh/include/asm-ia64/pgtable.h 2006-05-10 10:08:46.000000000 +1000 @@ -156,6 +156,10 @@ #include #include +#ifdef CONFIG_IA64_LONG_FORMAT_VHPT +#include +#endif + /* * Next come the mappings that determine how mmap() protection bits * (PROT_EXEC, PROT_READ, PROT_WRITE, PROT_NONE) get implemented. The @@ -556,6 +560,7 @@ extern void memmap_init (unsigned long size, int nid, unsigned long zone, unsigned long start_pfn); # endif /* CONFIG_VIRTUAL_MEM_MAP */ + # endif /* !__ASSEMBLY__ */ /* @@ -576,6 +581,11 @@ #define KERNEL_TR_PAGE_SIZE (1 << KERNEL_TR_PAGE_SHIFT) /* + * Long format VHPT + */ +#define LONG_VHPT_BASE (0xc000000000000000 - (1UL << lvhpt_per_cpu_info[smp_processor_id()].bits)) + +/* * No page table caches to initialise */ #define pgtable_cache_init() do { } while (0) Index: linux-2.6.17-rc3-lvhpt-v2-fresh/Documentation/kernel-parameters.txt =================================================================== --- linux-2.6.17-rc3-lvhpt-v2-fresh.orig/Documentation/kernel-parameters.txt 2006-05-10 10:00:50.000000000 +1000 +++ linux-2.6.17-rc3-lvhpt-v2-fresh/Documentation/kernel-parameters.txt 2006-05-10 10:01:49.000000000 +1000 @@ -50,6 +50,7 @@ ISDN Appropriate ISDN support is enabled. JOY Appropriate joystick support is enabled. LP Printer support is enabled. + LONG_FORMAT_VHPT Long Format VHPT is enabled LOOP Loopback device support is enabled. M68k M68k architecture is enabled. These options have more detailed description inside of @@ -805,6 +806,19 @@ ltpc= [NET] Format: ,, + lvhpt_bits_clamp= [IA64,LONG_FORMAT_VHPT] + Format: <1-39> + + Clamp the size of the LVHPT (on all + nodes for a NUMA system) to 2^n bits. + E.g. 2^22 gives a LVHPT of 4MB. We + pin a TLB entry of this size, so the + size must be valid for the + architecture, otherwise your kernel + will not boot. By default we take a + good guess at sizing this for optimal + operation. + mac5380= [HW,SCSI] Format: ,,,, Index: linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/mm/Makefile =================================================================== --- linux-2.6.17-rc3-lvhpt-v2-fresh.orig/arch/ia64/mm/Makefile 2006-05-10 10:00:50.000000000 +1000 +++ linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/mm/Makefile 2006-05-10 10:01:49.000000000 +1000 @@ -9,3 +9,4 @@ obj-$(CONFIG_DISCONTIGMEM) += discontig.o obj-$(CONFIG_SPARSEMEM) += discontig.o obj-$(CONFIG_FLATMEM) += contig.o +obj-$(CONFIG_IA64_LONG_FORMAT_VHPT) += lvhpt.o \ No newline at end of file Index: linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/mm/lvhpt.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.17-rc3-lvhpt-v2-fresh/arch/ia64/mm/lvhpt.c 2006-05-10 13:05:53.000000000 +1000 @@ -0,0 +1,150 @@ +/* Long Format VHPT support functions */ +#include +#include +#include + +#include +#include +#include + +#include +#include + +/* + * This allows you to clamp the number of bits used for the long + * format vhpt. TODO check for invalid values here. + */ +static int lvhpt_bits_clamp; + +int __init +lvhpt_bits_clamp_setup(char *s) +{ + if (sscanf(s, "%d", &lvhpt_bits_clamp) <= 0) + lvhpt_bits_clamp = 0; + return 1; +} + +__setup("lvhpt_bits_clamp=", lvhpt_bits_clamp_setup); + + +/* We try to size the LVHPT to cover the node local memory for the + * CPU. On initalisation we stash the location and size of each CPU's + * LVHPT table here. */ +struct lvhpt_per_cpu_info_struct lvhpt_per_cpu_info[NR_CPUS]; + +/* + * This code must be called on a CPU which has it's MMU + * initialized. The page allocator seems to depend on it. + * + * Returns 0 on success. + */ +unsigned int +alloc_vhpt(int cpu) +{ + int lvhpt_bits = lvhpt_per_cpu_info[cpu].bits; + +#ifdef CONFIG_NUMA + struct page *page; + + page = alloc_pages_node(cpu_to_node(cpu), __GFP_HIGHMEM|GFP_ATOMIC, lvhpt_bits - PAGE_SHIFT); + if (!page) + return -1; + lvhpt_per_cpu_info[cpu].base = (unsigned long) page_address(page); +#else + lvhpt_per_cpu_info[cpu].base = (unsigned long)__get_free_pages(__GFP_HIGHMEM|GFP_ATOMIC, + lvhpt_bits - PAGE_SHIFT); +#endif + return (lvhpt_per_cpu_info[cpu].base == 0UL); +} + +/* + * Passed to efi_memmap_walk to simply add up how much memory we have. + * This is used to size the LVHPT + */ +static int +get_total_ram(unsigned long start, unsigned long end, void *arg) +{ + unsigned long *s = arg; + *s += (end - start); + return 0; +} + +/* We use this for sizing the lvhpt */ +static unsigned long lvhpt_node_addressable_memory[MAX_NUMNODES]; + +#ifdef CONFIG_NUMA +static void compute_vhpt_size_numa(void) +{ + int i; + + if (lvhpt_bits_clamp) + { + printk(KERN_INFO "Clamping LVHPT to %d bits\n", lvhpt_bits_clamp); + for (i = 0; i < NR_CPUS; i++) + lvhpt_per_cpu_info[i].bits = lvhpt_bits_clamp; + return; + } + + /* In the NUMA case, we evaluate how much memory each node has + * and then try to size it to three times the physical memory + * of the node (as this gives us the best coverage. As we pin + * this with a TLB entry, we need to make sure the size we + * choose is however suitable for the architecture. + */ + for (i = 0; i < num_node_memblks; i++) { + printk(KERN_ERR "vhpt_addr_mem[%d] = %lx\n", node_memblk[i].nid, node_memblk[i].size); + lvhpt_node_addressable_memory[node_memblk[i].nid] += + node_memblk[i].size; + } + + for (i = 0; i < NR_CPUS; i++) + { + unsigned long size = + 3 * (lvhpt_node_addressable_memory[cpu_to_node(i)] >> PAGE_SHIFT); + lvhpt_per_cpu_info[i].bits = find_largest_page_size(size); + } +} +#endif + +/* This version in both NUMA and non-NUMA, since we can use it in either. */ +static void compute_vhpt_size_non_numa(void) +{ + int i; + int bits; + + if (lvhpt_bits_clamp) + { + printk(KERN_INFO "Clamping LVHPT to %d bits\n", lvhpt_bits_clamp); + for (i=0; i < NR_CPUS; i++) + lvhpt_per_cpu_info[i].bits = lvhpt_bits_clamp; + return; + } + + /* If this doesn't work, try to find the total memory in the + * system, which we will then size the lvhpt table to cover + * three times over. + */ + efi_memmap_walk(get_total_ram, &lvhpt_node_addressable_memory); + + /* Put a lower clamp on this of a fairly reasonable 4 megabytes */ + bits = max(22UL, find_largest_page_size(lvhpt_node_addressable_memory[0] >> PAGE_SHIFT)); + + for (i=0; i < NR_CPUS; i++) + lvhpt_per_cpu_info[i].bits = bits; +} + +void __init +compute_vhpt_size(void) +{ +#ifdef CONFIG_NUMA + /* Machines like the ZX1 don't setup all the node info we + * require, but someone might still try to boot a NUMA kernel + * on it. In this case, fall back to our non-numa case.*/ + if (num_node_memblks == 0) + compute_vhpt_size_non_numa(); + else + compute_vhpt_size_numa(); +#else + compute_vhpt_size_non_numa(); +#endif +} Index: linux-2.6.17-rc3-lvhpt-v2-fresh/include/asm-ia64/lvhpt.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.17-rc3-lvhpt-v2-fresh/include/asm-ia64/lvhpt.h 2006-05-10 10:01:49.000000000 +1000 @@ -0,0 +1,24 @@ +/* Long Format VHPT includes */ +#ifndef _ASM_IA64_LVHPT_H +#define _ASM_IA64_LVHPT_H + +/* We keep a per-cpu record of the VHPT base and size for each + * processor */ +struct lvhpt_per_cpu_info_struct { + unsigned long base; + int bits; +}; + +extern struct lvhpt_per_cpu_info_struct lvhpt_per_cpu_info[NR_CPUS]; + +/* Initalise and compute the VHPT size for each CPU */ +void __init compute_vhpt_size(void); +/* Allocate the VHPT for a CPU */ +unsigned int alloc_vhpt(int cpu); + +static inline unsigned long lvhpt_size(int cpu) +{ + return (1UL << lvhpt_per_cpu_info[cpu].bits); +} + +#endif /* _ASM_IA64_LVHPT_H */ Index: linux-2.6.17-rc3-lvhpt-v2-fresh/include/asm-ia64/kregs.h =================================================================== --- linux-2.6.17-rc3-lvhpt-v2-fresh.orig/include/asm-ia64/kregs.h 2006-05-10 10:00:50.000000000 +1000 +++ linux-2.6.17-rc3-lvhpt-v2-fresh/include/asm-ia64/kregs.h 2006-05-10 10:01:49.000000000 +1000 @@ -31,6 +31,7 @@ #define IA64_TR_PALCODE 1 /* itr1: maps PALcode as required by EFI */ #define IA64_TR_PERCPU_DATA 1 /* dtr1: percpu data */ #define IA64_TR_CURRENT_STACK 2 /* dtr2: maps kernel's memory- & register-stacks */ +#define IA64_TR_LONG_VHPT 3 /* dtr3: maps long format VHPT */ /* Processor status register bits: */ #define IA64_PSR_BE_BIT 1 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org