From: "Martin J. Bligh" <mbligh@aracnet.com>
To: linux-mm mailing list <linux-mm@kvack.org>
Subject: [RFC] patch to move lmem_map for each node onto their own nodes
Date: Tue, 10 Sep 2002 21:31:54 -0700
Message-ID: <442355984.1031693514@[10.10.2.3]>
64-bit arches find this easy, but it's not so simple on
32-bit arches ... mem_maps need to be in permanent KVA,
and all of ZONE_NORMAL is on node 0. Thus this patch
creates a window between the top of ZONE_NORMAL and the
start of the highmem area (max_low_pfn ... highstart_pfn)
and remaps some memory stolen from the other nodes into
that window.
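To make the sizing concrete, a worked example with made-up numbers
(assuming a hypothetical 64-byte struct page, and PAE large pages:
PTRS_PER_PTE = 512, so LARGE_PAGE_BYTES = 2MB):

    node pages   = 1048576                  (a 4GB remote node)
    lmem_map     = 1048576 * 64 = 67108864  bytes (64MB)
    large pages  = 67108864 / 2MB = 32      (rounded up)
    KVA reserved = 32 * 512 = 16384         4K pages (64MB)

So 16384 pages get shaved off the top of that node, and the same
amount of KVA is carved out just below highstart_pfn.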
Adds set_pmd_pfn, which does the remapping work, and a couple
of functions to discontig.c that calculate the required
space and lower max_low_pfn by reserve_pages.
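For illustration, a minimal usage sketch of the new set_pmd_pfn
(made-up vaddr/pfn values, PAE assumed; vaddr must be PMD_SIZE-aligned
and pfn must be PTRS_PER_PTE-aligned or the function bails out):

    void __init example_large_map(void)
    {
            unsigned long vaddr = 0xf0000000UL; /* free, 2MB-aligned KVA */
            unsigned long pfn = 0x100000UL;     /* phys 4GB; needs PAE */

            /* one pmd entry now maps vaddr -> phys 4GB..4GB+2MB */
            set_pmd_pfn(vaddr, pfn, PAGE_KERNEL_LARGE);
    }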
Comments, flamage, abuse? I'll submit it if you don't ... ;-)
Martin.
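In case it helps review, here's how the pieces below tie together:

    setup_memory()
        calculate_numa_remap_pages()   shrinks each remote node, and
                                       max_low_pfn drops by reserve_pages
        node_remap_start_vaddr[nid]    recorded just below highstart_pfn
    pagetable_init()
        remap_numa_kva()               set_pmd_pfn() on each large page
    zone init (last discontig.c hunk)
        free_area_init_node()          gets node_remap_start_vaddr[nid]
                                       as the lmem_map for nid != 0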
diff -urN -X /home/mbligh/.diff.exclude 39-clean-numa/arch/i386/mm/discontig.c 40-numamap/arch/i386/mm/discontig.c
--- 39-clean-numa/arch/i386/mm/discontig.c Fri Sep 6 23:03:19 2002
+++ 40-numamap/arch/i386/mm/discontig.c Tue Sep 10 17:04:11 2002
@@ -1,5 +1,6 @@
/*
- * Written by: Patricia Gaughen, IBM Corporation
+ * Written by: Patricia Gaughen <gone@us.ibm.com>, IBM Corporation
+ * August 2002: added remote node KVA remap - Martin J. Bligh
*
* Copyright (C) 2002, IBM Corp.
*
@@ -19,8 +20,6 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to <gone@us.ibm.com>
*/
#include <linux/config.h>
@@ -113,35 +112,98 @@
}
}
+#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
+
+unsigned long node_remap_start_pfn[MAX_NUMNODES];
+unsigned long node_remap_size[MAX_NUMNODES];
+unsigned long node_remap_offset[MAX_NUMNODES];
+void *node_remap_start_vaddr[MAX_NUMNODES];
+extern void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
+
+void __init remap_numa_kva(void)
+{
+ void *vaddr;
+ unsigned long pfn;
+ int node;
+
+ for (node = 1; node < numnodes; ++node) {
+ for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
+ vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
+ set_pmd_pfn((ulong) vaddr,
+ node_remap_start_pfn[node] + pfn,
+ PAGE_KERNEL_LARGE);
+ }
+ }
+}
+
+static unsigned long calculate_numa_remap_pages(void)
+{
+ int nid;
+ unsigned long size, reserve_pages = 0;
+
+ for (nid = 1; nid < numnodes; nid++) {
+ /* calculate the size of the mem_map needed in bytes */
+ size = (node_end_pfn[nid] - node_start_pfn[nid] + 1)
+ * sizeof(struct page);
+ /* convert size to large (pmd size) pages, rounding up */
+ size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
+ /* now the roundup is correct, convert to PAGE_SIZE pages */
+ size = size * PTRS_PER_PTE;
+ printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
+ size, nid);
+ node_remap_size[nid] = size;
+ reserve_pages += size;
+ node_remap_offset[nid] = reserve_pages;
+ printk("Shrinking node %d from %ld pages to %ld pages\n",
+ nid, node_end_pfn[nid], node_end_pfn[nid] - size);
+ node_end_pfn[nid] -= size;
+ node_remap_start_pfn[nid] = node_end_pfn[nid];
+ }
+ printk("Reserving total of %ld pages for numa KVA remap\n",
+ reserve_pages);
+ return reserve_pages;
+}
+
unsigned long __init setup_memory(void)
{
int nid;
unsigned long bootmap_size, system_start_pfn, system_max_low_pfn;
+ unsigned long reserve_pages;
get_memcfg_numa();
+ reserve_pages = calculate_numa_remap_pages();
- /*
- * partially used pages are not usable - thus
- * we are rounding upwards:
- */
+ /* partially used pages are not usable - thus round upwards */
system_start_pfn = min_low_pfn = PFN_UP(__pa(&_end));
find_max_pfn();
system_max_low_pfn = max_low_pfn = find_max_low_pfn();
-
#ifdef CONFIG_HIGHMEM
- highstart_pfn = highend_pfn = max_pfn;
- if (max_pfn > system_max_low_pfn) {
- highstart_pfn = system_max_low_pfn;
- }
- printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
- pages_to_mb(highend_pfn - highstart_pfn));
+ highstart_pfn = highend_pfn = max_pfn;
+ if (max_pfn > system_max_low_pfn)
+ highstart_pfn = system_max_low_pfn;
+ printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
+ pages_to_mb(highend_pfn - highstart_pfn));
#endif
+ system_max_low_pfn = max_low_pfn = max_low_pfn - reserve_pages;
printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
pages_to_mb(system_max_low_pfn));
-
- for (nid = 0; nid < numnodes; nid++)
+ printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n",
+ min_low_pfn, max_low_pfn, highstart_pfn);
+
+ printk("Low memory ends at vaddr %08lx\n",
+ (ulong) pfn_to_kaddr(max_low_pfn));
+ for (nid = 0; nid < numnodes; nid++) {
allocate_pgdat(nid);
+ node_remap_start_vaddr[nid] = pfn_to_kaddr(
+ highstart_pfn - node_remap_offset[nid]);
+ printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
+ (ulong) node_remap_start_vaddr[nid],
+ (ulong) pfn_to_kaddr(highstart_pfn
+ - node_remap_offset[nid] + node_remap_size[nid]));
+ }
+ printk("High memory starts at vaddr %08lx\n",
+ (ulong) pfn_to_kaddr(highstart_pfn));
for (nid = 0; nid < numnodes; nid++)
find_max_pfn_node(nid);
@@ -244,7 +306,18 @@
#endif
}
}
- free_area_init_node(nid, NODE_DATA(nid), 0, zones_size, start, 0);
+ /*
+ * We let the lmem_map for node 0 be allocated from the
+ * normal bootmem allocator, but other nodes come from the
+ * remapped KVA area - mbligh
+ */
+ if (nid)
+ free_area_init_node(nid, NODE_DATA(nid),
+ node_remap_start_vaddr[nid], zones_size,
+ start, 0);
+ else
+ free_area_init_node(nid, NODE_DATA(nid), 0,
+ zones_size, start, 0);
}
return;
}
diff -urN -X /home/mbligh/.diff.exclude 39-clean-numa/arch/i386/mm/init.c 40-numamap/arch/i386/mm/init.c
--- 39-clean-numa/arch/i386/mm/init.c Fri Sep 6 23:03:19 2002
+++ 40-numamap/arch/i386/mm/init.c Tue Sep 10 17:09:14 2002
@@ -245,6 +245,12 @@
unsigned long __PAGE_KERNEL = _PAGE_KERNEL;
+#ifndef CONFIG_DISCONTIGMEM
+#define remap_numa_kva() do {} while (0)
+#else
+extern void __init remap_numa_kva(void);
+#endif
+
static void __init pagetable_init (void)
{
unsigned long vaddr;
@@ -269,6 +275,7 @@
}
kernel_physical_mapping_init(pgd_base);
+ remap_numa_kva();
/*
* Fixed mappings, only the page table structure has to be
@@ -449,7 +456,11 @@
set_max_mapnr_init();
+#ifdef CONFIG_HIGHMEM
+ high_memory = (void *) __va(highstart_pfn * PAGE_SIZE);
+#else
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+#endif
/* clear the zero-page */
memset(empty_zero_page, 0, PAGE_SIZE);
diff -urN -X /home/mbligh/.diff.exclude 39-clean-numa/arch/i386/mm/pgtable.c 40-numamap/arch/i386/mm/pgtable.c
--- 39-clean-numa/arch/i386/mm/pgtable.c Fri Sep 6 23:03:19 2002
+++ 40-numamap/arch/i386/mm/pgtable.c Sat Sep 7 12:48:21 2002
@@ -84,6 +84,39 @@
__flush_tlb_one(vaddr);
}
+/*
+ * Associate a large virtual page frame with a given physical page frame
+ * and protection flags for that frame. pfn is for the base of the page,
+ * vaddr is what the page gets mapped to - both must be properly aligned.
+ * The pmd must already be instantiated. Assumes PAE mode.
+ */
+void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+
+ if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */
+ printk ("set_pmd_pfn: vaddr misaligned\n");
+ return; /* BUG(); */
+ }
+ if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */
+ printk ("set_pmd_pfn: pfn misaligned\n");
+ return; /* BUG(); */
+ }
+ pgd = swapper_pg_dir + __pgd_offset(vaddr);
+ if (pgd_none(*pgd)) {
+ printk ("set_pmd_pfn: pgd_none\n");
+ return; /* BUG(); */
+ }
+ pmd = pmd_offset(pgd, vaddr);
+ set_pmd(pmd, pfn_pmd(pfn, flags));
+ /*
+ * It's enough to flush this one mapping.
+ * (PGE mappings get flushed as well)
+ */
+ __flush_tlb_one(vaddr);
+}
+
void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
unsigned long address = __fix_to_virt(idx);
diff -urN -X /home/mbligh/.diff.exclude 39-clean-numa/include/asm-i386/page.h 40-numamap/include/asm-i386/page.h
--- 39-clean-numa/include/asm-i386/page.h Fri Sep 6 23:03:19 2002
+++ 40-numamap/include/asm-i386/page.h Sat Sep 7 12:48:21 2002
@@ -142,6 +142,7 @@
#define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE))
#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
#ifndef CONFIG_DISCONTIGMEM
#define pfn_to_page(pfn) (mem_map + (pfn))
#define page_to_pfn(page) ((unsigned long)((page) - mem_map))