From: Dave Hansen <haveblue@us.ibm.com>
To: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Cc: linux-mm@kvack.org
Subject: [RFC][PATCH] allow bigger PAGE_OFFSET with PAE
Date: Tue, 07 Jan 2003 12:06:38 -0800 [thread overview]
Message-ID: <3E1B334E.8030807@us.ibm.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 1157 bytes --]
Currently, with PAE enabled, we require the user:kernel split to occur
on a PMD boundary, so it can only be done in 1GB increments. There
are 2 reasons for this. First, kernel_physical_mapping_init()
assumes, when it is initializing the kernel's PMD entries, that they
start at offset 0 inside the PMD. This is fixed by starting them at
__pmd_offset(PAGE_OFFSET) instead.
Secondly, secondary SMP cpus require that the trampoline code be
identity mapped (map virtual addresses to the same as physical ones).
Right now, this is accomplished by setting the first PGD entry to
be the same as the last. This is OK, as long as that PGD is
eventually mapping to physical 0x00000000. My changes above break
that. So, I allocate another PMD, and use it for the identity
mapping. The code is in place to allocate PTEs if you're
using PAE without PSE support, but there is nothing to free them. Any
suggestions for a clean way to do this?
Also, this gets the kernel's pagetables right, but neglects
userspace's for now. pgd_alloc() needs to be fixed to allocate
another PMD, if the split isn't PMD-aligned.
--
Dave Hansen
haveblue@us.ibm.com
[-- Attachment #2: unaligned-page_offset-pae-2.5.53-3.patch --]
[-- Type: text/plain, Size: 4181 bytes --]
diff -ur linux-2.5.53-clean/arch/i386/mm/init.c linux-2.5.53-weirdsplit/arch/i386/mm/init.c
--- linux-2.5.53-clean/arch/i386/mm/init.c Mon Dec 23 21:21:03 2002
+++ linux-2.5.53-weirdsplit/arch/i386/mm/init.c Mon Jan 6 09:41:02 2003
@@ -117,6 +117,24 @@
}
}
+
+/*
+ * Abstract out using large pages when mapping KVA, or the SMP identity
+ * mapping
+ */
+void pmd_map_pfn_range(pmd_t* pmd_entry, unsigned long pfn, unsigned long max_pfn)
+{
+ int pte_ofs;
+ /* Map with big pages if possible, otherwise create normal page tables. */
+ if (cpu_has_pse) {
+ set_pmd(pmd_entry, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
+ pfn += PTRS_PER_PTE;
+ } else {
+ pte_t* pte = one_page_table_init(pmd_entry);
+ for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_pfn; pte++, pfn++, pte_ofs++)
+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+ }
+}
/*
* This maps the physical memory to kernel virtual address space, a total
* of max_low_pfn pages, by creating page tables starting from address
@@ -127,8 +145,7 @@
unsigned long pfn;
pgd_t *pgd;
pmd_t *pmd;
- pte_t *pte;
- int pgd_ofs, pmd_ofs, pte_ofs;
+ int pgd_ofs, pmd_ofs;
pgd_ofs = __pgd_offset(PAGE_OFFSET);
pgd = pgd_base + pgd_ofs;
@@ -138,19 +155,47 @@
pmd = one_md_table_init(pgd);
if (pfn >= max_low_pfn)
continue;
- for (pmd_ofs = 0; pmd_ofs < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_ofs++) {
- /* Map with big pages if possible, otherwise create normal page tables. */
- if (cpu_has_pse) {
- set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
- pfn += PTRS_PER_PTE;
- } else {
- pte = one_page_table_init(pmd);
-
- for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++)
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
- }
+
+ /* beware of starting KVA in the middle of a pmd. */
+ if( pgd_ofs == __pgd_offset(PAGE_OFFSET) ) {
+ pmd_ofs = __pmd_offset(PAGE_OFFSET);
+ pmd = &pmd[pmd_ofs];
+ } else
+ pmd_ofs = 0;
+
+ for (; pmd_ofs < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_ofs++) {
+ pmd_map_pfn_range(pmd, pfn, max_low_pfn);
+ pfn += PTRS_PER_PTE;
}
- }
+ }
+}
+
+
+/*
+ * Add low memory identity-mappings - SMP needs it when
+ * starting up on an AP from real-mode. In the non-PAE
+ * case we already have these mappings through head.S.
+ * All user-space mappings are explicitly cleared after
+ * SMP startup in zap_low_mappings().
+ */
+static void __init low_physical_mapping_init(pgd_t *pgd_base)
+{
+#if CONFIG_X86_PAE
+ unsigned long pfn = 0;
+ int pmd_ofs = 0;
+ pmd_t *pmd = one_md_table_init(pgd_base);
+
+ if(!cpu_has_pse) {
+ printk("PAE enabled, but no support for PSE (large pages)!");
+ printk("this is likely to waste some RAM.");
+ }
+
+ for (; pmd_ofs < PTRS_PER_PMD && pfn <= max_low_pfn; pmd++, pmd_ofs++) {
+ pmd_map_pfn_range(pmd, pfn, max_low_pfn);
+ pfn += PTRS_PER_PTE;
+ }
+#endif
+
}
static inline int page_kills_ppro(unsigned long pagenr)
@@ -213,7 +258,7 @@
pgd = swapper_pg_dir + __pgd_offset(vaddr);
pmd = pmd_offset(pgd, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
- pkmap_page_table = pte;
+ pkmap_page_table = pte;
}
void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
@@ -278,6 +323,7 @@
}
kernel_physical_mapping_init(pgd_base);
+ low_physical_mapping_init(pgd_base);
remap_numa_kva();
/*
@@ -286,19 +332,7 @@
*/
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
page_table_range_init(vaddr, 0, pgd_base);
-
permanent_kmaps_init(pgd_base);
-
-#if CONFIG_X86_PAE
- /*
- * Add low memory identity-mappings - SMP needs it when
- * starting up on an AP from real-mode. In the non-PAE
- * case we already have these mappings through head.S.
- * All user-space mappings are explicitly cleared after
- * SMP startup.
- */
- pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
-#endif
}
void zap_low_mappings (void)
@@ -310,6 +344,7 @@
* Note that "pgd_clear()" doesn't do it for
* us, because pgd_clear() is a no-op on i386.
*/
+ free_page(pgd_page(swapper_pg_dir[0]));
for (i = 0; i < USER_PTRS_PER_PGD; i++)
#if CONFIG_X86_PAE
set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
next reply other threads:[~2003-01-07 20:06 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-01-07 20:06 Dave Hansen [this message]
2003-01-07 23:37 ` William Lee Irwin III
2003-01-08 21:04 ` Dave Hansen
2003-01-08 22:05 ` William Lee Irwin III
2003-01-08 22:44 ` William Lee Irwin III
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3E1B334E.8030807@us.ibm.com \
--to=haveblue@us.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox