From: Nick Piggin <nickpiggin@yahoo.com.au>
To: Linux Memory Management <linux-mm@kvack.org>
Cc: Andi Kleen <ak@suse.de>, Hugh Dickins <hugh@veritas.com>,
Linus Torvalds <torvalds@osdl.org>, Andrew Morton <akpm@osdl.org>
Subject: [PATCH 4/10] alternate 4-level page tables patches
Date: Sat, 18 Dec 2004 17:57:12 +1100 [thread overview]
Message-ID: <41C3D4C8.1000508@yahoo.com.au> (raw)
In-Reply-To: <41C3D4AE.7010502@yahoo.com.au>
[-- Attachment #1: Type: text/plain, Size: 5 bytes --]
4/10
[-- Attachment #2: 3level-clear_page_range.patch --]
[-- Type: text/plain, Size: 6693 bytes --]
Rename clear_page_tables to clear_page_range. clear_page_range takes byte
ranges, and aggressively frees page table pages. Maybe useful to control
page table memory consumption on 4-level architectures (and even 3 level
ones).
Possible downsides are:
- flush_tlb_pgtables gets called more often (only a problem for sparc64
AFAIKS).
- the opportunistic "expand to fill PGDIR_SIZE hole" logic that ensures
something actually gets done under the old system is still in place.
This could sometimes make unmapping small regions more inefficient. There
are some other solutions to look at if this is the case though.
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
---
linux-2.6-npiggin/include/linux/mm.h | 2
linux-2.6-npiggin/mm/memory.c | 80 +++++++++++++++++++++--------------
linux-2.6-npiggin/mm/mmap.c | 23 +++-------
3 files changed, 58 insertions(+), 47 deletions(-)
diff -puN include/linux/mm.h~3level-clear_page_range include/linux/mm.h
--- linux-2.6/include/linux/mm.h~3level-clear_page_range 2004-12-18 16:50:44.000000000 +1100
+++ linux-2.6-npiggin/include/linux/mm.h 2004-12-18 17:07:48.000000000 +1100
@@ -566,7 +566,7 @@ int unmap_vmas(struct mmu_gather **tlbp,
struct vm_area_struct *start_vma, unsigned long start_addr,
unsigned long end_addr, unsigned long *nr_accounted,
struct zap_details *);
-void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr);
+void clear_page_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end);
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma);
int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
diff -puN mm/memory.c~3level-clear_page_range mm/memory.c
--- linux-2.6/mm/memory.c~3level-clear_page_range 2004-12-18 16:50:44.000000000 +1100
+++ linux-2.6-npiggin/mm/memory.c 2004-12-18 17:07:48.000000000 +1100
@@ -100,58 +100,76 @@ static inline void copy_cow_page(struct
* Note: this doesn't free the actual pages themselves. That
* has been handled earlier when unmapping all the memory regions.
*/
-static inline void free_one_pmd(struct mmu_gather *tlb, pmd_t * dir)
+static inline void clear_pmd_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long start, unsigned long end)
{
struct page *page;
- if (pmd_none(*dir))
+ if (pmd_none(*pmd))
return;
- if (unlikely(pmd_bad(*dir))) {
- pmd_ERROR(*dir);
- pmd_clear(dir);
+ if (unlikely(pmd_bad(*pmd))) {
+ pmd_ERROR(*pmd);
+ pmd_clear(pmd);
return;
}
- page = pmd_page(*dir);
- pmd_clear(dir);
- dec_page_state(nr_page_table_pages);
- tlb->mm->nr_ptes--;
- pte_free_tlb(tlb, page);
+ if (!(start & ~PMD_MASK) && !(end & ~PMD_MASK)) {
+ page = pmd_page(*pmd);
+ pmd_clear(pmd);
+ dec_page_state(nr_page_table_pages);
+ tlb->mm->nr_ptes--;
+ pte_free_tlb(tlb, page);
+ }
}
-static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir)
+static inline void clear_pgd_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long start, unsigned long end)
{
- int j;
- pmd_t * pmd;
+ unsigned long addr = start, next;
+ pmd_t *pmd, *__pmd;
- if (pgd_none(*dir))
+ if (pgd_none(*pgd))
return;
- if (unlikely(pgd_bad(*dir))) {
- pgd_ERROR(*dir);
- pgd_clear(dir);
+ if (unlikely(pgd_bad(*pgd))) {
+ pgd_ERROR(*pgd);
+ pgd_clear(pgd);
return;
}
- pmd = pmd_offset(dir, 0);
- pgd_clear(dir);
- for (j = 0; j < PTRS_PER_PMD ; j++)
- free_one_pmd(tlb, pmd+j);
- pmd_free_tlb(tlb, pmd);
+
+ pmd = __pmd = pmd_offset(pgd, start);
+ do {
+ next = (addr + PMD_SIZE) & PMD_MASK;
+ if (next > end || next <= addr)
+ next = end;
+
+ clear_pmd_range(tlb, pmd, addr, next);
+ pmd++;
+ addr = next;
+ } while (addr && (addr <= end - 1));
+
+ if (!(start & ~PGDIR_MASK) && !(end & ~PGDIR_MASK)) {
+ pgd_clear(pgd);
+ pmd_free_tlb(tlb, __pmd);
+ }
}
/*
- * This function clears all user-level page tables of a process - this
- * is needed by execve(), so that old pages aren't in the way.
+ * This function clears user-level page tables of a process.
*
* Must be called with pagetable lock held.
*/
-void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr)
+void clear_page_range(struct mmu_gather *tlb, unsigned long start, unsigned long end)
{
- pgd_t * page_dir = tlb->mm->pgd;
+ unsigned long addr = start, next;
+ unsigned long i, nr = pgd_index(end + PGDIR_SIZE-1) - pgd_index(start);
+ pgd_t * pgd = pgd_offset(tlb->mm, start);
- page_dir += first;
- do {
- free_one_pgd(tlb, page_dir);
- page_dir++;
- } while (--nr);
+ for (i = 0; i < nr; i++) {
+ next = (addr + PGDIR_SIZE) & PGDIR_MASK;
+ if (next > end || next <= addr)
+ next = end;
+
+ clear_pgd_range(tlb, pgd, addr, next);
+ pgd++;
+ addr = next;
+ }
}
pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
diff -puN mm/mmap.c~3level-clear_page_range mm/mmap.c
--- linux-2.6/mm/mmap.c~3level-clear_page_range 2004-12-18 16:50:44.000000000 +1100
+++ linux-2.6-npiggin/mm/mmap.c 2004-12-18 16:50:44.000000000 +1100
@@ -1474,7 +1474,6 @@ static void free_pgtables(struct mmu_gat
{
unsigned long first = start & PGDIR_MASK;
unsigned long last = end + PGDIR_SIZE - 1;
- unsigned long start_index, end_index;
struct mm_struct *mm = tlb->mm;
if (!prev) {
@@ -1499,24 +1498,16 @@ static void free_pgtables(struct mmu_gat
last = next->vm_start;
}
if (prev->vm_end > first)
- first = prev->vm_end + PGDIR_SIZE - 1;
+ first = prev->vm_end;
break;
}
no_mmaps:
if (last < first) /* for arches with discontiguous pgd indices */
return;
- /*
- * If the PGD bits are not consecutive in the virtual address, the
- * old method of shifting the VA >> by PGDIR_SHIFT doesn't work.
- */
- start_index = pgd_index(first);
- if (start_index < FIRST_USER_PGD_NR)
- start_index = FIRST_USER_PGD_NR;
- end_index = pgd_index(last);
- if (end_index > start_index) {
- clear_page_tables(tlb, start_index, end_index - start_index);
- flush_tlb_pgtables(mm, first & PGDIR_MASK, last & PGDIR_MASK);
- }
+ if (first < FIRST_USER_PGD_NR * PGDIR_SIZE)
+ first = FIRST_USER_PGD_NR * PGDIR_SIZE;
+ clear_page_range(tlb, first, last);
+ flush_tlb_pgtables(mm, first, last);
}
/* Normal function to fix up a mapping
@@ -1844,7 +1835,9 @@ void exit_mmap(struct mm_struct *mm)
~0UL, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
BUG_ON(mm->map_count); /* This is just debugging */
- clear_page_tables(tlb, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
+ clear_page_range(tlb, FIRST_USER_PGD_NR * PGDIR_SIZE,
+ (TASK_SIZE + PGDIR_SIZE - 1) & PGDIR_MASK);
+
tlb_finish_mmu(tlb, 0, MM_VM_SIZE(mm));
vma = mm->mmap;
_
next prev parent reply other threads:[~2004-12-18 6:57 UTC|newest]
Thread overview: 77+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-12-18 6:55 [RFC][PATCH 0/10] " Nick Piggin
2004-12-18 6:55 ` [PATCH 1/10] " Nick Piggin
2004-12-18 6:56 ` [PATCH 2/10] " Nick Piggin
2004-12-18 6:56 ` [PATCH 3/10] " Nick Piggin
2004-12-18 6:57 ` Nick Piggin [this message]
2004-12-18 6:58 ` [PATCH 5/10] " Nick Piggin
2004-12-18 6:58 ` [PATCH 6/10] " Nick Piggin
2004-12-18 6:59 ` [PATCH 7/10] " Nick Piggin
2004-12-18 7:00 ` [PATCH 8/10] " Nick Piggin
2004-12-18 7:00 ` [PATCH 9/10] " Nick Piggin
2004-12-18 7:01 ` [PATCH 10/10] " Nick Piggin
2004-12-18 7:31 ` Andi Kleen
2004-12-18 7:46 ` Nick Piggin
2004-12-18 8:08 ` Andrew Morton
2004-12-18 9:48 ` Andi Kleen
2004-12-18 19:06 ` Linus Torvalds
2004-12-20 17:43 ` Andi Kleen
2004-12-20 17:47 ` Randy.Dunlap
2004-12-20 18:08 ` Linus Torvalds
2004-12-20 18:15 ` Linus Torvalds
2004-12-20 18:19 ` Andi Kleen
2004-12-20 18:47 ` Linus Torvalds
2004-12-20 18:52 ` Linus Torvalds
2004-12-20 18:59 ` Andi Kleen
2004-12-20 18:57 ` Randy.Dunlap
2004-12-18 9:05 ` [PATCH 4/10] " Nick Piggin
2004-12-18 9:50 ` Andi Kleen
2004-12-18 10:06 ` Nick Piggin
2004-12-18 10:11 ` Andi Kleen
2004-12-18 10:22 ` Nick Piggin
2004-12-18 10:29 ` Nick Piggin
2004-12-18 11:06 ` William Lee Irwin III
2004-12-18 11:17 ` Nick Piggin
2004-12-18 11:32 ` William Lee Irwin III
2004-12-18 11:55 ` Nick Piggin
2004-12-18 12:46 ` William Lee Irwin III
2004-12-18 12:48 ` William Lee Irwin III
2004-12-19 0:05 ` Nick Piggin
2004-12-19 0:20 ` William Lee Irwin III
2004-12-19 0:38 ` Nick Piggin
2004-12-19 1:01 ` William Lee Irwin III
2004-12-19 1:31 ` Linus Torvalds
2004-12-19 2:08 ` William Lee Irwin III
2004-12-19 2:26 ` Nick Piggin
2004-12-19 5:23 ` Linus Torvalds
2004-12-19 6:02 ` William Lee Irwin III
2004-12-19 18:17 ` Linus Torvalds
2004-12-20 1:00 ` William Lee Irwin III
2004-12-18 10:45 ` William Lee Irwin III
2004-12-18 10:58 ` Nick Piggin
2004-12-19 0:07 ` [RFC][PATCH 0/10] " Hugh Dickins
2004-12-19 0:33 ` Nick Piggin
2004-12-20 18:04 ` Andi Kleen
2004-12-20 18:40 ` Linus Torvalds
2004-12-20 18:53 ` Andi Kleen
2004-12-21 0:04 ` Linus Torvalds
2004-12-21 0:22 ` Andi Kleen
2004-12-21 0:43 ` Linus Torvalds
2004-12-21 0:47 ` Nick Piggin
2004-12-21 2:55 ` Hugh Dickins
2004-12-21 3:21 ` Nick Piggin
2004-12-21 3:47 ` Linus Torvalds
2004-12-21 3:56 ` Linus Torvalds
2004-12-21 4:04 ` Nick Piggin
2004-12-21 4:08 ` Nick Piggin
2004-12-21 9:36 ` Andi Kleen
2004-12-21 10:13 ` Hugh Dickins
2004-12-21 10:59 ` Nick Piggin
2004-12-21 17:36 ` Linus Torvalds
2004-12-21 20:19 ` Andi Kleen
2004-12-21 23:49 ` Nick Piggin
2004-12-22 10:38 ` Andi Kleen
2004-12-22 11:19 ` Nick Piggin
2004-12-22 11:23 ` Nick Piggin
2004-12-22 18:07 ` Andi Kleen
2004-12-30 21:24 ` Nick Piggin
2004-12-21 10:52 ` Nick Piggin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=41C3D4C8.1000508@yahoo.com.au \
--to=nickpiggin@yahoo.com.au \
--cc=ak@suse.de \
--cc=akpm@osdl.org \
--cc=hugh@veritas.com \
--cc=linux-mm@kvack.org \
--cc=torvalds@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox