linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Nick Piggin <nickpiggin@yahoo.com.au>
To: Robin Holt <holt@sgi.com>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	Hugh Dickins <hugh@veritas.com>
Subject: Re: munmap extremely slow even with untouched mapping.
Date: Fri, 28 Oct 2005 20:45:12 +1000	[thread overview]
Message-ID: <43620138.6060707@yahoo.com.au> (raw)
In-Reply-To: <20051028013738.GA19727@attica.americas.sgi.com>

[-- Attachment #1: Type: text/plain, Size: 1446 bytes --]

Robin Holt wrote:
> This is going out without any testing.  I got it to compile but never
> even booted the kernel.
> 
> I noticed that on ia64, the following simple program would take 24
> seconds to complete.  Profiling revealed I was spending all that time
> in unmap_vmas.  Looking over the function, I noticed that I would only
> attempt 8 pages at a time (CONFIG_PREMPT).  I then thought through this
> some more.  My particular application had one large mapping which was
> never touched after being mmaped.
> 
> Without this patch, we would iterate numerous times (256) to get past
> a region of memory that had an empty pmd, 524,288 times to get past an
> empty pud, and 1,073,741,824 times to get past an empty pgd.  I had a 4-level
> page table directory patch applied at the time.
> 

Ouch. I wonder why nobody's noticed this before. It really is
horribly inefficient on sparse mappings, as you've noticed :(

I guess I prefer the following (compiled, untested) slight
variant of your patch. Measuring work in terms of address range
is fairly vague... however, it may be the case that some
architectures take a long time to flush a large range of TLBs?

I don't see how the patch can be made too much cleaner... I guess
the rescheduling could be pushed down, like the copy_ case,
however that isn't particularly brilliant either. What's more, it
would probably get uglier due to the mmu_gather tricks...

Nick

-- 
SUSE Labs, Novell Inc.


[-- Attachment #2: mm-zap_block-redundant-fix.patch --]
[-- Type: text/plain, Size: 5291 bytes --]

Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -525,20 +525,25 @@ int copy_page_range(struct mm_struct *ds
 	return 0;
 }
 
-static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
-				unsigned long addr, unsigned long end,
-				struct zap_details *details)
+static unsigned long zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+			unsigned long addr, unsigned long end,
+			unsigned long *zap_work, struct zap_details *details)
 {
 	pte_t *pte;
 
 	pte = pte_offset_map(pmd, addr);
 	do {
 		pte_t ptent = *pte;
-		if (pte_none(ptent))
+		if (pte_none(ptent)) {
+			(*zap_work)--;
 			continue;
+		}
 		if (pte_present(ptent)) {
 			struct page *page = NULL;
 			unsigned long pfn = pte_pfn(ptent);
+
+			(*zap_work) -= PAGE_SIZE;
+
 			if (pfn_valid(pfn)) {
 				page = pfn_to_page(pfn);
 				if (PageReserved(page))
@@ -592,13 +597,15 @@ static void zap_pte_range(struct mmu_gat
 		if (!pte_file(ptent))
 			free_swap_and_cache(pte_to_swp_entry(ptent));
 		pte_clear_full(tlb->mm, addr, pte, tlb->fullmm);
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (pte++, addr += PAGE_SIZE, (addr != end && (long)*zap_work >0));
 	pte_unmap(pte - 1);
+
+	return addr;
 }
 
-static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
-				unsigned long addr, unsigned long end,
-				struct zap_details *details)
+static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+			unsigned long addr, unsigned long end,
+			unsigned long *zap_work, struct zap_details *details)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -606,15 +613,19 @@ static inline void zap_pmd_range(struct 
 	pmd = pmd_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
-		if (pmd_none_or_clear_bad(pmd))
+		if (pmd_none_or_clear_bad(pmd)) {
+			(*zap_work)--;
 			continue;
-		zap_pte_range(tlb, pmd, addr, next, details);
-	} while (pmd++, addr = next, addr != end);
+		}
+		next = zap_pte_range(tlb, pmd, addr, next, zap_work, details);
+	} while (pmd++, addr = next, (addr != end && (long)*zap_work > 0));
+
+	return addr;
 }
 
-static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
-				unsigned long addr, unsigned long end,
-				struct zap_details *details)
+static inline unsigned long zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+			unsigned long addr, unsigned long end,
+			unsigned long *zap_work, struct zap_details *details)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -622,14 +633,17 @@ static inline void zap_pud_range(struct 
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
-		if (pud_none_or_clear_bad(pud))
+		if (pud_none_or_clear_bad(pud)) {
+			(*zap_work)--;
 			continue;
-		zap_pmd_range(tlb, pud, addr, next, details);
-	} while (pud++, addr = next, addr != end);
+		}
+		next = zap_pmd_range(tlb, pud, addr, next, zap_work, details);
+	} while (pud++, addr = next, (addr != end && (long)*zap_work > 0));
 }
 
-static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
-				unsigned long addr, unsigned long end,
+static unsigned long unmap_page_range(struct mmu_gather *tlb,
+			struct vm_area_struct *vma, unsigned long addr,
+			unsigned long end, unsigned long *zap_work,
 				struct zap_details *details)
 {
 	pgd_t *pgd;
@@ -643,10 +657,12 @@ static void unmap_page_range(struct mmu_
 	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
+		if (pgd_none_or_clear_bad(pgd)) {
+			(*zap_work)--;
 			continue;
-		zap_pud_range(tlb, pgd, addr, next, details);
-	} while (pgd++, addr = next, addr != end);
+		}
+		next = zap_pud_range(tlb, pgd, addr, next, zap_work, details);
+	} while (pgd++, addr = next, (addr != end && (long)*zap_work > 0));
 	tlb_end_vma(tlb, vma);
 }
 
@@ -689,7 +705,7 @@ unsigned long unmap_vmas(struct mmu_gath
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *details)
 {
-	unsigned long zap_bytes = ZAP_BLOCK_SIZE;
+	unsigned long zap_work = ZAP_BLOCK_SIZE;
 	unsigned long tlb_start = 0;	/* For tlb_finish_mmu */
 	int tlb_start_valid = 0;
 	unsigned long start = start_addr;
@@ -710,25 +726,20 @@ unsigned long unmap_vmas(struct mmu_gath
 			*nr_accounted += (end - start) >> PAGE_SHIFT;
 
 		while (start != end) {
-			unsigned long block;
-
 			if (!tlb_start_valid) {
 				tlb_start = start;
 				tlb_start_valid = 1;
 			}
 
-			if (is_vm_hugetlb_page(vma)) {
-				block = end - start;
+			if (unlikely(is_vm_hugetlb_page(vma))) {
 				unmap_hugepage_range(vma, start, end);
-			} else {
-				block = min(zap_bytes, end - start);
-				unmap_page_range(*tlbp, vma, start,
-						start + block, details);
-			}
+				zap_work -= (end - start) /
+						(HPAGE_SIZE / PAGE_SIZE);
+			} else
+				start += unmap_page_range(*tlbp, vma,
+						start, end, &zap_work, details);
 
-			start += block;
-			zap_bytes -= block;
-			if ((long)zap_bytes > 0)
+			if ((long)zap_work > 0)
 				continue;
 
 			tlb_finish_mmu(*tlbp, tlb_start, start);
@@ -748,7 +759,7 @@ unsigned long unmap_vmas(struct mmu_gath
 
 			*tlbp = tlb_gather_mmu(mm, fullmm);
 			tlb_start_valid = 0;
-			zap_bytes = ZAP_BLOCK_SIZE;
+			zap_work = ZAP_BLOCK_SIZE;
 		}
 	}
 out:

  reply	other threads:[~2005-10-28 10:45 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-10-28  1:37 Robin Holt
2005-10-28 10:45 ` Nick Piggin [this message]
2005-10-28 15:20   ` Hugh Dickins
2005-10-30  4:29     ` Nick Piggin
2005-10-30 16:58       ` Hugh Dickins
2005-10-31  9:10         ` Nick Piggin
2005-10-31  9:19           ` Nick Piggin
2005-10-31 12:20           ` Robin Holt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=43620138.6060707@yahoo.com.au \
    --to=nickpiggin@yahoo.com.au \
    --cc=holt@sgi.com \
    --cc=hugh@veritas.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox