From: Daniel Phillips
Subject: Re: Non-GPL export of invalidate_mmap_range
Date: Sat, 21 Feb 2004 14:00:16 -0500
To: paulmck@us.ibm.com
Cc: "Stephen C. Tweedie", Andrew Morton, Christoph Hellwig, linux-kernel, linux-mm
Message-Id: <200402211400.16779.phillips@arcor.de>
In-Reply-To: <20040220161738.GF1269@us.ibm.com>
References: <20040216190927.GA2969@us.ibm.com> <200402201800.12077.phillips@arcor.de> <20040220161738.GF1269@us.ibm.com>

Hi Paul et al,

Here is an updated patch.  The name of the exported function is changed
to "invalidate_filemap_range" to reflect the fact that only file-backed
pages are invalidated, and to distinguish the three-parameter flavour
from the four-parameter version called from vmtruncate.  The inner loop
in zap_pte_range is hopefully correct now.

While I'm in here, why is the assignment "pte =" at line 411 of memory.c
not redundant?

   http://lxr.linux.no/source/mm/memory.c?v=2.6.1#L411

As far as I can see, the ->filemap spinlock protects the pte from
modification, and pte was already assigned at line 405.

Anyway, we can now see that the full cost of this DFS-specific feature
in the inner loop is a single (unlikely) branch.  I'll repeat my
proposition here: providing local filesystem semantics for MAP_PRIVATE
on any distributed filesystem requires these decorations on the unmap
path.  Though there is no benefit for local filesystems, the cost is
insignificant.

Regards,

Daniel

--- 2.6.3.clean/include/linux/mm.h	2004-02-17 22:57:13.000000000 -0500
+++ 2.6.3/include/linux/mm.h	2004-02-21 12:59:16.000000000 -0500
@@ -430,23 +430,23 @@
 void shmem_lock(struct file * file, int lock);
 int shmem_zero_setup(struct vm_area_struct *);
 
-void zap_page_range(struct vm_area_struct *vma, unsigned long address,
-			unsigned long size);
 int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
 		struct vm_area_struct *start_vma, unsigned long start_addr,
-		unsigned long end_addr, unsigned long *nr_accounted);
-void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
-			unsigned long address, unsigned long size);
+		unsigned long end_addr, unsigned long *nr_accounted, int zap);
 void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma);
 int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
 			unsigned long size, pgprot_t prot);
-
-extern void invalidate_mmap_range(struct address_space *mapping,
-				  loff_t const holebegin,
-				  loff_t const holelen);
+extern void invalidate_filemap_range(struct address_space *mapping, loff_t const start, loff_t const length);
 extern int vmtruncate(struct inode * inode, loff_t offset);
+void invalidate_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size, int all);
+
+static inline void zap_page_range(struct vm_area_struct *vma, ulong address, ulong size)
+{
+	invalidate_page_range(vma, address, size, 1);
+}
+
 extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
 extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
 extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
--- 2.6.3.clean/mm/memory.c	2004-02-17 22:57:47.000000000 -0500
+++ 2.6.3/mm/memory.c	2004-02-21 13:23:36.000000000 -0500
@@ -384,9 +384,13 @@
 	return -ENOMEM;
 }
 
-static void
-zap_pte_range(struct mmu_gather *tlb, pmd_t * pmd,
-		unsigned long address, unsigned long size)
+static inline int is_anon(struct page *page)
+{
+	return !page->mapping || PageSwapCache(page);
+}
+
+static void zap_pte_range(struct mmu_gather *tlb, pmd_t * pmd,
+		unsigned long address, unsigned long size, int all)
 {
 	unsigned long offset;
 	pte_t *ptep;
@@ -409,7 +413,8 @@
 			continue;
 		if (pte_present(pte)) {
 			unsigned long pfn = pte_pfn(pte);
-
+			if (unlikely(!all) && is_anon(pfn_to_page(pfn)))
+				continue;
 			pte = ptep_get_and_clear(ptep);
 			tlb_remove_tlb_entry(tlb, ptep, address+offset);
 			if (pfn_valid(pfn)) {
@@ -426,7 +431,7 @@
 				}
 			}
 		} else {
-			if (!pte_file(pte))
+			if (!pte_file(pte) && all)
 				free_swap_and_cache(pte_to_swp_entry(pte));
 			pte_clear(ptep);
 		}
@@ -434,9 +439,8 @@
 	pte_unmap(ptep-1);
 }
 
-static void
-zap_pmd_range(struct mmu_gather *tlb, pgd_t * dir,
-		unsigned long address, unsigned long size)
+static void zap_pmd_range(struct mmu_gather *tlb, pgd_t * dir,
+		unsigned long address, unsigned long size, int all)
 {
 	pmd_t * pmd;
 	unsigned long end;
@@ -453,14 +457,14 @@
 	if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
 		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
 	do {
-		zap_pte_range(tlb, pmd, address, end - address);
-		address = (address + PMD_SIZE) & PMD_MASK;
+		zap_pte_range(tlb, pmd, address, end - address, all);
+		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address < end);
 }
 
-void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
-		unsigned long address, unsigned long end)
+static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+		unsigned long address, unsigned long end, int all)
 {
 	pgd_t * dir;
 
@@ -474,7 +478,7 @@
 	dir = pgd_offset(vma->vm_mm, address);
 	tlb_start_vma(tlb, vma);
 	do {
-		zap_pmd_range(tlb, dir, address, end - address);
+		zap_pmd_range(tlb, dir, address, end - address, all);
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
@@ -524,7 +528,7 @@
  */
 int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
 		struct vm_area_struct *vma, unsigned long start_addr,
-		unsigned long end_addr, unsigned long *nr_accounted)
+		unsigned long end_addr, unsigned long *nr_accounted, int all)
 {
 	unsigned long zap_bytes = ZAP_BLOCK_SIZE;
 	unsigned long tlb_start = 0;	/* For tlb_finish_mmu */
@@ -568,7 +572,7 @@
 				tlb_start_valid = 1;
 			}
 
-			unmap_page_range(*tlbp, vma, start, start + block);
+			unmap_page_range(*tlbp, vma, start, start + block, all);
 			start += block;
 			zap_bytes -= block;
 			if ((long)zap_bytes > 0)
@@ -594,8 +598,8 @@
  * @address: starting address of pages to zap
  * @size: number of bytes to zap
  */
-void zap_page_range(struct vm_area_struct *vma,
-		unsigned long address, unsigned long size)
+void invalidate_page_range(struct vm_area_struct *vma,
+		unsigned long address, unsigned long size, int all)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct mmu_gather *tlb;
@@ -612,7 +616,7 @@
 	lru_add_drain();
 	spin_lock(&mm->page_table_lock);
 	tlb = tlb_gather_mmu(mm, 0);
-	unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted);
+	unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, all);
 	tlb_finish_mmu(tlb, address, end);
 	spin_unlock(&mm->page_table_lock);
 }
@@ -1071,10 +1075,8 @@
  * Both hba and hlen are page numbers in PAGE_SIZE units.
  * An hlen of zero blows away the entire portion file after hba.
  */
-static void
-invalidate_mmap_range_list(struct list_head *head,
-			   unsigned long const hba,
-			   unsigned long const hlen)
+static void invalidate_mmap_range_list(struct list_head *head,
+		unsigned long const hba, unsigned long const hlen, int all)
 {
 	struct list_head *curr;
 	unsigned long hea;	/* last page of hole. */
@@ -1095,9 +1097,9 @@
 			continue;	/* Mapping disjoint from hole. */
 		zba = (hba <= vba) ? vba : hba;
 		zea = (vea <= hea) ? vea : hea;
-		zap_page_range(vp,
+		invalidate_page_range(vp,
 			((zba - vba) << PAGE_SHIFT) + vp->vm_start,
-			(zea - zba + 1) << PAGE_SHIFT);
+			(zea - zba + 1) << PAGE_SHIFT, all);
 	}
 }
 
@@ -1115,8 +1117,8 @@
  * up to a PAGE_SIZE boundary.  A holelen of zero truncates to the
  * end of the file.
  */
-void invalidate_mmap_range(struct address_space *mapping,
-		loff_t const holebegin, loff_t const holelen)
+static void invalidate_mmap_range(struct address_space *mapping,
+		loff_t const holebegin, loff_t const holelen, int all)
 {
 	unsigned long hba = holebegin >> PAGE_SHIFT;
 	unsigned long hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -1133,12 +1135,19 @@
 	/* Protect against page fault */
 	atomic_inc(&mapping->truncate_count);
 	if (unlikely(!list_empty(&mapping->i_mmap)))
-		invalidate_mmap_range_list(&mapping->i_mmap, hba, hlen);
+		invalidate_mmap_range_list(&mapping->i_mmap, hba, hlen, all);
 	if (unlikely(!list_empty(&mapping->i_mmap_shared)))
-		invalidate_mmap_range_list(&mapping->i_mmap_shared, hba, hlen);
+		invalidate_mmap_range_list(&mapping->i_mmap_shared, hba, hlen, all);
 	up(&mapping->i_shared_sem);
 }
-EXPORT_SYMBOL_GPL(invalidate_mmap_range);
+
+void invalidate_filemap_range(struct address_space *mapping,
+		loff_t const start, loff_t const length)
+{
+	invalidate_mmap_range(mapping, start, length, 0);
+}
+
+EXPORT_SYMBOL_GPL(invalidate_filemap_range);
 
 /*
  * Handle all mappings that got truncated by a "truncate()"
@@ -1156,7 +1165,7 @@
 	if (inode->i_size < offset)
 		goto do_expand;
 	i_size_write(inode, offset);
-	invalidate_mmap_range(mapping, offset + PAGE_SIZE - 1, 0);
+	invalidate_mmap_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
 	truncate_inode_pages(mapping, offset);
 	goto out_truncate;
 
--- 2.6.3.clean/mm/mmap.c	2004-02-17 22:58:32.000000000 -0500
+++ 2.6.3/mm/mmap.c	2004-02-19 22:46:01.000000000 -0500
@@ -1134,7 +1134,7 @@
 
 	lru_add_drain();
 	tlb = tlb_gather_mmu(mm, 0);
-	unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted);
+	unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, 1);
 	vm_unacct_memory(nr_accounted);
 
 	if (is_hugepage_only_range(start, end - start))
@@ -1436,7 +1436,7 @@
 	flush_cache_mm(mm);
 	/* Use ~0UL here to ensure all VMAs in the mm are unmapped */
 	mm->map_count -= unmap_vmas(&tlb, mm, mm->mmap, 0,
-					~0UL, &nr_accounted);
+					~0UL, &nr_accounted, 1);
 	vm_unacct_memory(nr_accounted);
 	BUG_ON(mm->map_count);	/* This is just debugging */
 	clear_page_tables(tlb, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
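
For illustration, here is a minimal, hypothetical sketch of the call pattern
the export is meant to enable once the patch is applied.  The
dfs_revoke_mapping() helper and struct dfs_revoke are invented names for the
sake of the example, not part of the patch or of any in-tree filesystem; the
only real interface used is the exported invalidate_filemap_range().

#include <linux/fs.h>
#include <linux/mm.h>

/*
 * Hypothetical sketch only: when another node takes over a byte range
 * of a shared file, the local node must drop its cached file-backed
 * mappings of that range.  Anonymous COW copies sitting in MAP_PRIVATE
 * mappings are deliberately left alone, which is exactly what the
 * file-backed-only path added by this patch provides.
 */

struct dfs_revoke {
	struct inode *inode;	/* file whose cached mappings are revoked */
	loff_t start;		/* byte offset of the revoked range */
	loff_t length;		/* length in bytes; 0 means "to end of file" */
};

static void dfs_revoke_mapping(struct dfs_revoke *rv)
{
	struct address_space *mapping = rv->inode->i_mapping;

	/*
	 * Unmap only the file-backed ptes in every vma that maps this
	 * inode over the revoked range.  Private COW pages survive, so
	 * MAP_PRIVATE keeps local filesystem semantics.  Invalidating
	 * the page cache pages themselves is a separate step handled
	 * by the filesystem's own cache management.
	 */
	invalidate_filemap_range(mapping, rv->start, rv->length);
}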