From 21819f790e3d206ad77cd20d6e7cae86311fc87d Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Mon, 21 Apr 2014 15:29:49 -0700
Subject: [PATCH 1/2] mm: move page table dirty state into TLB gather operation

When tearing down a memory mapping, we have long delayed the actual
freeing of the pages until after the (batched) TLB flush, since only
after the TLB entries have been flushed from all CPU's do we know that
none of the pages will be accessed any more.

HOWEVER.

Ben Herrenschmidt points out that we need to do the same thing for
marking a shared mapped page dirty.  Because if we mark the underlying
page dirty before we have flushed the TLB's, other CPU's may happily
continue to write to the page (using their stale TLB contents) after
we've marked the page dirty, and they can thus race with any cleaning
operation.

Now, in practice, any page cleaning operations will take much longer to
start the IO on the page than it will have taken us to get to the TLB
flush, so this is going to be hard to trigger in real life.  In fact,
so far nobody has even come up with a reasonable test-case for this to
show it happening.

But what we do now (set_page_dirty() before flushing the TLB) really is
wrong.  And this commit does not fix it, but by moving the dirty
handling into the TLB gather operation at least the internal interfaces
now support the notion of those TLB gather interfaces doing the right
thing.

Cc: Benjamin Herrenschmidt
Cc: Peter Anvin
Cc: Peter Zijlstra
Cc: Dave Hansen
Cc: linux-arch@vger.kernel.org
Cc: linux-mm@kvack.org
Signed-off-by: Linus Torvalds
---
 arch/arm/include/asm/tlb.h  |  6 ++++--
 arch/ia64/include/asm/tlb.h |  6 ++++--
 arch/s390/include/asm/tlb.h |  4 +++-
 arch/sh/include/asm/tlb.h   |  6 ++++--
 arch/um/include/asm/tlb.h   |  6 ++++--
 include/asm-generic/tlb.h   |  4 ++--
 mm/hugetlb.c                |  4 +---
 mm/memory.c                 | 15 +++++++++------
 8 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 0baf7f0d9394..ac9c16af8e63 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -165,8 +165,10 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
 		tlb_flush(tlb);
 }
 
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
 {
+	if (dirty)
+		set_page_dirty(page);
 	tlb->pages[tlb->nr++] = page;
 	VM_BUG_ON(tlb->nr > tlb->max);
 	return tlb->max - tlb->nr;
@@ -174,7 +176,7 @@ static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	if (!__tlb_remove_page(tlb, page))
+	if (!__tlb_remove_page(tlb, page, 0))
 		tlb_flush_mmu(tlb);
 }
 
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index bc5efc7c3f3f..9049a7d6427d 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -193,8 +193,10 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
  * must be delayed until after the TLB has been flushed (see comments at the beginning of
  * this file).
  */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
 {
+	if (dirty)
+		set_page_dirty(page);
 	tlb->need_flush = 1;
 
 	if (!tlb->nr && tlb->pages == tlb->local)
@@ -213,7 +215,7 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	if (!__tlb_remove_page(tlb, page))
+	if (!__tlb_remove_page(tlb, page, 0))
 		tlb_flush_mmu(tlb);
 }
 
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index c544b6f05d95..8107a8c53985 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -76,8 +76,10 @@ static inline void tlb_finish_mmu(struct mmu_gather *tlb,
  * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
  * has already been freed, so just do free_page_and_swap_cache.
  */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
 {
+	if (dirty)
+		set_page_dirty(page);
 	free_page_and_swap_cache(page);
 	return 1; /* avoid calling tlb_flush_mmu */
 }
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 362192ed12fe..428de7858d27 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -90,15 +90,17 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 {
 }
 
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, int dirty)
 {
+	if (dirty)
+		set_page_dirty(page);
 	free_page_and_swap_cache(page);
 	return 1; /* avoid calling tlb_flush_mmu */
 }
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	__tlb_remove_page(tlb, page);
+	__tlb_remove_page(tlb, page, 0);
 }
 
 #define pte_free_tlb(tlb, ptep, addr)	pte_free((tlb)->mm, ptep)
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 29b0301c18aa..df557f987819 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -86,8 +86,10 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
  * while handling the additional races in SMP caused by other CPUs
  * caching valid mappings in their TLBs.
  */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
 {
+	if (dirty)
+		set_page_dirty(page);
 	tlb->need_flush = 1;
 	free_page_and_swap_cache(page);
 	return 1; /* avoid calling tlb_flush_mmu */
@@ -95,7 +97,7 @@ static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	__tlb_remove_page(tlb, page);
+	__tlb_remove_page(tlb, page, 0);
 }
 
 /**
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 5672d7ea1fa0..541c563f0ff9 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -116,7 +116,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long
 void tlb_flush_mmu(struct mmu_gather *tlb);
 void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start,
 							unsigned long end);
-int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
+int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty);
 
 /* tlb_remove_page
  *	Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when
@@ -124,7 +124,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
  */
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	if (!__tlb_remove_page(tlb, page))
+	if (!__tlb_remove_page(tlb, page, 0))
 		tlb_flush_mmu(tlb);
 }
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 246192929a2d..dfbe23c0c200 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2516,11 +2516,9 @@ again:
 
 		pte = huge_ptep_get_and_clear(mm, address, ptep);
 		tlb_remove_tlb_entry(tlb, ptep, address);
-		if (huge_pte_dirty(pte))
-			set_page_dirty(page);
 
 		page_remove_rmap(page);
-		force_flush = !__tlb_remove_page(tlb, page);
+		force_flush = !__tlb_remove_page(tlb, page, huge_pte_dirty(pte));
 		if (force_flush) {
 			spin_unlock(ptl);
 			break;
diff --git a/mm/memory.c b/mm/memory.c
index d0f0bef3be48..62fdcd1995f4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -277,12 +277,15 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e
  *	mappings in their TLBs. Returns the number of free page slots left.
  *	When out of page slots we must call tlb_flush_mmu().
  */
-int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
 {
 	struct mmu_gather_batch *batch;
 
 	VM_BUG_ON(!tlb->need_flush);
 
+	/* FIXME! This needs to be batched too */
+	if (dirty)
+		set_page_dirty(page);
 	batch = tlb->active;
 	batch->pages[batch->nr++] = page;
 	if (batch->nr == batch->max) {
@@ -1124,11 +1127,11 @@ again:
 				pte_file_mksoft_dirty(ptfile);
 			set_pte_at(mm, addr, pte, ptfile);
 		}
-		if (PageAnon(page))
+		if (PageAnon(page)) {
+			/* We don't bother saving dirty state for anonymous pages */
+			ptent = pte_mkclean(ptent);
 			rss[MM_ANONPAGES]--;
-		else {
-			if (pte_dirty(ptent))
-				set_page_dirty(page);
+		} else {
 			if (pte_young(ptent) &&
 			    likely(!(vma->vm_flags & VM_SEQ_READ)))
 				mark_page_accessed(page);
@@ -1137,7 +1140,7 @@ again:
 			page_remove_rmap(page);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
-			force_flush = !__tlb_remove_page(tlb, page);
+			force_flush = !__tlb_remove_page(tlb, page, pte_dirty(ptent));
 			if (force_flush)
 				break;
 			continue;
-- 
1.9.1.377.g96e67c8.dirty
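
For readers who want a feel for the direction the "FIXME! This needs to be
batched too" comment in __tlb_remove_page() points at, here is a rough,
stand-alone toy model (plain user-space C): the dirty bit taken from the pte
is carried alongside each gathered page, and set_page_dirty() is only called
once the TLB has actually been flushed.  Everything here is an illustrative
stand-in, not a kernel interface: the simplified struct page, the fixed-size
batch, and the toy_*() helpers are made up for this sketch, and the real
batching would have to live in the per-arch mmu_gather paths shown in the
patch above.

	#include <stdbool.h>
	#include <stdio.h>

	#define BATCH_MAX 8

	struct page { int id; bool dirty; };

	struct mmu_gather {
		struct page *pages[BATCH_MAX];
		bool dirty[BATCH_MAX];	/* dirty bit captured from the pte */
		int nr;
	};

	static void set_page_dirty(struct page *page)
	{
		page->dirty = true;
		printf("page %d marked dirty\n", page->id);
	}

	static void toy_tlb_flush(void)
	{
		/* stand-in for flushing the stale TLB entries on all CPUs */
		printf("TLB flushed\n");
	}

	/* returns the number of free slots left; 0 means the caller must flush */
	static int toy_tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
	{
		tlb->dirty[tlb->nr] = dirty;
		tlb->pages[tlb->nr++] = page;
		return BATCH_MAX - tlb->nr;
	}

	static void toy_tlb_flush_mmu(struct mmu_gather *tlb)
	{
		toy_tlb_flush();
		/* only now is it safe to expose the dirty state to cleaning/writeback */
		for (int i = 0; i < tlb->nr; i++)
			if (tlb->dirty[i])
				set_page_dirty(tlb->pages[i]);
		tlb->nr = 0;
	}

	int main(void)
	{
		struct mmu_gather tlb = { .nr = 0 };
		struct page pages[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };

		/* unmap three pages; pretend the middle pte had its dirty bit set */
		for (int i = 0; i < 3; i++)
			if (!toy_tlb_remove_page(&tlb, &pages[i], i == 1))
				toy_tlb_flush_mmu(&tlb);
		toy_tlb_flush_mmu(&tlb);
		return 0;
	}

The point of the model is only the ordering: the gather operation records the
dirty state at unmap time but defers set_page_dirty() until after the flush,
which is what the current patch's interface change makes possible without yet
implementing it.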