From: Yin Fengwei <fengwei.yin@intel.com>
To: linux-mm@kvack.org, akpm@linux-foundation.org,
	willy@infradead.org, mike.kravetz@oracle.com,
	sidhartha.kumar@oracle.com, naoya.horiguchi@nec.com,
	jane.chu@oracle.com, david@redhat.com
Cc: fengwei.yin@intel.com
Subject: [PATCH v3 2/5] rmap: move page unmap operation to dedicated function
Date: Mon,  6 Mar 2023 17:22:56 +0800	[thread overview]
Message-ID: <20230306092259.3507807-3-fengwei.yin@intel.com> (raw)
In-Reply-To: <20230306092259.3507807-1-fengwei.yin@intel.com>

No functional change intended. This only reorganizes the code: the per-PTE
unmap work in try_to_unmap_one() is moved into a dedicated helper,
try_to_unmap_one_page(), so that a later patch in this series can batch the
rmap removal in the caller.

Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
---
 mm/rmap.c | 369 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 194 insertions(+), 175 deletions(-)
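
A condensed sketch of the resulting call structure, for review only
(elisions are marked with /* ... */ comments; this is not the literal
code, see the diff below):

	static bool try_to_unmap_one(struct folio *folio,
			struct vm_area_struct *vma, unsigned long address,
			void *arg)
	{
		DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
		enum ttu_flags flags = (enum ttu_flags)(long)arg;
		struct mmu_notifier_range range;
		bool ret = true;

		/* ... mmu_notifier range setup and start as before ... */

		while (page_vma_mapped_walk(&pvmw)) {
			/* ... hugetlb and pte-present handling as before ... */

			/* all per-pte unmap work now lives in the helper */
			ret = try_to_unmap_one_page(folio, vma, range,
						    pvmw, address, flags);
			if (!ret)
				break;

			/* ... rmap removal and folio_put() as before ... */
		}

		/* ... mmu_notifier_invalidate_range_end() as before ... */
		return ret;
	}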

diff --git a/mm/rmap.c b/mm/rmap.c
index 508d141dacc5..013643122d0c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1538,17 +1538,204 @@ static bool try_to_unmap_one_hugetlb(struct folio *folio,
 	return ret;
 }
 
+static bool try_to_unmap_one_page(struct folio *folio,
+		struct vm_area_struct *vma, struct mmu_notifier_range range,
+		struct page_vma_mapped_walk pvmw, unsigned long address,
+		enum ttu_flags flags)
+{
+	bool anon_exclusive, ret = true;
+	struct page *subpage;
+	struct mm_struct *mm = vma->vm_mm;
+	pte_t pteval;
+
+	subpage = folio_page(folio,
+			pte_pfn(*pvmw.pte) - folio_pfn(folio));
+	anon_exclusive = folio_test_anon(folio) &&
+		PageAnonExclusive(subpage);
+
+	flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+	/* Nuke the page table entry. */
+	if (should_defer_flush(mm, flags)) {
+		/*
+		 * We clear the PTE but do not flush so potentially
+		 * a remote CPU could still be writing to the folio.
+		 * If the entry was previously clean then the
+		 * architecture must guarantee that a clear->dirty
+		 * transition on a cached TLB entry is written through
+		 * and traps if the PTE is unmapped.
+		 */
+		pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+
+		set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
+	} else {
+		pteval = ptep_clear_flush(vma, address, pvmw.pte);
+	}
+
+	/*
+	 * Now the pte is cleared. If this pte was uffd-wp armed,
+	 * we may want to replace a none pte with a marker pte if
+	 * it's file-backed, so we don't lose the tracking info.
+	 */
+	pte_install_uffd_wp_if_needed(vma, address, pvmw.pte, pteval);
+
+	/* Set the dirty flag on the folio now the pte is gone. */
+	if (pte_dirty(pteval))
+		folio_mark_dirty(folio);
+
+	/* Update high watermark before we lower rss */
+	update_hiwater_rss(mm);
+
+	if (PageHWPoison(subpage) && (flags & TTU_HWPOISON)) {
+		pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
+		dec_mm_counter(mm, mm_counter(&folio->page));
+		set_pte_at(mm, address, pvmw.pte, pteval);
+	} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
+		/*
+		 * The guest indicated that the page content is of no
+		 * interest anymore. Simply discard the pte, vmscan
+		 * will take care of the rest.
+		 * A future reference will then fault in a new zero
+		 * page. When userfaultfd is active, we must not drop
+		 * this page though, as its main user (postcopy
+		 * migration) will not expect userfaults on already
+		 * copied pages.
+		 */
+		dec_mm_counter(mm, mm_counter(&folio->page));
+		/* We have to invalidate as we cleared the pte */
+		mmu_notifier_invalidate_range(mm, address,
+				address + PAGE_SIZE);
+	} else if (folio_test_anon(folio)) {
+		swp_entry_t entry = { .val = page_private(subpage) };
+		pte_t swp_pte;
+		/*
+		 * Store the swap location in the pte.
+		 * See handle_pte_fault() ...
+		 */
+		if (unlikely(folio_test_swapbacked(folio) !=
+					folio_test_swapcache(folio))) {
+			WARN_ON_ONCE(1);
+			ret = false;
+			/* We have to invalidate as we cleared the pte */
+			mmu_notifier_invalidate_range(mm, address,
+					address + PAGE_SIZE);
+			page_vma_mapped_walk_done(&pvmw);
+			goto discard;
+		}
+
+		/* MADV_FREE page check */
+		if (!folio_test_swapbacked(folio)) {
+			int ref_count, map_count;
+
+			/*
+			 * Synchronize with gup_pte_range():
+			 * - clear PTE; barrier; read refcount
+			 * - inc refcount; barrier; read PTE
+			 */
+			smp_mb();
+
+			ref_count = folio_ref_count(folio);
+			map_count = folio_mapcount(folio);
+
+			/*
+			 * Order reads for page refcount and dirty flag
+			 * (see comments in __remove_mapping()).
+			 */
+			smp_rmb();
+
+			/*
+			 * The only page refs must be one from isolation
+			 * plus the rmap(s) (dropped by discard:).
+			 */
+			if (ref_count == 1 + map_count &&
+					!folio_test_dirty(folio)) {
+				/* Invalidate as we cleared the pte */
+				mmu_notifier_invalidate_range(mm,
+						address, address + PAGE_SIZE);
+				dec_mm_counter(mm, MM_ANONPAGES);
+				goto discard;
+			}
+
+			/*
+			 * If the folio was redirtied, it cannot be
+			 * discarded. Remap the page to page table.
+			 */
+			set_pte_at(mm, address, pvmw.pte, pteval);
+			folio_set_swapbacked(folio);
+			ret = false;
+			page_vma_mapped_walk_done(&pvmw);
+			goto discard;
+		}
+
+		if (swap_duplicate(entry) < 0) {
+			set_pte_at(mm, address, pvmw.pte, pteval);
+			ret = false;
+			page_vma_mapped_walk_done(&pvmw);
+			goto discard;
+		}
+		if (arch_unmap_one(mm, vma, address, pteval) < 0) {
+			swap_free(entry);
+			set_pte_at(mm, address, pvmw.pte, pteval);
+			ret = false;
+			page_vma_mapped_walk_done(&pvmw);
+			goto discard;
+		}
+
+		/* See page_try_share_anon_rmap(): clear PTE first. */
+		if (anon_exclusive &&
+				page_try_share_anon_rmap(subpage)) {
+			swap_free(entry);
+			set_pte_at(mm, address, pvmw.pte, pteval);
+			ret = false;
+			page_vma_mapped_walk_done(&pvmw);
+			goto discard;
+		}
+		if (list_empty(&mm->mmlist)) {
+			spin_lock(&mmlist_lock);
+			if (list_empty(&mm->mmlist))
+				list_add(&mm->mmlist, &init_mm.mmlist);
+			spin_unlock(&mmlist_lock);
+		}
+		dec_mm_counter(mm, MM_ANONPAGES);
+		inc_mm_counter(mm, MM_SWAPENTS);
+		swp_pte = swp_entry_to_pte(entry);
+		if (anon_exclusive)
+			swp_pte = pte_swp_mkexclusive(swp_pte);
+		if (pte_soft_dirty(pteval))
+			swp_pte = pte_swp_mksoft_dirty(swp_pte);
+		if (pte_uffd_wp(pteval))
+			swp_pte = pte_swp_mkuffd_wp(swp_pte);
+		set_pte_at(mm, address, pvmw.pte, swp_pte);
+		/* Invalidate as we cleared the pte */
+		mmu_notifier_invalidate_range(mm, address,
+				address + PAGE_SIZE);
+	} else {
+		/*
+		 * This is a locked file-backed folio,
+		 * so it cannot be removed from the page
+		 * cache and replaced by a new folio before
+		 * mmu_notifier_invalidate_range_end, so no
+		 * concurrent thread might update its page table
+		 * to point at a new folio while a device is
+		 * still using this folio.
+		 *
+		 * See Documentation/mm/mmu_notifier.rst
+		 */
+		dec_mm_counter(mm, mm_counter_file(&folio->page));
+	}
+
+discard:
+	return ret;
+}
+
 /*
  * @arg: enum ttu_flags will be passed to this argument
  */
 static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 		     unsigned long address, void *arg)
 {
-	struct mm_struct *mm = vma->vm_mm;
 	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
-	pte_t pteval;
 	struct page *subpage;
-	bool anon_exclusive, ret = true;
+	bool ret = true;
 	struct mmu_notifier_range range;
 	enum ttu_flags flags = (enum ttu_flags)(long)arg;
 
@@ -1613,179 +1800,11 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 
 		subpage = folio_page(folio,
 					pte_pfn(*pvmw.pte) - folio_pfn(folio));
-		anon_exclusive = folio_test_anon(folio) &&
-				 PageAnonExclusive(subpage);
-
-		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
-		/* Nuke the page table entry. */
-		if (should_defer_flush(mm, flags)) {
-			/*
-			 * We clear the PTE but do not flush so potentially
-			 * a remote CPU could still be writing to the folio.
-			 * If the entry was previously clean then the
-			 * architecture must guarantee that a clear->dirty
-			 * transition on a cached TLB entry is written through
-			 * and traps if the PTE is unmapped.
-			 */
-			pteval = ptep_get_and_clear(mm, address, pvmw.pte);
-
-			set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
-		} else {
-			pteval = ptep_clear_flush(vma, address, pvmw.pte);
-		}
-
-		/*
-		 * Now the pte is cleared. If this pte was uffd-wp armed,
-		 * we may want to replace a none pte with a marker pte if
-		 * it's file-backed, so we don't lose the tracking info.
-		 */
-		pte_install_uffd_wp_if_needed(vma, address, pvmw.pte, pteval);
-
-		/* Set the dirty flag on the folio now the pte is gone. */
-		if (pte_dirty(pteval))
-			folio_mark_dirty(folio);
-
-		/* Update high watermark before we lower rss */
-		update_hiwater_rss(mm);
-
-		if (PageHWPoison(subpage) && (flags & TTU_HWPOISON)) {
-			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
-			dec_mm_counter(mm, mm_counter(&folio->page));
-			set_pte_at(mm, address, pvmw.pte, pteval);
-		} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
-			/*
-			 * The guest indicated that the page content is of no
-			 * interest anymore. Simply discard the pte, vmscan
-			 * will take care of the rest.
-			 * A future reference will then fault in a new zero
-			 * page. When userfaultfd is active, we must not drop
-			 * this page though, as its main user (postcopy
-			 * migration) will not expect userfaults on already
-			 * copied pages.
-			 */
-			dec_mm_counter(mm, mm_counter(&folio->page));
-			/* We have to invalidate as we cleared the pte */
-			mmu_notifier_invalidate_range(mm, address,
-						      address + PAGE_SIZE);
-		} else if (folio_test_anon(folio)) {
-			swp_entry_t entry = { .val = page_private(subpage) };
-			pte_t swp_pte;
-			/*
-			 * Store the swap location in the pte.
-			 * See handle_pte_fault() ...
-			 */
-			if (unlikely(folio_test_swapbacked(folio) !=
-					folio_test_swapcache(folio))) {
-				WARN_ON_ONCE(1);
-				ret = false;
-				/* We have to invalidate as we cleared the pte */
-				mmu_notifier_invalidate_range(mm, address,
-							address + PAGE_SIZE);
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
-
-			/* MADV_FREE page check */
-			if (!folio_test_swapbacked(folio)) {
-				int ref_count, map_count;
-
-				/*
-				 * Synchronize with gup_pte_range():
-				 * - clear PTE; barrier; read refcount
-				 * - inc refcount; barrier; read PTE
-				 */
-				smp_mb();
-
-				ref_count = folio_ref_count(folio);
-				map_count = folio_mapcount(folio);
-
-				/*
-				 * Order reads for page refcount and dirty flag
-				 * (see comments in __remove_mapping()).
-				 */
-				smp_rmb();
-
-				/*
-				 * The only page refs must be one from isolation
-				 * plus the rmap(s) (dropped by discard:).
-				 */
-				if (ref_count == 1 + map_count &&
-				    !folio_test_dirty(folio)) {
-					/* Invalidate as we cleared the pte */
-					mmu_notifier_invalidate_range(mm,
-						address, address + PAGE_SIZE);
-					dec_mm_counter(mm, MM_ANONPAGES);
-					goto discard;
-				}
-
-				/*
-				 * If the folio was redirtied, it cannot be
-				 * discarded. Remap the page to page table.
-				 */
-				set_pte_at(mm, address, pvmw.pte, pteval);
-				folio_set_swapbacked(folio);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
-
-			if (swap_duplicate(entry) < 0) {
-				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
-			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
-				swap_free(entry);
-				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
+		ret = try_to_unmap_one_page(folio, vma,
+						range, pvmw, address, flags);
+		if (!ret)
+			break;
 
-			/* See page_try_share_anon_rmap(): clear PTE first. */
-			if (anon_exclusive &&
-			    page_try_share_anon_rmap(subpage)) {
-				swap_free(entry);
-				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
-			if (list_empty(&mm->mmlist)) {
-				spin_lock(&mmlist_lock);
-				if (list_empty(&mm->mmlist))
-					list_add(&mm->mmlist, &init_mm.mmlist);
-				spin_unlock(&mmlist_lock);
-			}
-			dec_mm_counter(mm, MM_ANONPAGES);
-			inc_mm_counter(mm, MM_SWAPENTS);
-			swp_pte = swp_entry_to_pte(entry);
-			if (anon_exclusive)
-				swp_pte = pte_swp_mkexclusive(swp_pte);
-			if (pte_soft_dirty(pteval))
-				swp_pte = pte_swp_mksoft_dirty(swp_pte);
-			if (pte_uffd_wp(pteval))
-				swp_pte = pte_swp_mkuffd_wp(swp_pte);
-			set_pte_at(mm, address, pvmw.pte, swp_pte);
-			/* Invalidate as we cleared the pte */
-			mmu_notifier_invalidate_range(mm, address,
-						      address + PAGE_SIZE);
-		} else {
-			/*
-			 * This is a locked file-backed folio,
-			 * so it cannot be removed from the page
-			 * cache and replaced by a new folio before
-			 * mmu_notifier_invalidate_range_end, so no
-			 * concurrent thread might update its page table
-			 * to point at a new folio while a device is
-			 * still using this folio.
-			 *
-			 * See Documentation/mm/mmu_notifier.rst
-			 */
-			dec_mm_counter(mm, mm_counter_file(&folio->page));
-		}
-discard:
 		/*
 		 * No need to call mmu_notifier_invalidate_range() it has be
 		 * done above for all cases requiring it to happen under page
-- 
2.30.2


