From: Yin Fengwei <fengwei.yin@intel.com>
To: linux-mm@kvack.org, akpm@linux-foundation.org,
willy@infradead.org, mike.kravetz@oracle.com,
sidhartha.kumar@oracle.com, naoya.horiguchi@nec.com,
jane.chu@oracle.com, david@redhat.com
Cc: fengwei.yin@intel.com
Subject: [PATCH v4 2/5] rmap: move page unmap operation to dedicated function
Date: Mon, 13 Mar 2023 20:45:23 +0800 [thread overview]
Message-ID: <20230313124526.1207490-3-fengwei.yin@intel.com> (raw)
In-Reply-To: <20230313124526.1207490-1-fengwei.yin@intel.com>
No functional change. Just code reorganized.
Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
---
mm/rmap.c | 369 ++++++++++++++++++++++++++++--------------------------
1 file changed, 194 insertions(+), 175 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index 3a2e3ccb8031..23eda671447a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1538,17 +1538,204 @@ static bool try_to_unmap_one_hugetlb(struct folio *folio,
return ret;
}
+static bool try_to_unmap_one_page(struct folio *folio,
+ struct vm_area_struct *vma, struct mmu_notifier_range range,
+ struct page_vma_mapped_walk pvmw, unsigned long address,
+ enum ttu_flags flags)
+{
+ bool anon_exclusive, ret = true;
+ struct page *subpage;
+ struct mm_struct *mm = vma->vm_mm;
+ pte_t pteval;
+
+ subpage = folio_page(folio,
+ pte_pfn(*pvmw.pte) - folio_pfn(folio));
+ anon_exclusive = folio_test_anon(folio) &&
+ PageAnonExclusive(subpage);
+
+ flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+ /* Nuke the page table entry. */
+ if (should_defer_flush(mm, flags)) {
+ /*
+ * We clear the PTE but do not flush so potentially
+ * a remote CPU could still be writing to the folio.
+ * If the entry was previously clean then the
+ * architecture must guarantee that a clear->dirty
+ * transition on a cached TLB entry is written through
+ * and traps if the PTE is unmapped.
+ */
+ pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+
+ set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
+ } else {
+ pteval = ptep_clear_flush(vma, address, pvmw.pte);
+ }
+
+ /*
+ * Now the pte is cleared. If this pte was uffd-wp armed,
+ * we may want to replace a none pte with a marker pte if
+ * it's file-backed, so we don't lose the tracking info.
+ */
+ pte_install_uffd_wp_if_needed(vma, address, pvmw.pte, pteval);
+
+ /* Set the dirty flag on the folio now the pte is gone. */
+ if (pte_dirty(pteval))
+ folio_mark_dirty(folio);
+
+ /* Update high watermark before we lower rss */
+ update_hiwater_rss(mm);
+
+ if (PageHWPoison(subpage) && !(flags & TTU_HWPOISON)) {
+ pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
+ dec_mm_counter(mm, mm_counter(&folio->page));
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
+ /*
+ * The guest indicated that the page content is of no
+ * interest anymore. Simply discard the pte, vmscan
+ * will take care of the rest.
+ * A future reference will then fault in a new zero
+ * page. When userfaultfd is active, we must not drop
+ * this page though, as its main user (postcopy
+ * migration) will not expect userfaults on already
+ * copied pages.
+ */
+ dec_mm_counter(mm, mm_counter(&folio->page));
+ /* We have to invalidate as we cleared the pte */
+ mmu_notifier_invalidate_range(mm, address,
+ address + PAGE_SIZE);
+ } else if (folio_test_anon(folio)) {
+ swp_entry_t entry = { .val = page_private(subpage) };
+ pte_t swp_pte;
+ /*
+ * Store the swap location in the pte.
+ * See handle_pte_fault() ...
+ */
+ if (unlikely(folio_test_swapbacked(folio) !=
+ folio_test_swapcache(folio))) {
+ WARN_ON_ONCE(1);
+ ret = false;
+ /* We have to invalidate as we cleared the pte */
+ mmu_notifier_invalidate_range(mm, address,
+ address + PAGE_SIZE);
+ page_vma_mapped_walk_done(&pvmw);
+ goto discard;
+ }
+
+ /* MADV_FREE page check */
+ if (!folio_test_swapbacked(folio)) {
+ int ref_count, map_count;
+
+ /*
+ * Synchronize with gup_pte_range():
+ * - clear PTE; barrier; read refcount
+ * - inc refcount; barrier; read PTE
+ */
+ smp_mb();
+
+ ref_count = folio_ref_count(folio);
+ map_count = folio_mapcount(folio);
+
+ /*
+ * Order reads for page refcount and dirty flag
+ * (see comments in __remove_mapping()).
+ */
+ smp_rmb();
+
+ /*
+ * The only page refs must be one from isolation
+ * plus the rmap(s) (dropped by discard:).
+ */
+ if (ref_count == 1 + map_count &&
+ !folio_test_dirty(folio)) {
+ /* Invalidate as we cleared the pte */
+ mmu_notifier_invalidate_range(mm,
+ address, address + PAGE_SIZE);
+ dec_mm_counter(mm, MM_ANONPAGES);
+ goto discard;
+ }
+
+ /*
+ * If the folio was redirtied, it cannot be
+ * discarded. Remap the page to page table.
+ */
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ folio_set_swapbacked(folio);
+ ret = false;
+ page_vma_mapped_walk_done(&pvmw);
+ goto discard;
+ }
+
+ if (swap_duplicate(entry) < 0) {
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ ret = false;
+ page_vma_mapped_walk_done(&pvmw);
+ goto discard;
+ }
+ if (arch_unmap_one(mm, vma, address, pteval) < 0) {
+ swap_free(entry);
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ ret = false;
+ page_vma_mapped_walk_done(&pvmw);
+ goto discard;
+ }
+
+ /* See page_try_share_anon_rmap(): clear PTE first. */
+ if (anon_exclusive &&
+ page_try_share_anon_rmap(subpage)) {
+ swap_free(entry);
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ ret = false;
+ page_vma_mapped_walk_done(&pvmw);
+ goto discard;
+ }
+ if (list_empty(&mm->mmlist)) {
+ spin_lock(&mmlist_lock);
+ if (list_empty(&mm->mmlist))
+ list_add(&mm->mmlist, &init_mm.mmlist);
+ spin_unlock(&mmlist_lock);
+ }
+ dec_mm_counter(mm, MM_ANONPAGES);
+ inc_mm_counter(mm, MM_SWAPENTS);
+ swp_pte = swp_entry_to_pte(entry);
+ if (anon_exclusive)
+ swp_pte = pte_swp_mkexclusive(swp_pte);
+ if (pte_soft_dirty(pteval))
+ swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_uffd_wp(pteval))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ set_pte_at(mm, address, pvmw.pte, swp_pte);
+ /* Invalidate as we cleared the pte */
+ mmu_notifier_invalidate_range(mm, address,
+ address + PAGE_SIZE);
+ } else {
+ /*
+ * This is a locked file-backed folio,
+ * so it cannot be removed from the page
+ * cache and replaced by a new folio before
+ * mmu_notifier_invalidate_range_end, so no
+ * concurrent thread might update its page table
+ * to point at a new folio while a device is
+ * still using this folio.
+ *
+ * See Documentation/mm/mmu_notifier.rst
+ */
+ dec_mm_counter(mm, mm_counter_file(&folio->page));
+ }
+
+discard:
+ return ret;
+}
+
/*
* @arg: enum ttu_flags will be passed to this argument
*/
static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
unsigned long address, void *arg)
{
- struct mm_struct *mm = vma->vm_mm;
DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
- pte_t pteval;
struct page *subpage;
- bool anon_exclusive, ret = true;
+ bool ret = true;
struct mmu_notifier_range range;
enum ttu_flags flags = (enum ttu_flags)(long)arg;
@@ -1613,179 +1800,11 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
subpage = folio_page(folio,
pte_pfn(*pvmw.pte) - folio_pfn(folio));
- anon_exclusive = folio_test_anon(folio) &&
- PageAnonExclusive(subpage);
-
- flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
- /* Nuke the page table entry. */
- if (should_defer_flush(mm, flags)) {
- /*
- * We clear the PTE but do not flush so potentially
- * a remote CPU could still be writing to the folio.
- * If the entry was previously clean then the
- * architecture must guarantee that a clear->dirty
- * transition on a cached TLB entry is written through
- * and traps if the PTE is unmapped.
- */
- pteval = ptep_get_and_clear(mm, address, pvmw.pte);
-
- set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
- } else {
- pteval = ptep_clear_flush(vma, address, pvmw.pte);
- }
-
- /*
- * Now the pte is cleared. If this pte was uffd-wp armed,
- * we may want to replace a none pte with a marker pte if
- * it's file-backed, so we don't lose the tracking info.
- */
- pte_install_uffd_wp_if_needed(vma, address, pvmw.pte, pteval);
-
- /* Set the dirty flag on the folio now the pte is gone. */
- if (pte_dirty(pteval))
- folio_mark_dirty(folio);
-
- /* Update high watermark before we lower rss */
- update_hiwater_rss(mm);
-
- if (PageHWPoison(subpage) && (flags & TTU_HWPOISON)) {
- pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
- dec_mm_counter(mm, mm_counter(&folio->page));
- set_pte_at(mm, address, pvmw.pte, pteval);
- } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
- /*
- * The guest indicated that the page content is of no
- * interest anymore. Simply discard the pte, vmscan
- * will take care of the rest.
- * A future reference will then fault in a new zero
- * page. When userfaultfd is active, we must not drop
- * this page though, as its main user (postcopy
- * migration) will not expect userfaults on already
- * copied pages.
- */
- dec_mm_counter(mm, mm_counter(&folio->page));
- /* We have to invalidate as we cleared the pte */
- mmu_notifier_invalidate_range(mm, address,
- address + PAGE_SIZE);
- } else if (folio_test_anon(folio)) {
- swp_entry_t entry = { .val = page_private(subpage) };
- pte_t swp_pte;
- /*
- * Store the swap location in the pte.
- * See handle_pte_fault() ...
- */
- if (unlikely(folio_test_swapbacked(folio) !=
- folio_test_swapcache(folio))) {
- WARN_ON_ONCE(1);
- ret = false;
- /* We have to invalidate as we cleared the pte */
- mmu_notifier_invalidate_range(mm, address,
- address + PAGE_SIZE);
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
-
- /* MADV_FREE page check */
- if (!folio_test_swapbacked(folio)) {
- int ref_count, map_count;
-
- /*
- * Synchronize with gup_pte_range():
- * - clear PTE; barrier; read refcount
- * - inc refcount; barrier; read PTE
- */
- smp_mb();
-
- ref_count = folio_ref_count(folio);
- map_count = folio_mapcount(folio);
-
- /*
- * Order reads for page refcount and dirty flag
- * (see comments in __remove_mapping()).
- */
- smp_rmb();
-
- /*
- * The only page refs must be one from isolation
- * plus the rmap(s) (dropped by discard:).
- */
- if (ref_count == 1 + map_count &&
- !folio_test_dirty(folio)) {
- /* Invalidate as we cleared the pte */
- mmu_notifier_invalidate_range(mm,
- address, address + PAGE_SIZE);
- dec_mm_counter(mm, MM_ANONPAGES);
- goto discard;
- }
-
- /*
- * If the folio was redirtied, it cannot be
- * discarded. Remap the page to page table.
- */
- set_pte_at(mm, address, pvmw.pte, pteval);
- folio_set_swapbacked(folio);
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
-
- if (swap_duplicate(entry) < 0) {
- set_pte_at(mm, address, pvmw.pte, pteval);
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
- if (arch_unmap_one(mm, vma, address, pteval) < 0) {
- swap_free(entry);
- set_pte_at(mm, address, pvmw.pte, pteval);
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
+ ret = try_to_unmap_one_page(folio, vma,
+ range, pvmw, address, flags);
+ if (!ret)
+ break;
- /* See page_try_share_anon_rmap(): clear PTE first. */
- if (anon_exclusive &&
- page_try_share_anon_rmap(subpage)) {
- swap_free(entry);
- set_pte_at(mm, address, pvmw.pte, pteval);
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
- if (list_empty(&mm->mmlist)) {
- spin_lock(&mmlist_lock);
- if (list_empty(&mm->mmlist))
- list_add(&mm->mmlist, &init_mm.mmlist);
- spin_unlock(&mmlist_lock);
- }
- dec_mm_counter(mm, MM_ANONPAGES);
- inc_mm_counter(mm, MM_SWAPENTS);
- swp_pte = swp_entry_to_pte(entry);
- if (anon_exclusive)
- swp_pte = pte_swp_mkexclusive(swp_pte);
- if (pte_soft_dirty(pteval))
- swp_pte = pte_swp_mksoft_dirty(swp_pte);
- if (pte_uffd_wp(pteval))
- swp_pte = pte_swp_mkuffd_wp(swp_pte);
- set_pte_at(mm, address, pvmw.pte, swp_pte);
- /* Invalidate as we cleared the pte */
- mmu_notifier_invalidate_range(mm, address,
- address + PAGE_SIZE);
- } else {
- /*
- * This is a locked file-backed folio,
- * so it cannot be removed from the page
- * cache and replaced by a new folio before
- * mmu_notifier_invalidate_range_end, so no
- * concurrent thread might update its page table
- * to point at a new folio while a device is
- * still using this folio.
- *
- * See Documentation/mm/mmu_notifier.rst
- */
- dec_mm_counter(mm, mm_counter_file(&folio->page));
- }
-discard:
/*
* No need to call mmu_notifier_invalidate_range() it has be
* done above for all cases requiring it to happen under page
--
2.30.2
next prev parent reply other threads:[~2023-03-13 12:44 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-13 12:45 [PATCH v4 0/5] batched remove rmap in try_to_unmap_one() Yin Fengwei
2023-03-13 12:45 ` [PATCH v4 1/5] rmap: move hugetlb try_to_unmap to dedicated function Yin Fengwei
2023-03-13 12:45 ` Yin Fengwei [this message]
2023-03-13 12:45 ` [PATCH v4 3/5] rmap: cleanup exit path of try_to_unmap_one_page() Yin Fengwei
2023-03-13 12:45 ` [PATCH v4 4/5] rmap:addd folio_remove_rmap_range() Yin Fengwei
2023-03-13 12:45 ` [PATCH v4 5/5] try_to_unmap_one: batched remove rmap, update folio refcount Yin Fengwei
2023-03-13 18:49 ` [PATCH v4 0/5] batched remove rmap in try_to_unmap_one() Andrew Morton
2023-03-14 3:09 ` Yin Fengwei
2023-03-14 9:16 ` David Hildenbrand
2023-03-14 9:48 ` Matthew Wilcox
2023-03-14 9:50 ` David Hildenbrand
2023-03-14 14:50 ` Yin, Fengwei
2023-03-14 15:01 ` Matthew Wilcox
2023-03-15 2:17 ` Yin Fengwei
2023-03-20 13:47 ` Yin, Fengwei
2023-03-21 14:17 ` David Hildenbrand
2023-03-22 1:31 ` Yin Fengwei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230313124526.1207490-3-fengwei.yin@intel.com \
--to=fengwei.yin@intel.com \
--cc=akpm@linux-foundation.org \
--cc=david@redhat.com \
--cc=jane.chu@oracle.com \
--cc=linux-mm@kvack.org \
--cc=mike.kravetz@oracle.com \
--cc=naoya.horiguchi@nec.com \
--cc=sidhartha.kumar@oracle.com \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox