From: <alexlzhu@fb.com>
To: <linux-mm@kvack.org>, <kernel-team@fb.com>
Cc: <riel@surriel.com>, <hannes@cmpxchg.org>, <willy@infradead.org>,
<ningzhang@linux.alibaba.com>, <yuzhao@google.com>,
Alexander Zhu <alexlzhu@fb.com>
Subject: [PATCH v5 3/5] mm: do not remap clean subpages when splitting isolated thp
Date: Tue, 25 Oct 2022 17:26:56 -0700 [thread overview]
Message-ID: <e1f734e2877fa5cbdbf3c8424c00f00abebf4392.1666743422.git.alexlzhu@fb.com> (raw)
In-Reply-To: <cover.1666743422.git.alexlzhu@fb.com>
From: Alexander Zhu <alexlzhu@fb.com>
Changes to avoid remap on zero pages that are free'd in split_huge_page().
Pages are not remapped except in the case of userfaultfd. In the case
of userfaultfd we remap to the shared zero page, similar to what is
done by KSM.
Signed-off-by: Alexander Zhu <alexlzhu@fb.com>
---
v1 to v2
-Only trigger the unmap_clean/zap in split_huge_page on anonymous THPs. We cannot zap zero pages for file THPs.
RFC to v1
-Added support to map to the read only zero page when splitting a THP registered with userfaultfd.
include/linux/rmap.h | 2 +-
mm/huge_memory.c | 8 ++---
mm/migrate.c | 73 +++++++++++++++++++++++++++++++++++++++-----
mm/migrate_device.c | 4 +--
4 files changed, 73 insertions(+), 14 deletions(-)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bd3504d11b15..3f83bbcf1333 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -428,7 +428,7 @@ int folio_mkclean(struct folio *);
int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
struct vm_area_struct *vma);
-void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked);
+void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked, bool unmap_clean);
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 363218e8a22a..f68a353e0adf 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2373,7 +2373,7 @@ static void unmap_folio(struct folio *folio)
try_to_unmap(folio, ttu_flags | TTU_IGNORE_MLOCK);
}
-static void remap_page(struct folio *folio, unsigned long nr)
+static void remap_page(struct folio *folio, unsigned long nr, bool unmap_clean)
{
int i = 0;
@@ -2381,7 +2381,7 @@ static void remap_page(struct folio *folio, unsigned long nr)
if (!folio_test_anon(folio))
return;
for (;;) {
- remove_migration_ptes(folio, folio, true);
+ remove_migration_ptes(folio, folio, true, unmap_clean);
i += folio_nr_pages(folio);
if (i >= nr)
break;
@@ -2560,7 +2560,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
}
local_irq_enable();
- remap_page(folio, nr);
+ remap_page(folio, nr, PageAnon(head));
if (PageSwapCache(head)) {
swp_entry_t entry = { .val = page_private(head) };
@@ -2789,7 +2789,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
if (mapping)
xas_unlock(&xas);
local_irq_enable();
- remap_page(folio, folio_nr_pages(folio));
+ remap_page(folio, folio_nr_pages(folio), false);
ret = -EBUSY;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 1379e1912772..bc96a084d925 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -30,6 +30,7 @@
#include <linux/writeback.h>
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
+#include <linux/vm_event_item.h>
#include <linux/security.h>
#include <linux/backing-dev.h>
#include <linux/compaction.h>
@@ -168,13 +169,62 @@ void putback_movable_pages(struct list_head *l)
}
}
+static bool try_to_unmap_clean(struct page_vma_mapped_walk *pvmw, struct page *page)
+{
+ void *addr;
+ bool dirty;
+ pte_t newpte;
+
+ VM_BUG_ON_PAGE(PageCompound(page), page);
+ VM_BUG_ON_PAGE(!PageAnon(page), page);
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
+
+ if (PageMlocked(page) || (pvmw->vma->vm_flags & VM_LOCKED))
+ return false;
+
+ /*
+ * The pmd entry mapping the old thp was flushed and the pte mapping
+ * this subpage has been non present. Therefore, this subpage is
+ * inaccessible. We don't need to remap it if it contains only zeros.
+ */
+ addr = kmap_local_page(page);
+ dirty = memchr_inv(addr, 0, PAGE_SIZE);
+ kunmap_local(addr);
+
+ if (dirty)
+ return false;
+
+ pte_clear_not_present_full(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, false);
+
+ if (userfaultfd_armed(pvmw->vma)) {
+ newpte = pte_mkspecial(pfn_pte(page_to_pfn(ZERO_PAGE(pvmw->address)),
+ pvmw->vma->vm_page_prot));
+ ptep_clear_flush(pvmw->vma, pvmw->address, pvmw->pte);
+ set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);
+ dec_mm_counter(pvmw->vma->vm_mm, MM_ANONPAGES);
+ count_vm_event(THP_SPLIT_REMAP_READONLY_ZERO_PAGE);
+ return true;
+ }
+
+ dec_mm_counter(pvmw->vma->vm_mm, mm_counter(page));
+ count_vm_event(THP_SPLIT_UNMAP);
+ return true;
+}
+
+struct rmap_walk_arg {
+ struct folio *folio;
+ bool unmap_clean;
+};
+
/*
* Restore a potential migration pte to a working pte entry
*/
static bool remove_migration_pte(struct folio *folio,
- struct vm_area_struct *vma, unsigned long addr, void *old)
+ struct vm_area_struct *vma, unsigned long addr, void *arg)
{
- DEFINE_FOLIO_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
+ struct rmap_walk_arg *rmap_walk_arg = arg;
+ DEFINE_FOLIO_VMA_WALK(pvmw, rmap_walk_arg->folio, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
while (page_vma_mapped_walk(&pvmw)) {
rmap_t rmap_flags = RMAP_NONE;
@@ -197,6 +247,8 @@ static bool remove_migration_pte(struct folio *folio,
continue;
}
#endif
+ if (rmap_walk_arg->unmap_clean && try_to_unmap_clean(&pvmw, new))
+ continue;
folio_get(folio);
pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
@@ -272,13 +324,20 @@ static bool remove_migration_pte(struct folio *folio,
* Get rid of all migration entries and replace them by
* references to the indicated page.
*/
-void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked)
+void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked, bool unmap_clean)
{
+ struct rmap_walk_arg rmap_walk_arg = {
+ .folio = src,
+ .unmap_clean = unmap_clean,
+ };
+
struct rmap_walk_control rwc = {
.rmap_one = remove_migration_pte,
- .arg = src,
+ .arg = &rmap_walk_arg,
};
+ VM_BUG_ON_FOLIO(unmap_clean && src != dst, src);
+
if (locked)
rmap_walk_locked(dst, &rwc);
else
@@ -872,7 +931,7 @@ static int writeout(struct address_space *mapping, struct folio *folio)
* At this point we know that the migration attempt cannot
* be successful.
*/
- remove_migration_ptes(folio, folio, false);
+ remove_migration_ptes(folio, folio, false, false);
rc = mapping->a_ops->writepage(&folio->page, &wbc);
@@ -1128,7 +1187,7 @@ static int __unmap_and_move(struct folio *src, struct folio *dst,
if (page_was_mapped)
remove_migration_ptes(src,
- rc == MIGRATEPAGE_SUCCESS ? dst : src, false);
+ rc == MIGRATEPAGE_SUCCESS ? dst : src, false, false);
out_unlock_both:
folio_unlock(dst);
@@ -1338,7 +1397,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
if (page_was_mapped)
remove_migration_ptes(src,
- rc == MIGRATEPAGE_SUCCESS ? dst : src, false);
+ rc == MIGRATEPAGE_SUCCESS ? dst : src, false, false);
unlock_put_anon:
folio_unlock(dst);
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 6fa682eef7a0..6508a083d7fd 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -421,7 +421,7 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns,
continue;
folio = page_folio(page);
- remove_migration_ptes(folio, folio, false);
+ remove_migration_ptes(folio, folio, false, false);
src_pfns[i] = 0;
folio_unlock(folio);
@@ -847,7 +847,7 @@ void migrate_device_finalize(unsigned long *src_pfns,
src = page_folio(page);
dst = page_folio(newpage);
- remove_migration_ptes(src, dst, false);
+ remove_migration_ptes(src, dst, false, false);
folio_unlock(src);
if (is_zone_device_page(page))
--
2.30.2
next prev parent reply other threads:[~2022-10-26 0:27 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-26 0:26 [PATCH v5 0/5] THP Shrinker alexlzhu
2022-10-26 0:26 ` [PATCH v5 1/5] mm: add thp_utilization metrics to debugfs alexlzhu
2022-10-26 0:26 ` [PATCH v5 2/5] mm: changes to split_huge_page() to free zero filled tail pages alexlzhu
2022-10-26 0:26 ` alexlzhu [this message]
2022-10-26 0:26 ` [PATCH v5 4/5] mm: add selftests to split_huge_page() to verify unmap/zap of zero pages alexlzhu
2022-10-26 0:26 ` [PATCH v5 5/5] mm: THP low utilization shrinker alexlzhu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=e1f734e2877fa5cbdbf3c8424c00f00abebf4392.1666743422.git.alexlzhu@fb.com \
--to=alexlzhu@fb.com \
--cc=hannes@cmpxchg.org \
--cc=kernel-team@fb.com \
--cc=linux-mm@kvack.org \
--cc=ningzhang@linux.alibaba.com \
--cc=riel@surriel.com \
--cc=willy@infradead.org \
--cc=yuzhao@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox