From: Alistair Popple <apopple@nvidia.com>
To: dan.j.williams@intel.com, linux-mm@kvack.org
Cc: Alistair Popple <apopple@nvidia.com>,
vishal.l.verma@intel.com, dave.jiang@intel.com,
logang@deltatee.com, bhelgaas@google.com, jack@suse.cz,
jgg@ziepe.ca, catalin.marinas@arm.com, will@kernel.org,
mpe@ellerman.id.au, npiggin@gmail.com,
dave.hansen@linux.intel.com, ira.weiny@intel.com,
willy@infradead.org, djwong@kernel.org, tytso@mit.edu,
linmiaohe@huawei.com, david@redhat.com, peterx@redhat.com,
linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-arm-kernel@lists.infradead.org,
linuxppc-dev@lists.ozlabs.org, nvdimm@lists.linux.dev,
linux-cxl@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-ext4@vger.kernel.org, linux-xfs@vger.kernel.org,
jhubbard@nvidia.com, hch@lst.de, david@fromorbit.com
Subject: [PATCH 09/12] mm: Update vm_normal_page() callers to accept FS DAX pages
Date: Tue, 10 Sep 2024 14:14:34 +1000 [thread overview]
Message-ID: <64f1664980bed3da01b771afdfc4056825b61277.1725941415.git-series.apopple@nvidia.com> (raw)
In-Reply-To: <cover.9f0e45d52f5cff58807831b6b867084d0b14b61c.1725941415.git-series.apopple@nvidia.com>
Currently, if a PTE points to a FS DAX page, vm_normal_page() will
return NULL, as these pages have their own special refcounting scheme. A
future change will allow FS DAX pages to be refcounted the same as any
other normal page.
Therefore vm_normal_page() will start returning FS DAX pages. To avoid
any change in behaviour, callers that don't expect FS DAX pages will
need to explicitly check for this. As vm_normal_page() can already
return ZONE_DEVICE pages most callers already include a check for any
ZONE_DEVICE page.
However some callers don't, so add explicit checks where required.
Signed-off-by: Alistair Popple <apopple@nvidia.com>
---
arch/x86/mm/pat/memtype.c | 4 +++-
fs/proc/task_mmu.c | 16 ++++++++++++----
mm/memcontrol-v1.c | 2 +-
3 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 1fa0bf6..eb84593 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -951,6 +951,7 @@ static void free_pfn_range(u64 paddr, unsigned long size)
static int follow_phys(struct vm_area_struct *vma, unsigned long *prot,
resource_size_t *phys)
{
+ struct folio *folio;
pte_t *ptep, pte;
spinlock_t *ptl;
@@ -960,7 +961,8 @@ static int follow_phys(struct vm_area_struct *vma, unsigned long *prot,
pte = ptep_get(ptep);
/* Never return PFNs of anon folios in COW mappings. */
- if (vm_normal_folio(vma, vma->vm_start, pte)) {
+ folio = vm_normal_folio(vma, vma->vm_start, pte);
+ if (folio && !folio_is_device_dax(folio)) {
pte_unmap_unlock(ptep, ptl);
return -EINVAL;
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5f171ad..456b010 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -816,6 +816,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
if (pte_present(ptent)) {
page = vm_normal_page(vma, addr, ptent);
+ if (page && is_device_dax_page(page))
+ page = NULL;
young = pte_young(ptent);
dirty = pte_dirty(ptent);
present = true;
@@ -864,6 +866,8 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
if (pmd_present(*pmd)) {
page = vm_normal_page_pmd(vma, addr, *pmd);
+ if (page && is_device_dax_page(page))
+ page = NULL;
present = true;
} else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
swp_entry_t entry = pmd_to_swp_entry(*pmd);
@@ -1385,7 +1389,7 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)))
return false;
folio = vm_normal_folio(vma, addr, pte);
- if (!folio)
+ if (!folio || folio_is_device_dax(folio))
return false;
return folio_maybe_dma_pinned(folio);
}
@@ -1710,6 +1714,8 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
frame = pte_pfn(pte);
flags |= PM_PRESENT;
page = vm_normal_page(vma, addr, pte);
+ if (page && is_device_dax_page(page))
+ page = NULL;
if (pte_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
if (pte_uffd_wp(pte))
@@ -2096,7 +2102,8 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
if (p->masks_of_interest & PAGE_IS_FILE) {
page = vm_normal_page(vma, addr, pte);
- if (page && !PageAnon(page))
+ if (page && !PageAnon(page) &&
+ !is_device_dax_page(page))
categories |= PAGE_IS_FILE;
}
@@ -2158,7 +2165,8 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
if (p->masks_of_interest & PAGE_IS_FILE) {
page = vm_normal_page_pmd(vma, addr, pmd);
- if (page && !PageAnon(page))
+ if (page && !PageAnon(page) &&
+ !is_device_dax_page(page))
categories |= PAGE_IS_FILE;
}
@@ -2919,7 +2927,7 @@ static struct page *can_gather_numa_stats_pmd(pmd_t pmd,
return NULL;
page = vm_normal_page_pmd(vma, addr, pmd);
- if (!page)
+ if (!page || is_device_dax_page(page))
return NULL;
if (PageReserved(page))
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index b37c0d8..e16053c 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -667,7 +667,7 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
{
struct page *page = vm_normal_page(vma, addr, ptent);
- if (!page)
+ if (!page || is_device_dax_page(page))
return NULL;
if (PageAnon(page)) {
if (!(mc.flags & MOVE_ANON))
--
git-series 0.9.1
next prev parent reply other threads:[~2024-09-10 4:15 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-10 4:14 [PATCH 00/12] fs/dax: Fix FS DAX page reference counts Alistair Popple
2024-09-10 4:14 ` [PATCH 01/12] mm/gup.c: Remove redundant check for PCI P2PDMA page Alistair Popple
2024-09-22 1:00 ` Dan Williams
2024-09-10 4:14 ` [PATCH 02/12] pci/p2pdma: Don't initialise page refcount to one Alistair Popple
2024-09-10 13:47 ` Bjorn Helgaas
2024-09-11 1:07 ` Alistair Popple
2024-09-11 13:51 ` Bjorn Helgaas
2024-09-11 0:48 ` Logan Gunthorpe
2024-10-11 0:20 ` Alistair Popple
2024-09-22 1:00 ` Dan Williams
2024-10-11 0:17 ` Alistair Popple
2024-09-10 4:14 ` [PATCH 03/12] fs/dax: Refactor wait for dax idle page Alistair Popple
2024-09-22 1:01 ` Dan Williams
2024-09-10 4:14 ` [PATCH 04/12] mm: Allow compound zone device pages Alistair Popple
2024-09-10 4:47 ` Matthew Wilcox
2024-09-10 6:57 ` Alistair Popple
2024-09-10 13:41 ` Matthew Wilcox
2024-09-12 12:44 ` kernel test robot
2024-09-12 12:44 ` kernel test robot
2024-09-22 1:01 ` Dan Williams
2024-09-10 4:14 ` [PATCH 05/12] mm/memory: Add dax_insert_pfn Alistair Popple
2024-09-22 1:41 ` Dan Williams
2024-10-01 10:43 ` Gerald Schaefer
2024-09-10 4:14 ` [PATCH 06/12] huge_memory: Allow mappings of PUD sized pages Alistair Popple
2024-09-22 2:07 ` Dan Williams
2024-10-14 6:33 ` Alistair Popple
2024-09-10 4:14 ` [PATCH 07/12] huge_memory: Allow mappings of PMD " Alistair Popple
2024-09-27 2:48 ` Dan Williams
2024-10-14 6:53 ` Alistair Popple
2024-10-23 23:14 ` Alistair Popple
2024-10-23 23:38 ` Dan Williams
2024-09-10 4:14 ` [PATCH 08/12] gup: Don't allow FOLL_LONGTERM pinning of FS DAX pages Alistair Popple
2024-09-25 0:17 ` Dan Williams
2024-09-27 2:52 ` Dan Williams
2024-10-14 7:03 ` Alistair Popple
2024-09-10 4:14 ` Alistair Popple [this message]
2024-09-27 7:15 ` [PATCH 09/12] mm: Update vm_normal_page() callers to accept " Dan Williams
2024-10-14 7:16 ` Alistair Popple
2024-09-10 4:14 ` [PATCH 10/12] fs/dax: Properly refcount fs dax pages Alistair Popple
2024-09-27 7:59 ` Dan Williams
2024-10-24 7:52 ` Alistair Popple
2024-10-24 23:52 ` Dan Williams
2024-10-25 2:46 ` Alistair Popple
2024-10-25 4:35 ` Dan Williams
2024-10-28 4:24 ` Alistair Popple
2024-10-29 2:03 ` Dan Williams
2024-10-30 5:57 ` Alistair Popple
2024-09-10 4:14 ` [PATCH 11/12] mm: Remove pXX_devmap callers Alistair Popple
2024-09-27 12:29 ` Alexander Gordeev
2024-10-14 7:14 ` Alistair Popple
2024-09-10 4:14 ` [PATCH 12/12] mm: Remove devmap related functions and page table bits Alistair Popple
2024-09-11 7:47 ` Chunyan Zhang
2024-09-12 12:55 ` kernel test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=64f1664980bed3da01b771afdfc4056825b61277.1725941415.git-series.apopple@nvidia.com \
--to=apopple@nvidia.com \
--cc=bhelgaas@google.com \
--cc=catalin.marinas@arm.com \
--cc=dan.j.williams@intel.com \
--cc=dave.hansen@linux.intel.com \
--cc=dave.jiang@intel.com \
--cc=david@fromorbit.com \
--cc=david@redhat.com \
--cc=djwong@kernel.org \
--cc=hch@lst.de \
--cc=ira.weiny@intel.com \
--cc=jack@suse.cz \
--cc=jgg@ziepe.ca \
--cc=jhubbard@nvidia.com \
--cc=linmiaohe@huawei.com \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-cxl@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-xfs@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=logang@deltatee.com \
--cc=mpe@ellerman.id.au \
--cc=npiggin@gmail.com \
--cc=nvdimm@lists.linux.dev \
--cc=peterx@redhat.com \
--cc=tytso@mit.edu \
--cc=vishal.l.verma@intel.com \
--cc=will@kernel.org \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox