From: Dan Williams <dan.j.williams@intel.com>
To: akpm@linux-foundation.org
Cc: Matthew Wilcox <willy@infradead.org>, Jan Kara <jack@suse.cz>,
	"Darrick J. Wong" <djwong@kernel.org>,
	Jason Gunthorpe <jgg@nvidia.com>, Christoph Hellwig <hch@lst.de>,
	John Hubbard <jhubbard@nvidia.com>,
	linux-fsdevel@vger.kernel.org, nvdimm@lists.linux.dev,
	linux-xfs@vger.kernel.org, linux-mm@kvack.org,
	linux-ext4@vger.kernel.org
Subject: [PATCH v2 06/18] fsdax: Rework dax_layout_busy_page() to dax_zap_mappings()
Date: Thu, 15 Sep 2022 20:35:44 -0700
Message-ID: <166329934448.2786261.3862047806587561874.stgit@dwillia2-xfh.jf.intel.com>
In-Reply-To: <166329930818.2786261.6086109734008025807.stgit@dwillia2-xfh.jf.intel.com>

In preparation for moving the truncate vs DAX-busy-page detection from
detecting _refcount == 1 to _refcount == 0, change the busy-page
tracking to take references at dax_insert_entry(), drop references at
dax_zap_mappings() time, and finally clean out the entries at
dax_delete_mapping_entries().

This approach relies on all paths that call truncate_inode_pages()
first calling dax_zap_mappings(). This mirrors the zapped state of
pages after unmap_mapping_range(), but since DAX pages do not maintain
_mapcount, this DAX-specific flow is introduced. This approach helps
address the immediate _refcount problem, but continues to kick the "DAX
without pages?" question down the road.
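
For illustration, a minimal sketch of the caller pattern this implies,
modeled on the ext4_break_layouts() change below. Note that
wait_for_page_idle() is a hypothetical stand-in for each filesystem's
actual wait primitive, and error handling is elided:

	/* Illustrative only: zap mappings, then wait out any pinned pages */
	do {
		struct page *page = dax_zap_mappings(inode->i_mapping);

		if (!page)
			break;	/* all entries zapped and idle */
		/* hypothetical helper: sleep until @page's pin is dropped */
		error = wait_for_page_idle(inode, page);
	} while (error == 0);
	/* only now is it safe to call truncate_inode_pages() */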

Cc: Matthew Wilcox <willy@infradead.org>
Cc: Jan Kara <jack@suse.cz>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
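
As a summary, the reference flow for a mapped entry after this change
becomes (an illustrative sketch of the scheme, not code from the tree):

	dax_insert_entry():          page_ref_inc() on each mapped page
	dax_zap_mappings{,_range}(): page_ref_dec() once per page, mark DAX_ZAP
	dax_delete_mapping_entries(): clear the entry; pages must be zapped + idle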
 fs/dax.c            |   82 ++++++++++++++++++++++++++++++++++++---------------
 fs/ext4/inode.c     |    2 +
 fs/fuse/dax.c       |    4 +-
 fs/xfs/xfs_file.c   |    2 +
 fs/xfs/xfs_inode.c  |    4 +-
 include/linux/dax.h |   11 ++++---
 6 files changed, 71 insertions(+), 34 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 76bad1c095c0..616bac4b7df3 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -74,11 +74,12 @@ fs_initcall(init_dax_wait_table);
  * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
  * block allocation.
  */
-#define DAX_SHIFT	(4)
+#define DAX_SHIFT	(5)
 #define DAX_LOCKED	(1UL << 0)
 #define DAX_PMD		(1UL << 1)
 #define DAX_ZERO_PAGE	(1UL << 2)
 #define DAX_EMPTY	(1UL << 3)
+#define DAX_ZAP		(1UL << 4)
 
 static unsigned long dax_to_pfn(void *entry)
 {
@@ -95,6 +96,11 @@ static bool dax_is_locked(void *entry)
 	return xa_to_value(entry) & DAX_LOCKED;
 }
 
+static bool dax_is_zapped(void *entry)
+{
+	return xa_to_value(entry) & DAX_ZAP;
+}
+
 static unsigned int dax_entry_order(void *entry)
 {
 	if (xa_to_value(entry) & DAX_PMD)
@@ -380,6 +386,7 @@ static void dax_associate_entry(void *entry, struct address_space *mapping,
 			WARN_ON_ONCE(page->mapping);
 			page->mapping = mapping;
 			page->index = index + i++;
+			page_ref_inc(page);
 		}
 	}
 }
@@ -395,31 +402,20 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
 	for_each_mapped_pfn(entry, pfn) {
 		struct page *page = pfn_to_page(pfn);
 
-		WARN_ON_ONCE(trunc && !dax_page_idle(page));
 		if (dax_mapping_is_cow(page->mapping)) {
 			/* keep the CoW flag if this page is still shared */
 			if (page->index-- > 0)
 				continue;
-		} else
+		} else {
+			WARN_ON_ONCE(trunc && !dax_is_zapped(entry));
+			WARN_ON_ONCE(trunc && !dax_page_idle(page));
 			WARN_ON_ONCE(page->mapping && page->mapping != mapping);
+		}
 		page->mapping = NULL;
 		page->index = 0;
 	}
 }
 
-static struct page *dax_busy_page(void *entry)
-{
-	unsigned long pfn;
-
-	for_each_mapped_pfn(entry, pfn) {
-		struct page *page = pfn_to_page(pfn);
-
-		if (!dax_page_idle(page))
-			return page;
-	}
-	return NULL;
-}
-
 /*
  * dax_lock_page - Lock the DAX entry corresponding to a page
  * @page: The page whose entry we want to lock
@@ -664,8 +660,46 @@ static void *grab_mapping_entry(struct xa_state *xas,
 	return xa_mk_internal(VM_FAULT_FALLBACK);
 }
 
+static void *dax_zap_entry(struct xa_state *xas, void *entry)
+{
+	unsigned long v = xa_to_value(entry);
+
+	return xas_store(xas, xa_mk_value(v | DAX_ZAP));
+}
+
+/*
+ * Return NULL if the entry is zapped and all pages in the entry are
+ * idle, otherwise return the first non-idle page in the entry.
+ */
+static struct page *dax_zap_pages(struct xa_state *xas, void *entry)
+{
+	struct page *ret = NULL;
+	unsigned long pfn;
+	bool zap;
+
+	if (!dax_entry_size(entry))
+		return NULL;
+
+	zap = !dax_is_zapped(entry);
+
+	for_each_mapped_pfn(entry, pfn) {
+		struct page *page = pfn_to_page(pfn);
+
+		if (zap)
+			page_ref_dec(page);
+
+		if (!ret && !dax_page_idle(page))
+			ret = page;
+	}
+
+	if (zap)
+		dax_zap_entry(xas, entry);
+
+	return ret;
+}
+
 /**
- * dax_layout_busy_page_range - find first pinned page in @mapping
+ * dax_zap_mappings_range - find first pinned page in @mapping
  * @mapping: address space to scan for a page with ref count > 1
  * @start: Starting offset. Page containing 'start' is included.
  * @end: End offset. Page containing 'end' is included. If 'end' is LLONG_MAX,
@@ -682,8 +716,8 @@ static void *grab_mapping_entry(struct xa_state *xas,
  * to be able to run unmap_mapping_range() and subsequently not race
  * mapping_mapped() becoming true.
  */
-struct page *dax_layout_busy_page_range(struct address_space *mapping,
-					loff_t start, loff_t end)
+struct page *dax_zap_mappings_range(struct address_space *mapping, loff_t start,
+				    loff_t end)
 {
 	void *entry;
 	unsigned int scanned = 0;
@@ -727,7 +761,7 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping,
 		if (unlikely(dax_is_locked(entry)))
 			entry = get_unlocked_entry(&xas, 0);
 		if (entry)
-			page = dax_busy_page(entry);
+			page = dax_zap_pages(&xas, entry);
 		put_unlocked_entry(&xas, entry, WAKE_NEXT);
 		if (page)
 			break;
@@ -742,13 +776,13 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping,
 	xas_unlock_irq(&xas);
 	return page;
 }
-EXPORT_SYMBOL_GPL(dax_layout_busy_page_range);
+EXPORT_SYMBOL_GPL(dax_zap_mappings_range);
 
-struct page *dax_layout_busy_page(struct address_space *mapping)
+struct page *dax_zap_mappings(struct address_space *mapping)
 {
-	return dax_layout_busy_page_range(mapping, 0, LLONG_MAX);
+	return dax_zap_mappings_range(mapping, 0, LLONG_MAX);
 }
-EXPORT_SYMBOL_GPL(dax_layout_busy_page);
+EXPORT_SYMBOL_GPL(dax_zap_mappings);
 
 static int __dax_invalidate_entry(struct address_space *mapping,
 					  pgoff_t index, bool trunc)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 326269ad3961..0ce73af69c49 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3965,7 +3965,7 @@ int ext4_break_layouts(struct inode *inode)
 		return -EINVAL;
 
 	do {
-		page = dax_layout_busy_page(inode->i_mapping);
+		page = dax_zap_mappings(inode->i_mapping);
 		if (!page)
 			return 0;
 
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index ae52ef7dbabe..8cdc9402e8f7 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -443,7 +443,7 @@ static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos,
 
 	/*
 	 * Can't do inline reclaim in fault path. We call
-	 * dax_layout_busy_page() before we free a range. And
+	 * dax_zap_mappings() before we free a range. And
 	 * fuse_wait_dax_page() drops mapping->invalidate_lock and requires it.
 	 * In fault path we enter with mapping->invalidate_lock held and can't
 	 * drop it. Also in fault path we hold mapping->invalidate_lock shared
@@ -671,7 +671,7 @@ static int __fuse_dax_break_layouts(struct inode *inode, bool *retry,
 {
 	struct page *page;
 
-	page = dax_layout_busy_page_range(inode->i_mapping, start, end);
+	page = dax_zap_mappings_range(inode->i_mapping, start, end);
 	if (!page)
 		return 0;
 
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index d3ff692d5546..918ab9130c96 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -823,7 +823,7 @@ xfs_break_dax_layouts(
 
 	ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));
 
-	page = dax_layout_busy_page(inode->i_mapping);
+	page = dax_zap_mappings(inode->i_mapping);
 	if (!page)
 		return 0;
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 72ce1cb72736..9bbc68500cec 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3482,8 +3482,8 @@ xfs_mmaplock_two_inodes_and_break_dax_layout(
 	 * need to unlock & lock the XFS_MMAPLOCK_EXCL which is not suitable
 	 * for this nested lock case.
 	 */
-	page = dax_layout_busy_page(VFS_I(ip2)->i_mapping);
-	if (page && page_ref_count(page) != 1) {
+	page = dax_zap_mappings(VFS_I(ip2)->i_mapping);
+	if (page) {
 		xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
 		xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
 		goto again;
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 04987d14d7e0..f6acb4ed73cb 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -157,8 +157,9 @@ static inline void fs_put_dax(struct dax_device *dax_dev, void *holder)
 int dax_writeback_mapping_range(struct address_space *mapping,
 		struct dax_device *dax_dev, struct writeback_control *wbc);
 
-struct page *dax_layout_busy_page(struct address_space *mapping);
-struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
+struct page *dax_zap_mappings(struct address_space *mapping);
+struct page *dax_zap_mappings_range(struct address_space *mapping, loff_t start,
+				    loff_t end);
 dax_entry_t dax_lock_page(struct page *page);
 void dax_unlock_page(struct page *page, dax_entry_t cookie);
 dax_entry_t dax_lock_mapping_entry(struct address_space *mapping,
@@ -166,12 +167,14 @@ dax_entry_t dax_lock_mapping_entry(struct address_space *mapping,
 void dax_unlock_mapping_entry(struct address_space *mapping,
 		unsigned long index, dax_entry_t cookie);
 #else
-static inline struct page *dax_layout_busy_page(struct address_space *mapping)
+static inline struct page *dax_zap_mappings(struct address_space *mapping)
 {
 	return NULL;
 }
 
-static inline struct page *dax_layout_busy_page_range(struct address_space *mapping, pgoff_t start, pgoff_t nr_pages)
+static inline struct page *dax_zap_mappings_range(struct address_space *mapping,
+						  loff_t start,
+						  loff_t end)
 {
 	return NULL;
 }


