linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: akpm@linux-foundation.org
Cc: linux-xfs@vger.kernel.org, Jan Kara <jack@suse.cz>,
	"Darrick J. Wong" <darrick.wong@oracle.com>,
	linux-nvdimm@lists.01.org, Dave Chinner <david@fromorbit.com>,
	linux-kernel@vger.kernel.org, hch@lst.de,
	"J. Bruce Fields" <bfields@fieldses.org>,
	linux-mm@kvack.org, Jeff Moyer <jmoyer@redhat.com>,
	linux-fsdevel@vger.kernel.org,
	Jeff Layton <jlayton@poochiereds.net>,
	Ross Zwisler <ross.zwisler@linux.intel.com>
Subject: [PATCH v3 13/13] xfs: wire up FL_ALLOCATED support
Date: Thu, 19 Oct 2017 19:40:08 -0700	[thread overview]
Message-ID: <150846720846.24336.10565514769202466327.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <150846713528.24336.4459262264611579791.stgit@dwillia2-desk3.amr.corp.intel.com>

Before xfs can be sure that it is safe to truncate, it needs to hold
XFS_MMAPLOCK_EXCL and flush any FL_ALLOCATED leases.  Introduce
xfs_break_allocated(), modeled after xfs_break_layouts(), for use in the
file space deletion path.

We also use a new address_space_operation for the fs/dax core to
coordinate reaping these leases in the case where there is no active
truncate process to reap them.

Cc: Jan Kara <jack@suse.cz>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Jeff Layton <jlayton@poochiereds.net>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/xfs/xfs_aops.c  |   24 ++++++++++++++++++++
 fs/xfs/xfs_file.c  |   64 ++++++++++++++++++++++++++++++++++++++++++++++++----
 fs/xfs/xfs_inode.h |    1 +
 fs/xfs/xfs_ioctl.c |    7 ++----
 4 files changed, 86 insertions(+), 10 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index f18e5932aec4..00da08d0d6db 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1455,6 +1455,29 @@ xfs_vm_set_page_dirty(
 	return newly_dirty;
 }
 
+/*
+ * Reap any in-flight FL_ALLOCATED leases when the pages represented by
+ * that lease are no longer under DMA. We hold XFS_MMAPLOCK_EXCL to
+ * synchronize with the file space deletion path that may be doing the
+ * same operation.
+ */
+static void
+xfs_vm_dax_flush_dma(
+	struct inode		*inode)
+{
+	uint			iolock = XFS_MMAPLOCK_EXCL;
+
+	/*
+	 * try to catch cases where the inode dax mode was changed
+	 * without first synchronizing leases
+	 */
+	WARN_ON_ONCE(!IS_DAX(inode));
+
+	xfs_ilock(XFS_I(inode), iolock);
+	xfs_break_allocated(inode, &iolock);
+	xfs_iunlock(XFS_I(inode), iolock);
+}
+
 const struct address_space_operations xfs_address_space_operations = {
 	.readpage		= xfs_vm_readpage,
 	.readpages		= xfs_vm_readpages,
@@ -1468,4 +1491,5 @@ const struct address_space_operations xfs_address_space_operations = {
 	.migratepage		= buffer_migrate_page,
 	.is_partially_uptodate  = block_is_partially_uptodate,
 	.error_remove_page	= generic_error_remove_page,
+	.dax_flush_dma		= xfs_vm_dax_flush_dma,
 };
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index c6780743f8ec..5bc72f1da301 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -40,6 +40,7 @@
 #include "xfs_iomap.h"
 #include "xfs_reflink.h"
 
+#include <linux/dax.h>
 #include <linux/dcache.h>
 #include <linux/falloc.h>
 #include <linux/pagevec.h>
@@ -746,6 +747,39 @@ xfs_file_write_iter(
 	return ret;
 }
 
+/*
+ * DAX breaks the traditional truncate model that assumes in-flight DMA
+ * to a file-backed page can continue until the final put of the page
+ * regardless of that page's relationship to the file. With DAX the page
+ * has a 1:1 relationship with filesystem blocks, so truncate must be
+ * held off while any DMA might be in-flight. This assumes all DMA usage
+ * is transient; non-transient get_user_pages usage must be disallowed
+ * for DAX files. This also unlocks FL_LAYOUT leases.
+ * When the break must block, the held locks are dropped and *iolock is
+ * upgraded to both EXCL locks. Returns the last non-blocking result.
+ */
+int
+xfs_break_allocated(
+	struct inode		*inode,
+	uint			*iolock)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	int			error;
+
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL
+				| XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL));
+
+	while ((error = break_allocated(inode, false)) == -EWOULDBLOCK) {
+		xfs_iunlock(ip, *iolock);
+		error = break_allocated(inode, true);
+		*iolock &= ~(XFS_MMAPLOCK_SHARED|XFS_IOLOCK_SHARED);
+		*iolock |= XFS_MMAPLOCK_EXCL|XFS_IOLOCK_EXCL;
+		xfs_ilock(ip, *iolock);
+	}
+
+	return error;
+}
+
 #define	XFS_FALLOC_FL_SUPPORTED						\
 		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
 		 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |	\
@@ -762,7 +796,7 @@ xfs_file_fallocate(
 	struct xfs_inode	*ip = XFS_I(inode);
 	long			error;
 	enum xfs_prealloc_flags	flags = 0;
-	uint			iolock = XFS_IOLOCK_EXCL;
+	uint			iolock = XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL;
 	loff_t			new_size = 0;
 	bool			do_file_insert = 0;
 
@@ -772,13 +806,10 @@ xfs_file_fallocate(
 		return -EOPNOTSUPP;
 
 	xfs_ilock(ip, iolock);
-	error = xfs_break_layouts(inode, &iolock);
+	error = xfs_break_allocated(inode, &iolock);
 	if (error)
 		goto out_unlock;
 
-	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
-	iolock |= XFS_MMAPLOCK_EXCL;
-
 	if (mode & FALLOC_FL_PUNCH_HOLE) {
 		error = xfs_free_file_space(ip, offset, len);
 		if (error)
@@ -1136,6 +1167,28 @@ xfs_file_mmap(
 	return 0;
 }
 
+/*
+ * Any manipulation of FL_ALLOCATED leases needs to be coordinated with
+ * XFS_MMAPLOCK_EXCL to synchronize get_user_pages() + DMA vs truncate.
+ */
+static int
+xfs_file_setlease(
+	struct file		*filp,
+	long			arg,
+	struct file_lock	**flp,
+	void			**priv)
+{
+	struct inode		*inode = file_inode(filp);
+	struct xfs_inode 	*ip = XFS_I(inode);
+	uint			iolock = XFS_MMAPLOCK_EXCL;
+	int			error;
+
+	xfs_ilock(ip, iolock);
+	error = generic_setlease(filp, arg, flp, priv);
+	xfs_iunlock(ip, iolock);
+	return error;
+}
+
 const struct file_operations xfs_file_operations = {
 	.llseek		= xfs_file_llseek,
 	.read_iter	= xfs_file_read_iter,
@@ -1154,6 +1207,7 @@ const struct file_operations xfs_file_operations = {
 	.fallocate	= xfs_file_fallocate,
 	.clone_file_range = xfs_file_clone_range,
 	.dedupe_file_range = xfs_file_dedupe_range,
+	.setlease	= xfs_file_setlease,
 };
 
 const struct file_operations xfs_dir_file_operations = {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 0ee453de239a..e0d421884fe4 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -445,6 +445,7 @@ int	xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
 		     xfs_fsize_t isize, bool *did_zeroing);
 int	xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count,
 		bool *did_zero);
+int	xfs_break_allocated(struct inode *inode, uint *iolock);
 
 /* from xfs_iops.c */
 extern void xfs_setup_inode(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index aa75389be8cf..5be60c74bede 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -612,7 +612,7 @@ xfs_ioc_space(
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct iattr		iattr;
 	enum xfs_prealloc_flags	flags = 0;
-	uint			iolock = XFS_IOLOCK_EXCL;
+	uint			iolock = XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL;
 	int			error;
 
 	/*
@@ -642,13 +642,10 @@ xfs_ioc_space(
 		return error;
 
 	xfs_ilock(ip, iolock);
-	error = xfs_break_layouts(inode, &iolock);
+	error = xfs_break_allocated(inode, &iolock);
 	if (error)
 		goto out_unlock;
 
-	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
-	iolock |= XFS_MMAPLOCK_EXCL;
-
 	switch (bf->l_whence) {
 	case 0: /*SEEK_SET*/
 		break;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2017-10-20  2:46 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-20  2:38 [PATCH v3 00/13] dax: fix dma vs truncate and remove 'page-less' support Dan Williams
2017-10-20  2:39 ` [PATCH v3 01/13] dax: quiet bdev_dax_supported() Dan Williams
2017-10-20  2:39 ` [PATCH v3 02/13] dax: require 'struct page' for filesystem dax Dan Williams
2017-10-20  7:57   ` Christoph Hellwig
2017-10-20 15:23     ` Dan Williams
2017-10-20 16:29       ` Christoph Hellwig
2017-10-20 22:29         ` Dan Williams
2017-10-21  3:20           ` Matthew Wilcox
2017-10-21  4:16             ` Dan Williams
2017-10-21  8:15               ` Christoph Hellwig
2017-10-23  5:18         ` Martin Schwidefsky
2017-10-23  8:55           ` Dan Williams
2017-10-23 10:44             ` Martin Schwidefsky
2017-10-23 11:20               ` Dan Williams
2017-10-20  2:39 ` [PATCH v3 03/13] dax: stop using VM_MIXEDMAP for dax Dan Williams
2017-10-20  2:39 ` [PATCH v3 04/13] dax: stop using VM_HUGEPAGE " Dan Williams
2017-10-20  2:39 ` [PATCH v3 05/13] dax: stop requiring a live device for dax_flush() Dan Williams
2017-10-20  2:39 ` [PATCH v3 06/13] dax: store pfns in the radix Dan Williams
2017-10-20  2:39 ` [PATCH v3 07/13] dax: warn if dma collides with truncate Dan Williams
2017-10-20  2:39 ` [PATCH v3 08/13] tools/testing/nvdimm: add 'bio_delay' mechanism Dan Williams
2017-10-20  2:39 ` [PATCH v3 09/13] IB/core: disable memory registration of fileystem-dax vmas Dan Williams
2017-10-20  2:39 ` [PATCH v3 10/13] mm: disable get_user_pages_fast() for dax Dan Williams
2017-10-20  2:39 ` [PATCH v3 11/13] fs: use smp_load_acquire in break_{layout,lease} Dan Williams
2017-10-20 12:39   ` Jeffrey Layton
2017-10-20  2:40 ` [PATCH v3 12/13] dax: handle truncate of dma-busy pages Dan Williams
2017-10-20 13:05   ` Jeff Layton
2017-10-20 15:42     ` Dan Williams
2017-10-20 16:32       ` Christoph Hellwig
2017-10-20 17:27         ` Dan Williams
2017-10-20 20:36           ` Brian Foster
2017-10-21  8:11           ` Christoph Hellwig
2017-10-20  2:40 ` Dan Williams [this message]
2017-10-20  7:47 ` [PATCH v3 00/13] dax: fix dma vs truncate and remove 'page-less' support Christoph Hellwig
2017-10-20  9:31   ` Christoph Hellwig
2017-10-26 10:58     ` Jan Kara
2017-10-26 23:51       ` Williams, Dan J
2017-10-27  6:48         ` Dave Chinner
2017-10-27 11:42           ` Dan Williams
2017-10-29 21:52             ` Dave Chinner
2017-10-27  6:45       ` Christoph Hellwig
2017-10-29 23:46       ` Dan Williams
2017-10-30  2:00         ` Dave Chinner
2017-10-30  8:38           ` Jan Kara
2017-10-30 11:20             ` Dave Chinner
2017-10-30 17:51               ` Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=150846720846.24336.10565514769202466327.stgit@dwillia2-desk3.amr.corp.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=bfields@fieldses.org \
    --cc=darrick.wong@oracle.com \
    --cc=david@fromorbit.com \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=jlayton@poochiereds.net \
    --cc=jmoyer@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=ross.zwisler@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox