linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: linux-nvdimm@lists.01.org
Cc: linux-xfs@vger.kernel.org, Jan Kara <jack@suse.cz>,
	"Darrick J. Wong" <darrick.wong@oracle.com>,
	linux-rdma@vger.kernel.org, linux-api@vger.kernel.org,
	Dave Chinner <david@fromorbit.com>,
	Christoph Hellwig <hch@lst.de>,
	"J. Bruce Fields" <bfields@fieldses.org>,
	linux-mm@kvack.org, Jeff Moyer <jmoyer@redhat.com>,
	linux-fsdevel@vger.kernel.org,
	Jeff Layton <jlayton@poochiereds.net>,
	Ross Zwisler <ross.zwisler@linux.intel.com>
Subject: [PATCH v7 08/12] fs, mapdirect: introduce ->lease_direct()
Date: Fri, 06 Oct 2017 15:36:00 -0700	[thread overview]
Message-ID: <150732936063.22363.4533598271967882402.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <150732931273.22363.8436792888326501071.stgit@dwillia2-desk3.amr.corp.intel.com>

Provide a vma operation that registers a lease that is broken by
break_layout(). This is motivated by a need to stop in-progress RDMA
when the block-map of a DAX-file changes. I.e., since DAX gives direct
access to filesystem blocks, we cannot allow those blocks to move or
change state while they are under active RDMA. So, if the filesystem
determines it needs to move blocks, it can revoke device access before
proceeding.

Cc: Jan Kara <jack@suse.cz>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Jeff Layton <jlayton@poochiereds.net>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/mapdirect.c            |  117 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mapdirect.h |   23 +++++++++
 include/linux/mm.h        |    6 ++
 3 files changed, 146 insertions(+)

diff --git a/fs/mapdirect.c b/fs/mapdirect.c
index 9ac7c1d946a2..338cbe055fc7 100644
--- a/fs/mapdirect.c
+++ b/fs/mapdirect.c
@@ -16,6 +16,7 @@
 #include <linux/mutex.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 
@@ -32,12 +33,26 @@ struct map_direct_state {
 	struct vm_area_struct *mds_vma;
 };
 
+struct lease_direct_state {
+	void *lds_owner;			/* arg for lds_break_fn */
+	struct file *lds_file;			/* file the lease is set on */
+	unsigned long lds_state;		/* holds MAPDIRECT_BREAK bit */
+	void (*lds_break_fn)(void *lds_owner);	/* called from lds_work */
+	struct work_struct lds_work;		/* lease_direct_invalidate() */
+};
+
 bool is_map_direct_valid(struct map_direct_state *mds)
 {
 	return test_bit(MAPDIRECT_VALID, &mds->mds_state);
 }
 EXPORT_SYMBOL_GPL(is_map_direct_valid);
 
+bool is_map_direct_broken(struct map_direct_state *mds)
+{
+	return test_bit(MAPDIRECT_BREAK, &mds->mds_state);
+}
+EXPORT_SYMBOL_GPL(is_map_direct_broken);
+
 static void put_map_direct(struct map_direct_state *mds)
 {
 	if (!atomic_dec_and_test(&mds->mds_ref))
@@ -162,6 +177,108 @@ static const struct lock_manager_operations map_direct_lm_ops = {
 	.lm_setup = map_direct_lm_setup,
 };
 
+static void lease_direct_invalidate(struct work_struct *work)
+{
+	struct lease_direct_state *lds =
+		container_of(work, struct lease_direct_state, lds_work);
+	void *owner = lds;
+
+	/* notify the subscriber first, then release the lease on the file */
+	lds->lds_break_fn(lds->lds_owner);
+	vfs_setlease(lds->lds_file, F_UNLCK, NULL, &owner);
+}
+
+static bool lease_direct_lm_break(struct file_lock *fl)
+{
+	struct lease_direct_state *lds = fl->fl_owner;
+
+	if (!test_and_set_bit(MAPDIRECT_BREAK, &lds->lds_state))
+		schedule_work(&lds->lds_work);	/* first break only */
+	return false;
+}
+
+static int lease_direct_lm_change(struct file_lock *fl, int arg,
+		struct list_head *dispose)
+{
+	WARN_ON(!(arg & F_UNLCK));	/* only unlock requests are expected */
+	return lease_modify(fl, arg, dispose);
+}
+
+static const struct lock_manager_operations lease_direct_lm_ops = {
+	.lm_break = lease_direct_lm_break,	/* queue invalidate work */
+	.lm_change = lease_direct_lm_change,	/* expects F_UNLCK only */
+};
+
+/* Lease vma->vm_file; lds_break_fn(lds_owner) is called on lease break. */
+struct lease_direct *map_direct_lease(struct vm_area_struct *vma,
+		void (*lds_break_fn)(void *), void *lds_owner)
+{
+	struct file *file = vma->vm_file;
+	struct lease_direct_state *lds;
+	struct lease_direct *ld;
+	struct file_lock *fl;
+	int rc = -ENOMEM;
+	void *owner;
+
+	ld = kzalloc(sizeof(*ld) + sizeof(*lds), GFP_KERNEL);
+	if (!ld)
+		return ERR_PTR(-ENOMEM);
+	INIT_LIST_HEAD(&ld->list);
+	lds = (struct lease_direct_state *)(ld + 1);
+	owner = lds;
+	ld->lds = lds;
+	lds->lds_break_fn = lds_break_fn;
+	lds->lds_owner = lds_owner;
+	INIT_WORK(&lds->lds_work, lease_direct_invalidate);
+	lds->lds_file = get_file(file);	/* put on error or at destroy */
+
+	fl = locks_alloc_lock();
+	if (!fl)
+		goto err_lock_alloc;
+
+	locks_init_lock(fl);
+	fl->fl_lmops = &lease_direct_lm_ops;
+	fl->fl_flags = FL_LAYOUT;
+	fl->fl_type = F_RDLCK;
+	fl->fl_end = OFFSET_MAX;
+	fl->fl_owner = lds;
+	fl->fl_pid = current->tgid;
+	fl->fl_file = file;
+
+	rc = vfs_setlease(file, fl->fl_type, &fl, &owner);
+	if (rc)
+		goto err_setlease;
+	if (WARN_ON(fl)) {	/* unexpected: fl left set on success */
+		owner = lds;
+		vfs_setlease(file, F_UNLCK, NULL, &owner);
+		rc = -ENXIO;
+		goto err_setlease;
+	}
+
+	return ld;
+err_setlease:
+	locks_free_lock(fl);
+err_lock_alloc:
+	fput(file);	/* balance get_file() above */
+	kfree(ld);	/* lds is ld's tail, not its own allocation */
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(map_direct_lease);
+
+void map_direct_lease_destroy(struct lease_direct *ld)
+{
+	struct lease_direct_state *lds = ld->lds;
+	struct file *file = lds->lds_file;
+	void *owner = lds;
+
+	vfs_setlease(file, F_UNLCK, NULL, &owner);
+	flush_work(&lds->lds_work);	/* wait out a queued invalidate */
+	fput(file);		/* pairs with get_file() at lease setup */
+	WARN_ON(!list_empty(&ld->list));
+	kfree(ld);		/* lds lives in the same allocation */
+}
+EXPORT_SYMBOL_GPL(map_direct_lease_destroy);
+
 struct map_direct_state *map_direct_register(int fd, struct vm_area_struct *vma)
 {
 	struct map_direct_state *mds = kzalloc(sizeof(*mds), GFP_KERNEL);
diff --git a/include/linux/mapdirect.h b/include/linux/mapdirect.h
index 724e27d8615e..dc4d4ba677d0 100644
--- a/include/linux/mapdirect.h
+++ b/include/linux/mapdirect.h
@@ -13,17 +13,28 @@
 #ifndef __MAPDIRECT_H__
 #define __MAPDIRECT_H__
 #include <linux/err.h>
+#include <linux/list.h>
 
 struct inode;
 struct work_struct;
 struct vm_area_struct;
 struct map_direct_state;
+struct lease_direct_state;
+
+struct lease_direct {
+	struct list_head list;		/* must be empty at destroy time */
+	struct lease_direct_state *lds;	/* opaque, defined in fs/mapdirect.c */
+};
 
 #if IS_ENABLED(CONFIG_FS_DAX)
 struct map_direct_state *map_direct_register(int fd, struct vm_area_struct *vma);
 int put_map_direct_vma(struct map_direct_state *mds);
 void get_map_direct_vma(struct map_direct_state *mds);
 bool is_map_direct_valid(struct map_direct_state *mds);
+bool is_map_direct_broken(struct map_direct_state *mds);
+struct lease_direct *map_direct_lease(struct vm_area_struct *vma,
+		void (*ld_break_fn)(void *), void *ld_owner);
+void map_direct_lease_destroy(struct lease_direct *ld);
 #else
 static inline struct map_direct_state *map_direct_register(int fd,
 		struct vm_area_struct *vma)
@@ -41,5 +52,17 @@ bool is_map_direct_valid(struct map_direct_state *mds)
 {
 	return false;
 }
+static inline bool is_map_direct_broken(struct map_direct_state *mds)
+{
+	return false;
+}
+static inline struct lease_direct *map_direct_lease(struct vm_area_struct *vma,
+		void (*ld_break_fn)(void *), void *ld_owner)
+{
+	return ERR_PTR(-EOPNOTSUPP);	/* no leases without CONFIG_FS_DAX */
+}
+static inline void map_direct_lease_destroy(struct lease_direct *ld)
+{
+}
 #endif
 #endif /* __MAPDIRECT_H__ */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0afa19feb755..d03953f91ce8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -420,6 +420,12 @@ struct vm_operations_struct {
 	 */
 	struct page *(*find_special_page)(struct vm_area_struct *vma,
 					  unsigned long addr);
+	/*
+	 * Called by rdma memory registration to subscribe for "break"
+	 * events that require any ongoing rdma accesses to quiesce.
+	 */
+	struct lease_direct *(*lease_direct)(struct vm_area_struct *vma,
+			void (*break_fn)(void *), void *owner);
 };
 
 struct mmu_gather;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2017-10-06 22:42 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-06 22:35 [PATCH v7 00/12] MAP_DIRECT for DAX RDMA and userspace flush Dan Williams
2017-10-06 22:35 ` [PATCH v7 01/12] mm: introduce MAP_SHARED_VALIDATE, a mechanism to safely define new mmap flags Dan Williams
2017-10-06 22:35 ` [PATCH v7 02/12] fs, mm: pass fd to ->mmap_validate() Dan Williams
2017-10-06 22:35 ` [PATCH v7 03/12] fs: introduce i_mapdcount Dan Williams
2017-10-09  3:08   ` Dave Chinner
2017-10-06 22:35 ` [PATCH v7 04/12] fs: MAP_DIRECT core Dan Williams
2017-10-06 22:35 ` [PATCH v7 05/12] xfs: prepare xfs_break_layouts() for reuse with MAP_DIRECT Dan Williams
2017-10-06 22:35 ` [PATCH v7 06/12] xfs: wire up MAP_DIRECT Dan Williams
2017-10-09  3:40   ` Dave Chinner
2017-10-09 17:08     ` Dan Williams
2017-10-09 22:50       ` Dave Chinner
2017-10-06 22:35 ` [PATCH v7 07/12] dma-mapping: introduce dma_has_iommu() Dan Williams
2017-10-06 22:45   ` David Woodhouse
2017-10-06 22:52     ` Dan Williams
2017-10-06 23:10       ` David Woodhouse
2017-10-06 23:15         ` Dan Williams
2017-10-07 11:08           ` David Woodhouse
2017-10-07 23:33             ` Dan Williams
2017-10-06 23:12       ` Dan Williams
2017-10-08  3:45   ` [PATCH v8] dma-mapping: introduce dma_get_iommu_domain() Dan Williams
2017-10-09 10:37     ` Robin Murphy
2017-10-09 17:32       ` Dan Williams
2017-10-10 14:40     ` Raj, Ashok
2017-10-09 18:58   ` [PATCH v7 07/12] dma-mapping: introduce dma_has_iommu() Jason Gunthorpe
2017-10-09 19:05     ` Dan Williams
2017-10-09 19:18       ` Jason Gunthorpe
2017-10-09 19:28         ` Dan Williams
2017-10-10 17:25           ` Jason Gunthorpe
2017-10-10 17:39             ` Dan Williams
2017-10-10 18:05               ` Jason Gunthorpe
2017-10-10 20:17                 ` Dan Williams
2017-10-12 18:27                   ` Jason Gunthorpe
2017-10-12 20:10                     ` Dan Williams
2017-10-13  6:50                       ` Christoph Hellwig
2017-10-13 15:03                         ` Jason Gunthorpe
2017-10-15 15:14                           ` Matan Barak
2017-10-15 15:21                             ` Dan Williams
2017-10-13  7:09         ` Christoph Hellwig
2017-10-06 22:36 ` Dan Williams [this message]
2017-10-06 22:36 ` [PATCH v7 09/12] xfs: wire up ->lease_direct() Dan Williams
2017-10-09  3:45   ` Dave Chinner
2017-10-09 17:10     ` Dan Williams
2017-10-06 22:36 ` [PATCH v7 10/12] device-dax: " Dan Williams
2017-10-06 22:36 ` [PATCH v7 11/12] IB/core: use MAP_DIRECT to fix / enable RDMA to DAX mappings Dan Williams
2017-10-08  4:02   ` [PATCH v8 1/2] iommu: up-level sg_num_pages() from amd-iommu Dan Williams
2017-10-08  4:04   ` [PATCH v8 2/2] IB/core: use MAP_DIRECT to fix / enable RDMA to DAX mappings Dan Williams
2017-10-08  6:45     ` kbuild test robot
2017-10-08 15:49       ` Dan Williams
2017-10-06 22:36 ` [PATCH v7 12/12] tools/testing/nvdimm: enable rdma unit tests Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=150732936063.22363.4533598271967882402.stgit@dwillia2-desk3.amr.corp.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=bfields@fieldses.org \
    --cc=darrick.wong@oracle.com \
    --cc=david@fromorbit.com \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=jlayton@poochiereds.net \
    --cc=jmoyer@redhat.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=ross.zwisler@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox