From: NeilBrown <neilb@suse.de>
To: Trond Myklebust <trond.myklebust@hammerspace.com>,
Anna Schumaker <anna.schumaker@netapp.com>,
Chuck Lever <chuck.lever@oracle.com>,
Andrew Morton <akpm@linux-foundation.org>,
Mel Gorman <mgorman@suse.de>,
Christoph Hellwig <hch@infradead.org>,
David Howells <dhowells@redhat.com>
Cc: linux-nfs@vger.kernel.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org
Subject: [PATCH 14/23] NFS: swap IO handling is slightly different for O_DIRECT IO
Date: Mon, 24 Jan 2022 14:48:32 +1100 [thread overview]
Message-ID: <164299611281.26253.15560926531007295753.stgit@noble.brown> (raw)
In-Reply-To: <164299573337.26253.7538614611220034049.stgit@noble.brown>
1/ Taking the i_rwsem for swap IO triggers lockdep warnings regarding
possible deadlocks with "fs_reclaim". These deadlocks could, I believe,
eventuate if a buffered read on the swapfile was attempted.
We don't need coherence with the page cache for a swap file, and
buffered writes are forbidden anyway. There is no other need for
i_rwsem during direct IO. So never take it for swap_rw()
2/ generic_write_checks() explicitly forbids writes to swap, and
performs checks that are not needed for swap. So bypass it
for swap_rw().
Signed-off-by: NeilBrown <neilb@suse.de>
---
fs/nfs/direct.c | 30 +++++++++++++++++++++---------
fs/nfs/file.c | 4 ++--
include/linux/nfs_fs.h | 4 ++--
3 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b929dd5b0c3a..43a956d7fd62 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -166,9 +166,9 @@ int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
if (iov_iter_rw(iter) == READ)
- ret = nfs_file_direct_read(iocb, iter);
+ ret = nfs_file_direct_read(iocb, iter, true);
else
- ret = nfs_file_direct_write(iocb, iter);
+ ret = nfs_file_direct_write(iocb, iter, true);
if (ret < 0)
return ret;
return 0;
@@ -422,6 +422,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
* nfs_file_direct_read - file direct read operation for NFS files
* @iocb: target I/O control block
* @iter: vector of user buffers into which to read data
+ * @swap: flag indicating this is swap IO, not O_DIRECT IO
*
* We use this function for direct reads instead of calling
* generic_file_aio_read() in order to avoid gfar's check to see if
@@ -437,7 +438,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
* client must read the updated atime from the server back into its
* cache.
*/
-ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
+ bool swap)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -479,12 +481,14 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
if (iter_is_iovec(iter))
dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
- nfs_start_io_direct(inode);
+ if (!swap)
+ nfs_start_io_direct(inode);
NFS_I(inode)->read_io += count;
requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
- nfs_end_io_direct(inode);
+ if (!swap)
+ nfs_end_io_direct(inode);
if (requested > 0) {
result = nfs_direct_wait(dreq);
@@ -873,6 +877,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
* @iter: vector of user buffers from which to write data
+ * @swap: flag indicating this is swap IO, not O_DIRECT IO
*
* We use this function for direct writes instead of calling
* generic_file_aio_write() in order to avoid taking the inode
@@ -889,7 +894,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
* Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol.
*/
-ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+ bool swap)
{
ssize_t result, requested;
size_t count;
@@ -903,7 +909,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
file, iov_iter_count(iter), (long long) iocb->ki_pos);
- result = generic_write_checks(iocb, iter);
+ if (!swap)
+ result = generic_write_checks(iocb, iter);
+ else
+ /* bypass generic checks */
+ result = iov_iter_count(iter);
if (result <= 0)
return result;
count = result;
@@ -934,7 +944,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
dreq->iocb = iocb;
pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode);
- nfs_start_io_direct(inode);
+ if (!swap)
+ nfs_start_io_direct(inode);
requested = nfs_direct_write_schedule_iovec(dreq, iter, pos);
@@ -943,7 +954,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
pos >> PAGE_SHIFT, end);
}
- nfs_end_io_direct(inode);
+ if (!swap)
+ nfs_end_io_direct(inode);
if (requested > 0) {
result = nfs_direct_wait(dreq);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 91ff9ed05b06..04ba56f223d3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -159,7 +159,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
ssize_t result;
if (iocb->ki_flags & IOCB_DIRECT)
- return nfs_file_direct_read(iocb, to);
+ return nfs_file_direct_read(iocb, to, false);
dprintk("NFS: read(%pD2, %zu@%lu)\n",
iocb->ki_filp,
@@ -625,7 +625,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
return result;
if (iocb->ki_flags & IOCB_DIRECT)
- return nfs_file_direct_write(iocb, from);
+ return nfs_file_direct_write(iocb, from, false);
dprintk("NFS: write(%pD2, %zu@%Ld)\n",
file, iov_iter_count(from), (long long) iocb->ki_pos);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 29a5e579f26f..aba38dc4fd29 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -511,9 +511,9 @@ static inline const struct cred *nfs_file_cred(struct file *file)
*/
extern int nfs_swap_rw(struct kiocb *, struct iov_iter *);
extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
- struct iov_iter *iter);
+ struct iov_iter *iter, bool swap);
extern ssize_t nfs_file_direct_write(struct kiocb *iocb,
- struct iov_iter *iter);
+ struct iov_iter *iter, bool swap);
/*
* linux/fs/nfs/dir.c
next prev parent reply other threads:[~2022-01-24 3:53 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-24 3:48 [PATCH 00/23 V3] Repair SWAP-over_NFS NeilBrown
2022-01-24 3:48 ` [PATCH 09/23] MM: submit multipage reads for SWP_FS_OPS swap-space NeilBrown
2022-01-24 8:25 ` kernel test robot
2022-01-24 8:52 ` Christoph Hellwig
2022-01-24 9:27 ` kernel test robot
2022-01-24 13:16 ` Mark Hemment
2022-01-26 22:04 ` NeilBrown
2022-02-08 11:07 ` Geert Uytterhoeven
2022-01-24 3:48 ` NeilBrown [this message]
2022-01-24 8:58 ` [PATCH 14/23] NFS: swap IO handling is slightly different for O_DIRECT IO Christoph Hellwig
2022-01-24 13:22 ` Mark Hemment
2022-01-26 22:51 ` NeilBrown
2022-01-24 3:48 ` [PATCH 20/23] SUNRPC: improve 'swap' handling: scheduling and PF_MEMALLOC NeilBrown
2022-01-24 3:48 ` [PATCH 01/23] MM: create new mm/swap.h header file NeilBrown
2022-02-07 13:51 ` Geert Uytterhoeven
2022-01-24 3:48 ` [PATCH 04/23] MM: move responsibility for setting SWP_FS_OPS to ->swap_activate NeilBrown
2022-01-24 7:30 ` Christoph Hellwig
2022-01-24 3:48 ` [PATCH 05/23] MM: reclaim mustn't enter FS for SWP_FS_OPS swap-space NeilBrown
2022-01-24 7:31 ` Christoph Hellwig
2022-01-24 3:48 ` [PATCH 13/23] NFS: rename nfs_direct_IO and use as ->swap_rw NeilBrown
2022-01-24 8:57 ` Christoph Hellwig
2022-01-24 3:48 ` [PATCH 07/23] MM: perform async writes to SWP_FS_OPS swap-space using ->swap_rw NeilBrown
2022-01-24 8:49 ` Christoph Hellwig
2022-01-24 3:48 ` [PATCH 16/23] SUNRPC/auth: async tasks mustn't block waiting for memory NeilBrown
2022-01-24 3:48 ` [PATCH 18/23] SUNRPC: remove scheduling boost for "SWAPPER" tasks NeilBrown
2022-01-24 3:48 ` [PATCH 10/23] MM: submit multipage write for SWP_FS_OPS swap-space NeilBrown
2022-01-24 8:55 ` Christoph Hellwig
2022-01-24 10:29 ` kernel test robot
2022-01-24 3:48 ` [PATCH 08/23] DOC: update documentation for swap_activate and swap_rw NeilBrown
2022-01-24 8:50 ` Christoph Hellwig
2022-01-24 3:48 ` [PATCH 11/23] VFS: Add FMODE_CAN_ODIRECT file flag NeilBrown
2022-01-24 8:56 ` Christoph Hellwig
2022-01-26 22:14 ` NeilBrown
2022-01-24 3:48 ` [PATCH 12/23] NFS: remove IS_SWAPFILE hack NeilBrown
2022-01-24 8:56 ` Christoph Hellwig
2022-01-24 3:48 ` [PATCH 21/23] NFSv4: keep state manager thread active if swap is enabled NeilBrown
2022-01-24 3:48 ` [PATCH 02/23] MM: extend block-plugging to cover all swap reads with read-ahead NeilBrown
2022-01-24 7:27 ` Christoph Hellwig
2022-01-26 21:47 ` NeilBrown
2022-01-26 23:09 ` Hugh Dickins
2022-01-27 0:32 ` NeilBrown
2022-01-24 3:48 ` [PATCH 06/23] MM: introduce ->swap_rw and use it for reads from SWP_FS_OPS swap-space NeilBrown
2022-01-24 8:48 ` Christoph Hellwig
2022-01-24 3:48 ` [PATCH 22/23] NFS: swap-out must always use STABLE writes NeilBrown
2022-01-26 3:45 ` Trond Myklebust
2022-01-26 21:42 ` NeilBrown
2022-01-24 3:48 ` [PATCH 23/23] SUNRPC: lock against ->sock changing during sysfs read NeilBrown
2022-01-24 3:48 ` [PATCH 17/23] SUNRPC/xprt: async tasks mustn't block waiting for memory NeilBrown
2022-01-24 3:48 ` [PATCH 19/23] NFS: discard NFS_RPC_SWAPFLAGS and RPC_TASK_ROOTCREDS NeilBrown
2022-01-24 3:48 ` [PATCH 03/23] MM: drop swap_set_page_dirty NeilBrown
2022-01-24 7:28 ` Christoph Hellwig
2022-01-24 3:48 ` [PATCH 15/23] SUNRPC/call_alloc: async tasks mustn't block waiting for memory NeilBrown
2022-02-07 17:55 ` [PATCH 00/23 V3] Repair SWAP-over_NFS Geert Uytterhoeven
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=164299611281.26253.15560926531007295753.stgit@noble.brown \
--to=neilb@suse.de \
--cc=akpm@linux-foundation.org \
--cc=anna.schumaker@netapp.com \
--cc=chuck.lever@oracle.com \
--cc=dhowells@redhat.com \
--cc=hch@infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-nfs@vger.kernel.org \
--cc=mgorman@suse.de \
--cc=trond.myklebust@hammerspace.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox