From: Ojaswin Mujoo <ojaswin@linux.ibm.com>
To: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org
Cc: djwong@kernel.org, john.g.garry@oracle.com, willy@infradead.org,
hch@lst.de, ritesh.list@gmail.com, jack@suse.cz,
Luis Chamberlain <mcgrof@kernel.org>,
dgc@kernel.org, tytso@mit.edu, p.raghav@samsung.com,
andres@anarazel.de, brauner@kernel.org,
linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [RFC PATCH v2 4/5] iomap: Add aio support to RWF_WRITETHROUGH
Date: Thu, 9 Apr 2026 00:15:45 +0530 [thread overview]
Message-ID: <dcb4a5bfb8872c38c263290c124bbfc8a0f8b3c7.1775658795.git.ojaswin@linux.ibm.com> (raw)
In-Reply-To: <cover.1775658795.git.ojaswin@linux.ibm.com>
With aio, the only thing we need to be careful of is that writethrough
can be in progress even after dropping the inode and folio locks. Due to
this, we need a way to synchronise with other paths where stable writes
are not enough, for example:
1. Truncate to 0 in xfs sets i_size = 0 before waiting for writeback to
complete. In case of writethrough, the end io completion can again
push the i_size to a non-zero value.
2. Dio reads might race with aio writethrough ->end_io() and read 0s if
unwritten conversion is yet to happen.
Hence use inode_dio_begin()/inode_dio_end(), as they give us the required guarantees.
Co-developed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
---
fs/iomap/buffered-io.c | 53 ++++++++++++++++++++++++++++++++++++------
include/linux/iomap.h | 10 ++++++--
2 files changed, 54 insertions(+), 9 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 74e1ab108b0f..6937f10e2782 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1113,6 +1113,9 @@ static ssize_t iomap_writethrough_complete(struct iomap_writethrough_ctx *wt_ctx
mapping_clear_stable_writes(inode->i_mapping);
+ if (wt_ctx->is_aio)
+ inode_dio_end(inode);
+
if (!ret) {
ret = wt_ctx->written;
iocb->ki_pos = wt_ctx->pos + ret;
@@ -1122,12 +1125,27 @@ static ssize_t iomap_writethrough_complete(struct iomap_writethrough_ctx *wt_ctx
return ret;
}
+static void iomap_writethrough_complete_work(struct work_struct *work)
+{
+ struct iomap_writethrough_ctx *wt_ctx =
+ container_of(work, struct iomap_writethrough_ctx, aio_work);
+ struct kiocb *iocb = wt_ctx->iocb;
+
+ iocb->ki_complete(iocb, iomap_writethrough_complete(wt_ctx));
+}
+
static void iomap_writethrough_done(struct iomap_writethrough_ctx *wt_ctx)
{
- struct task_struct *waiter = wt_ctx->waiter;
+ if (!wt_ctx->is_aio) {
+ struct task_struct *waiter = wt_ctx->waiter;
- WRITE_ONCE(wt_ctx->waiter, NULL);
- blk_wake_io_task(waiter);
+ WRITE_ONCE(wt_ctx->waiter, NULL);
+ blk_wake_io_task(waiter);
+ return;
+ }
+
+ INIT_WORK(&wt_ctx->aio_work, iomap_writethrough_complete_work);
+ queue_work(wt_ctx->inode->i_sb->s_dio_done_wq, &wt_ctx->aio_work);
return;
}
@@ -1530,9 +1548,6 @@ ssize_t iomap_file_writethrough_write(struct kiocb *iocb, struct iov_iter *i,
if (iocb_is_dsync(iocb))
/* D_SYNC support not implemented yet */
return -EOPNOTSUPP;
- if (!is_sync_kiocb(iocb))
- /* aio support not implemented yet */
- return -EOPNOTSUPP;
/*
* +1 to max bvecs to account for unaligned write spanning multiple
@@ -1557,11 +1572,32 @@ ssize_t iomap_file_writethrough_write(struct kiocb *iocb, struct iov_iter *i,
wt_ctx->pos = iocb->ki_pos;
wt_ctx->new_i_size = i_size_read(inode);
wt_ctx->max_bvecs = max_bvecs;
+ wt_ctx->is_aio = !is_sync_kiocb(iocb);
atomic_set(&wt_ctx->ref, 1);
- wt_ctx->waiter = current;
+
+ if (!wt_ctx->is_aio)
+ wt_ctx->waiter = current;
+ else
+ /*
+ * With aio, writethrough can be in progress even after dropping
+ * inode and folio lock. Due to this, we need a way to
+ * synchronise with other paths where stable write is not enough
+ * (example truncate). Hence use the dio begin/end as it gives
+ * us the required guarantees.
+ */
+ inode_dio_begin(inode);
mapping_set_stable_writes(inode->i_mapping);
+ if (wt_ctx->is_aio && !inode->i_sb->s_dio_done_wq) {
+ ret = sb_init_dio_done_wq(inode->i_sb);
+ if (ret < 0) {
+ mapping_clear_stable_writes(inode->i_mapping);
+ kfree(wt_ctx);
+ return ret;
+ }
+ }
+
while ((ret = iomap_iter(&iter, wt_ops->ops)) > 0) {
WARN_ON(iter.iomap.type != IOMAP_UNWRITTEN &&
iter.iomap.type != IOMAP_MAPPED);
@@ -1571,6 +1607,9 @@ ssize_t iomap_file_writethrough_write(struct kiocb *iocb, struct iov_iter *i,
cmpxchg(&wt_ctx->error, 0, ret);
if (!atomic_dec_and_test(&wt_ctx->ref)) {
+ if (wt_ctx->is_aio)
+ return -EIOCBQUEUED;
+
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (!READ_ONCE(wt_ctx->waiter))
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 661233aa009d..e99f7c279dc6 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -486,9 +486,15 @@ struct iomap_writethrough_ctx {
atomic_t ref;
unsigned int flags;
int error;
+ bool is_aio;
- /* used during submission and for non-aio completion */
- struct task_struct *waiter;
+ union {
+ /* used during submission and for non-aio completion */
+ struct task_struct *waiter;
+
+ /* used during aio completion */
+ struct work_struct aio_work;
+ };
loff_t bio_pos;
unsigned int nr_bvecs;
--
2.53.0
next prev parent reply other threads:[~2026-04-08 18:46 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-08 18:45 [RFC PATCH v2 0/5] Add buffered write-through support to iomap & xfs Ojaswin Mujoo
2026-04-08 18:45 ` [RFC PATCH v2 1/5] mm: Refactor folio_clear_dirty_for_io() Ojaswin Mujoo
2026-04-08 18:45 ` [RFC PATCH v2 2/5] iomap: Add initial support for buffered RWF_WRITETHROUGH Ojaswin Mujoo
2026-04-08 18:45 ` [RFC PATCH v2 3/5] xfs: Add RWF_WRITETHROUGH support to xfs Ojaswin Mujoo
2026-04-08 18:45 ` Ojaswin Mujoo [this message]
2026-04-08 18:45 ` [RFC PATCH v2 5/5] iomap: Add DSYNC support to writethrough Ojaswin Mujoo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=dcb4a5bfb8872c38c263290c124bbfc8a0f8b3c7.1775658795.git.ojaswin@linux.ibm.com \
--to=ojaswin@linux.ibm.com \
--cc=andres@anarazel.de \
--cc=brauner@kernel.org \
--cc=dgc@kernel.org \
--cc=djwong@kernel.org \
--cc=hch@lst.de \
--cc=jack@suse.cz \
--cc=john.g.garry@oracle.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-xfs@vger.kernel.org \
--cc=mcgrof@kernel.org \
--cc=p.raghav@samsung.com \
--cc=ritesh.list@gmail.com \
--cc=tytso@mit.edu \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox