From: Jens Axboe <axboe@kernel.dk>
To: linux-mm@kvack.org, linux-fsdevel@vger.kernel.org
Cc: hannes@cmpxchg.org, clm@meta.com, linux-kernel@vger.kernel.org,
willy@infradead.org, kirill@shutemov.name,
linux-btrfs@vger.kernel.org, linux-ext4@vger.kernel.org,
linux-xfs@vger.kernel.org, bfoster@redhat.com,
Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 13/17] ext4: add RWF_UNCACHED write support
Date: Thu, 14 Nov 2024 08:25:17 -0700 [thread overview]
Message-ID: <20241114152743.2381672-15-axboe@kernel.dk> (raw)
In-Reply-To: <20241114152743.2381672-2-axboe@kernel.dk>
IOCB_UNCACHED IO needs to prune writeback regions on IO completion,
and hence needs the worker punt that ext4 also does for unwritten
extents. Add an io_end flag to manage that.
If foliop is set to foliop_uncached in ext4_write_begin(), then set
FGP_UNCACHED so that __filemap_get_folio() will mark newly created
folios as uncached. That in turn will make writeback completion drop
these ranges from the page cache.
Now that ext4 supports both uncached reads and writes, add the fop_flag
FOP_UNCACHED to enable it.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
fs/ext4/ext4.h | 1 +
fs/ext4/file.c | 2 +-
fs/ext4/inline.c | 7 ++++++-
fs/ext4/inode.c | 18 ++++++++++++++++--
fs/ext4/page-io.c | 28 ++++++++++++++++------------
5 files changed, 40 insertions(+), 16 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 44b0d418143c..60dc9ffae076 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -279,6 +279,7 @@ struct ext4_system_blocks {
* Flags for ext4_io_end->flags
*/
#define EXT4_IO_END_UNWRITTEN 0x0001
+#define EXT4_IO_UNCACHED 0x0002
struct ext4_io_end_vec {
struct list_head list; /* list of io_end_vec */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index f14aed14b9cf..0ef39d738598 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -944,7 +944,7 @@ const struct file_operations ext4_file_operations = {
.splice_write = iter_file_splice_write,
.fallocate = ext4_fallocate,
.fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC |
- FOP_DIO_PARALLEL_WRITE,
+ FOP_DIO_PARALLEL_WRITE | FOP_UNCACHED,
};
const struct inode_operations ext4_file_inode_operations = {
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 3536ca7e4fcc..500bfb6d4860 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -667,6 +667,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
handle_t *handle;
struct folio *folio;
struct ext4_iloc iloc;
+ fgf_t fgp_flags;
if (pos + len > ext4_get_max_inline_size(inode))
goto convert;
@@ -702,7 +703,11 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
if (ret)
goto out;
- folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
+ fgp_flags = FGP_WRITEBEGIN | FGP_NOFS;
+ if (foliop_is_uncached(foliop))
+ fgp_flags |= FGP_UNCACHED;
+
+ folio = __filemap_get_folio(mapping, 0, fgp_flags,
mapping_gfp_mask(mapping));
if (IS_ERR(folio)) {
ret = PTR_ERR(folio);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 54bdd4884fe6..9b815137fb2c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1138,6 +1138,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
int ret, needed_blocks;
handle_t *handle;
int retries = 0;
+ fgf_t fgp_flags;
struct folio *folio;
pgoff_t index;
unsigned from, to;
@@ -1164,6 +1165,15 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
return 0;
}
+ /*
+ * Set FGP_WRITEBEGIN, and FGP_UNCACHED if foliop is marked as
+ * uncached. That's how generic_perform_write() informs us that this
+ * is an uncached write.
+ */
+ fgp_flags = FGP_WRITEBEGIN;
+ if (foliop_is_uncached(foliop))
+ fgp_flags |= FGP_UNCACHED;
+
/*
* __filemap_get_folio() can take a long time if the
* system is thrashing due to memory pressure, or if the folio
@@ -1172,7 +1182,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
* the folio (if needed) without using GFP_NOFS.
*/
retry_grab:
- folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ folio = __filemap_get_folio(mapping, index, fgp_flags,
mapping_gfp_mask(mapping));
if (IS_ERR(folio))
return PTR_ERR(folio);
@@ -2903,6 +2913,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
struct folio *folio;
pgoff_t index;
struct inode *inode = mapping->host;
+ fgf_t fgp_flags;
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
@@ -2926,8 +2937,11 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
return 0;
}
+ fgp_flags = FGP_WRITEBEGIN;
+ if (foliop_is_uncached(foliop))
+ fgp_flags |= FGP_UNCACHED;
retry:
- folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ folio = __filemap_get_folio(mapping, index, fgp_flags,
mapping_gfp_mask(mapping));
if (IS_ERR(folio))
return PTR_ERR(folio);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index ad5543866d21..10447c3c4ff1 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -226,8 +226,6 @@ static void ext4_add_complete_io(ext4_io_end_t *io_end)
unsigned long flags;
/* Only reserved conversions from writeback should enter here */
- WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
- WARN_ON(!io_end->handle && sbi->s_journal);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
wq = sbi->rsv_conversion_wq;
if (list_empty(&ei->i_rsv_conversion_list))
@@ -252,7 +250,7 @@ static int ext4_do_flush_completed_IO(struct inode *inode,
while (!list_empty(&unwritten)) {
io_end = list_entry(unwritten.next, ext4_io_end_t, list);
- BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
+ BUG_ON(!(io_end->flag & (EXT4_IO_END_UNWRITTEN|EXT4_IO_UNCACHED)));
list_del_init(&io_end->list);
err = ext4_end_io_end(io_end);
@@ -287,14 +285,15 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
void ext4_put_io_end_defer(ext4_io_end_t *io_end)
{
- if (refcount_dec_and_test(&io_end->count)) {
- if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) ||
- list_empty(&io_end->list_vec)) {
- ext4_release_io_end(io_end);
- return;
- }
- ext4_add_complete_io(io_end);
+ if (!refcount_dec_and_test(&io_end->count))
+ return;
+ if ((!(io_end->flag & EXT4_IO_END_UNWRITTEN) ||
+ list_empty(&io_end->list_vec)) &&
+ !(io_end->flag & EXT4_IO_UNCACHED)) {
+ ext4_release_io_end(io_end);
+ return;
}
+ ext4_add_complete_io(io_end);
}
int ext4_put_io_end(ext4_io_end_t *io_end)
@@ -348,7 +347,7 @@ static void ext4_end_bio(struct bio *bio)
blk_status_to_errno(bio->bi_status));
}
- if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
+ if (io_end->flag & (EXT4_IO_END_UNWRITTEN|EXT4_IO_UNCACHED)) {
/*
* Link bio into list hanging from io_end. We have to do it
* atomically as bio completions can be racing against each
@@ -417,8 +416,13 @@ static void io_submit_add_bh(struct ext4_io_submit *io,
submit_and_retry:
ext4_io_submit(io);
}
- if (io->io_bio == NULL)
+ if (io->io_bio == NULL) {
io_submit_init_bio(io, bh);
+ if (folio_test_uncached(folio)) {
+ ext4_io_end_t *io_end = io->io_bio->bi_private;
+ io_end->flag |= EXT4_IO_UNCACHED;
+ }
+ }
if (!bio_add_folio(io->io_bio, io_folio, bh->b_size, bh_offset(bh)))
goto submit_and_retry;
wbc_account_cgroup_owner(io->io_wbc, &folio->page, bh->b_size);
--
2.45.2
next prev parent reply other threads:[~2024-11-14 15:28 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-14 15:25 [PATCHSET v5 0/17] Uncached buffered IO Jens Axboe
2024-11-14 15:25 ` [PATCH 01/17] mm/filemap: change filemap_create_folio() to take a struct kiocb Jens Axboe
2024-11-14 15:25 ` [PATCH 02/17] mm/readahead: add folio allocation helper Jens Axboe
2024-11-14 15:25 ` [PATCH 03/17] mm: add PG_uncached page flag Jens Axboe
2024-11-14 15:25 ` [PATCH 04/17] mm/readahead: add readahead_control->uncached member Jens Axboe
2024-11-14 15:25 ` [PATCH 05/17] mm/filemap: use page_cache_sync_ra() to kick off read-ahead Jens Axboe
2024-11-14 15:25 ` [PATCH 06/17] mm/truncate: add folio_unmap_invalidate() helper Jens Axboe
2024-11-14 15:25 ` [PATCH 07/17] fs: add RWF_UNCACHED iocb and FOP_UNCACHED file_operations flag Jens Axboe
2024-11-14 15:25 ` [PATCH 08/17] mm/filemap: add read support for RWF_UNCACHED Jens Axboe
2024-11-15 8:49 ` Kirill A. Shutemov
2024-11-15 15:01 ` Jens Axboe
2024-11-14 15:25 ` [PATCH 09/17] mm/filemap: drop uncached pages when writeback completes Jens Axboe
2024-11-14 15:25 ` [PATCH 10/17] mm/filemap: make buffered writes work with RWF_UNCACHED Jens Axboe
2024-11-18 8:42 ` Baokun Li
2024-11-18 14:49 ` Jens Axboe
2024-11-14 15:25 ` [PATCH 11/17] mm/filemap: add filemap_fdatawrite_range_kick() helper Jens Axboe
2024-11-14 15:25 ` [PATCH 12/17] mm: add FGP_UNCACHED folio creation flag Jens Axboe
2024-11-14 15:25 ` Jens Axboe [this message]
2024-11-14 15:25 ` [PATCH 14/17] iomap: make buffered writes work with RWF_UNCACHED Jens Axboe
2024-12-12 5:50 ` Christoph Hellwig
2024-12-12 6:26 ` Darrick J. Wong
2024-12-12 6:31 ` Christoph Hellwig
2024-11-14 15:25 ` [PATCH 15/17] xfs: punt uncached write completions to the completion wq Jens Axboe
2024-11-14 15:25 ` [PATCH 16/17] xfs: flag as supporting FOP_UNCACHED Jens Axboe
2024-11-14 15:25 ` [PATCH 17/17] btrfs: add support for uncached writes Jens Axboe
2024-11-15 4:01 ` [PATCHSET v5 0/17] Uncached buffered IO Julian Sun
2024-11-15 15:06 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241114152743.2381672-15-axboe@kernel.dk \
--to=axboe@kernel.dk \
--cc=bfoster@redhat.com \
--cc=clm@meta.com \
--cc=hannes@cmpxchg.org \
--cc=kirill@shutemov.name \
--cc=linux-btrfs@vger.kernel.org \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-xfs@vger.kernel.org \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox