From: Daniel Gomez <da.gomez@samsung.com>
To: "minchan@kernel.org" <minchan@kernel.org>,
	"senozhatsky@chromium.org" <senozhatsky@chromium.org>,
	"axboe@kernel.dk" <axboe@kernel.dk>,
	"djwong@kernel.org" <djwong@kernel.org>,
	"willy@infradead.org" <willy@infradead.org>,
	"hughd@google.com" <hughd@google.com>,
	"akpm@linux-foundation.org" <akpm@linux-foundation.org>,
	"mcgrof@kernel.org" <mcgrof@kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-block@vger.kernel.org" <linux-block@vger.kernel.org>,
	"linux-xfs@vger.kernel.org" <linux-xfs@vger.kernel.org>,
	"linux-fsdevel@vger.kernel.org" <linux-fsdevel@vger.kernel.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>
Cc: "gost.dev@samsung.com" <gost.dev@samsung.com>,
	Pankaj Raghav <p.raghav@samsung.com>,
	Daniel Gomez <da.gomez@samsung.com>
Subject: [RFC PATCH 11/11] shmem: add per-block uptodate tracking
Date: Sat, 28 Oct 2023 21:15:52 +0000
Message-ID: <20231028211518.3424020-12-da.gomez@samsung.com>
In-Reply-To: <20231028211518.3424020-1-da.gomez@samsung.com>

This is currently a work in progress due to an fsx regression (see below).

Based on iomap's per-block dirty and uptodate state tracking, add a
shmem_folio_state struct to track the uptodate state per block when a
folio is larger than a block. In shmem, this is the case when large
folios are used, as one block is equal to one page in this context.
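
To make the block arithmetic concrete, here is a small standalone
sketch of the bitmap math the patch applies (the 4KiB block size and
64KiB folio size are illustrative assumptions, not fixed by the patch):

```c
/*
 * Standalone illustration of the per-block uptodate bitmap arithmetic
 * used below. With blkbits == 12 (4KiB pages) a 64KiB large folio is
 * tracked by 16 bits of state. All names are local to this example.
 */
#include <stdio.h>

int main(void)
{
	const unsigned int blkbits = 12;	/* PAGE_SHIFT on most arches */
	const unsigned int folio_size = 64 << 10;
	const unsigned int nr_blocks = folio_size >> blkbits; /* 16 */

	/* A write of 5000 bytes at offset 3000 within the folio... */
	unsigned int off = 3000, len = 5000;
	unsigned int first_blk = off >> blkbits;
	unsigned int last_blk = (off + len - 1) >> blkbits;

	/*
	 * ...marks blocks 0..1 of 16 uptodate, so the folio as a whole
	 * stays !uptodate until the other 14 blocks are also filled.
	 */
	printf("blocks %u..%u of %u marked uptodate\n",
	       first_blk, last_blk, nr_blocks);
	return 0;
}
```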

Add support for the invalidate_folio, release_folio and
is_partially_uptodate address space operations. The first two are
needed to free the new shmem_folio_state struct; the last is required
for large folios once per-block tracking is enabled.

The need for this was spotted when running fstests for tmpfs: with
large folios support in the fallocate path but without per-block
uptodate tracking, the generic/285 and generic/436 tests [1] regress
(a simplified illustration of the failing pattern follows the test
list).

[1] tests:
generic/285: src/seek_sanity_test/test09()
generic/436: src/seek_sanity_test/test13()
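
Both tests exercise lseek(2) SEEK_DATA/SEEK_HOLE sanity on sparse
files. A hypothetical, minimal sketch of the failing pattern (the
path, sizes and offsets are assumptions, not taken from the tests):

```c
/*
 * After writing a single 4KiB block into a sparse tmpfs file,
 * SEEK_HOLE must still find the untouched tail. If a large folio
 * backing the range is marked uptodate as a whole, the hole is
 * misreported past the end of the folio instead.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096] = { 1 };
	int fd = open("/mnt/test-tmpfs/file",
		      O_RDWR | O_CREAT | O_TRUNC, 0644);

	if (fd < 0 || ftruncate(fd, 1 << 20) ||
	    pwrite(fd, buf, sizeof(buf), 0) != (ssize_t)sizeof(buf))
		return 1;

	/* Expect the first hole right after the written block. */
	off_t hole = lseek(fd, 0, SEEK_HOLE);
	printf("first hole at %lld (expected %zu)\n",
	       (long long)hole, sizeof(buf));

	close(fd);
	return 0;
}
```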

How to reproduce:

```sh
mkdir -p /mnt/test-tmpfs
mount -t tmpfs -o size=1G -o noswap tmpfs /mnt/test-tmpfs # as in the fsx run below
./src/seek_sanity_test -s 9 -e 9 /mnt/test-tmpfs/file
./src/seek_sanity_test -s 13 -e 13 /mnt/test-tmpfs/file
umount /mnt/test-tmpfs
```

After per-block uptodate support is added, an fsx regression is found
when running the following:

```sh
mkdir -p /mnt/test-tmpfs
mount -t tmpfs -o size=1G -o noswap tmpfs /mnt/test-tmpfs
/root/xfstests-dev/ltp/fsx /mnt/test-tmpfs/file -d -N 1200 -X
umount /mnt/test-tmpfs
```

Signed-off-by: Daniel Gomez <da.gomez@samsung.com>
---
 mm/shmem.c | 169 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 159 insertions(+), 10 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index eb314927be78..fa67594495d5 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -132,6 +132,94 @@ struct shmem_options {
 #define SHMEM_SEEN_QUOTA 32
 };

+/*
+ * Structure allocated for each folio to track per-block uptodate state.
+ *
+ * Like the iomap buffered-io iomap_folio_state struct but only for uptodate.
+ */
+struct shmem_folio_state {
+	spinlock_t state_lock;
+	unsigned long state[];
+};
+
+static inline bool sfs_is_fully_uptodate(struct folio *folio,
+					 struct shmem_folio_state *sfs)
+{
+	struct inode *inode = folio->mapping->host;
+
+	return bitmap_full(sfs->state, i_blocks_per_folio(inode, folio));
+}
+
+static inline bool sfs_block_is_uptodate(struct shmem_folio_state *sfs,
+					 unsigned int block)
+{
+	return test_bit(block, sfs->state);
+}
+
+static void sfs_set_range_uptodate(struct folio *folio,
+				   struct shmem_folio_state *sfs, size_t off,
+				   size_t len)
+{
+	struct inode *inode = folio->mapping->host;
+	unsigned int first_blk = off >> inode->i_blkbits;
+	unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
+	unsigned int nr_blks = last_blk - first_blk + 1;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sfs->state_lock, flags);
+	bitmap_set(sfs->state, first_blk, nr_blks);
+	if (sfs_is_fully_uptodate(folio, sfs))
+		folio_mark_uptodate(folio);
+	spin_unlock_irqrestore(&sfs->state_lock, flags);
+}
+
+static void shmem_set_range_uptodate(struct folio *folio, size_t off,
+				     size_t len)
+{
+	struct shmem_folio_state *sfs = folio->private;
+
+	if (sfs)
+		sfs_set_range_uptodate(folio, sfs, off, len);
+	else
+		folio_mark_uptodate(folio);
+}
+
+static struct shmem_folio_state *sfs_alloc(struct inode *inode,
+					   struct folio *folio, gfp_t gfp)
+{
+	struct shmem_folio_state *sfs = folio->private;
+	unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
+
+	if (sfs || nr_blocks <= 1)
+		return sfs;
+
+	/*
+	 * sfs->state tracks uptodate flag when the block size is smaller
+	 * than the folio size.
+	 */
+	sfs = kzalloc(struct_size(sfs, state, BITS_TO_LONGS(nr_blocks)), gfp);
+	if (!sfs)
+		return sfs;
+
+	spin_lock_init(&sfs->state_lock);
+	if (folio_test_uptodate(folio))
+		bitmap_set(sfs->state, 0, nr_blocks);
+	folio_attach_private(folio, sfs);
+
+	return sfs;
+}
+
+static void sfs_free(struct folio *folio)
+{
+	struct shmem_folio_state *sfs = folio_detach_private(folio);
+
+	if (!sfs)
+		return;
+	WARN_ON_ONCE(sfs_is_fully_uptodate(folio, sfs) !=
+		     folio_test_uptodate(folio));
+	kfree(sfs);
+}
+
 #ifdef CONFIG_TMPFS
 static unsigned long shmem_default_max_blocks(void)
 {
@@ -1495,7 +1583,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		}
 		folio_zero_range(folio, 0, folio_size(folio));
 		flush_dcache_folio(folio);
-		folio_mark_uptodate(folio);
+		shmem_set_range_uptodate(folio, 0, folio_size(folio));
 	}

 	swap = folio_alloc_swap(folio);
@@ -1676,6 +1764,7 @@ static struct folio *shmem_alloc_and_add_folio(gfp_t gfp,
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	unsigned int order = shmem_mapping_size_order(mapping, index, len,
 						      SHMEM_SB(inode->i_sb));
+	struct shmem_folio_state *sfs;
 	struct folio *folio;
 	long pages;
 	int error;
@@ -1755,6 +1844,10 @@ static struct folio *shmem_alloc_and_add_folio(gfp_t gfp,
 		}
 	}

+	sfs = sfs_alloc(inode, folio, gfp);
+	if (!sfs && i_blocks_per_folio(inode, folio) > 1)
+		goto unlock;
+
 	trace_mm_shmem_add_to_page_cache(folio);
 	shmem_recalc_inode(inode, pages, 0);
 	folio_add_lru(folio);
@@ -1818,7 +1911,7 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,

 	__folio_set_locked(new);
 	__folio_set_swapbacked(new);
-	folio_mark_uptodate(new);
+	shmem_set_range_uptodate(new, 0, folio_size(new));
 	new->swap = entry;
 	folio_set_swapcache(new);

@@ -2146,7 +2239,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 		for (i = 0; i < n; i++)
 			clear_highpage(folio_page(folio, i));
 		flush_dcache_folio(folio);
-		folio_mark_uptodate(folio);
+		shmem_set_range_uptodate(folio, 0, folio_size(folio));
 	}

 	/* Perhaps the file has been truncated since we checked */
@@ -2788,13 +2881,18 @@ shmem_write_end(struct file *file, struct address_space *mapping,
 	if (pos + copied > inode->i_size)
 		i_size_write(inode, pos + copied);

+	if (unlikely(copied < len && !folio_test_uptodate(folio)))
+		return 0;
+
 	if (!folio_test_uptodate(folio)) {
-		if (copied < folio_size(folio)) {
-			size_t from = offset_in_folio(folio, pos);
-			folio_zero_segments(folio, 0, from,
-					from + copied, folio_size(folio));
-		}
-		folio_mark_uptodate(folio);
+		size_t from = offset_in_folio(folio, pos);
+		if (!folio_test_large(folio) && copied < folio_size(folio))
+			folio_zero_segments(folio, 0, from, from + copied,
+					    folio_size(folio));
+		if (folio_test_large(folio) && copied < PAGE_SIZE)
+			folio_zero_segments(folio, from, from, from + copied,
+					    folio_size(folio));
+		shmem_set_range_uptodate(folio, from, len);
 	}
 	folio_mark_dirty(folio);
 	folio_unlock(folio);
@@ -2803,6 +2901,54 @@ shmem_write_end(struct file *file, struct address_space *mapping,
 	return copied;
 }

+void shmem_invalidate_folio(struct folio *folio, size_t offset, size_t len)
+{
+	/*
+	 * If we're invalidating the entire folio, clear the dirty state
+	 * from it and release it to avoid unnecessary buildup of the LRU.
+	 */
+	if (offset == 0 && len == folio_size(folio)) {
+		WARN_ON_ONCE(folio_test_writeback(folio));
+		folio_cancel_dirty(folio);
+		sfs_free(folio);
+	}
+}
+
+bool shmem_release_folio(struct folio *folio, gfp_t gfp_flags)
+{
+	sfs_free(folio);
+	return true;
+}
+
+/*
+ * shmem_is_partially_uptodate checks whether blocks within a folio are
+ * uptodate or not.
+ *
+ * Returns true if all blocks which correspond to the specified part
+ * of the folio are uptodate.
+ */
+bool shmem_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
+{
+	struct shmem_folio_state *sfs = folio->private;
+	struct inode *inode = folio->mapping->host;
+	unsigned first, last, i;
+
+	if (!sfs)
+		return false;
+
+	/* Caller's range may extend past the end of this folio */
+	count = min(folio_size(folio) - from, count);
+
+	/* First and last blocks in range within folio */
+	first = from >> inode->i_blkbits;
+	last = (from + count - 1) >> inode->i_blkbits;
+
+	for (i = first; i <= last; i++)
+		if (!sfs_block_is_uptodate(sfs, i))
+			return false;
+	return true;
+}
+
 static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *file = iocb->ki_filp;
@@ -3554,7 +3700,7 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 		inode->i_mapping->a_ops = &shmem_aops;
 		inode->i_op = &shmem_symlink_inode_operations;
 		memcpy(folio_address(folio), symname, len);
-		folio_mark_uptodate(folio);
+		shmem_set_range_uptodate(folio, 0, folio_size(folio));
 		folio_mark_dirty(folio);
 		folio_unlock(folio);
 		folio_put(folio);
@@ -4524,6 +4670,9 @@ const struct address_space_operations shmem_aops = {
 #ifdef CONFIG_MIGRATION
 	.migrate_folio	= migrate_folio,
 #endif
+	.invalidate_folio = shmem_invalidate_folio,
+	.release_folio	= shmem_release_folio,
+	.is_partially_uptodate = shmem_is_partially_uptodate,
 	.error_remove_page = shmem_error_remove_page,
 };
 EXPORT_SYMBOL(shmem_aops);
--
2.39.2


