linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Daniel Gomez <da.gomez@samsung.com>
To: "hughd@google.com" <hughd@google.com>,
	"akpm@linux-foundation.org" <akpm@linux-foundation.org>,
	"willy@infradead.org" <willy@infradead.org>,
	"jack@suse.cz" <jack@suse.cz>,
	"mcgrof@kernel.org" <mcgrof@kernel.org>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
	"linux-xfs@vger.kernel.org" <linux-xfs@vger.kernel.org>,
	"djwong@kernel.org" <djwong@kernel.org>,
	"Pankaj Raghav" <p.raghav@samsung.com>,
	"dagmcr@gmail.com" <dagmcr@gmail.com>,
	"yosryahmed@google.com" <yosryahmed@google.com>,
	"baolin.wang@linux.alibaba.com" <baolin.wang@linux.alibaba.com>,
	"ritesh.list@gmail.com" <ritesh.list@gmail.com>,
	"lsf-pc@lists.linux-foundation.org"
	<lsf-pc@lists.linux-foundation.org>,
	"david@redhat.com" <david@redhat.com>,
	"chandan.babu@oracle.com" <chandan.babu@oracle.com>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"brauner@kernel.org" <brauner@kernel.org>,
	Daniel Gomez <da.gomez@samsung.com>
Subject: [PATCH 02/12] shmem: add per-block uptodate tracking for large folios
Date: Wed, 15 May 2024 05:57:24 +0000	[thread overview]
Message-ID: <20240515055719.32577-3-da.gomez@samsung.com> (raw)
In-Reply-To: <20240515055719.32577-1-da.gomez@samsung.com>

Modelled on iomap's per-block dirty and uptodate state tracking, add a
shmem_folio_state struct that tracks the uptodate state of each block
in a large folio.

Signed-off-by: Daniel Gomez <da.gomez@samsung.com>
---
 mm/shmem.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 189 insertions(+), 6 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 94ab99b6b574..4818f9fbd328 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -131,6 +131,124 @@ struct shmem_options {
 #define SHMEM_SEEN_QUOTA 32
 };
 
+/*
+ * Structure allocated for each folio to track per-block uptodate state.
+ *
+ * Like the iomap buffered-io struct iomap_folio_state, but uptodate-only.
+ */
+struct shmem_folio_state {
+	spinlock_t state_lock;
+	unsigned long state[];
+};
+
+static inline bool sfs_is_fully_uptodate(struct folio *folio)
+{
+	struct inode *inode = folio->mapping->host;
+	struct shmem_folio_state *sfs = folio->private;
+
+	return bitmap_full(sfs->state, i_blocks_per_folio(inode, folio));
+}
+
+static inline bool sfs_is_block_uptodate(struct shmem_folio_state *sfs,
+					 unsigned int block)
+{
+	return test_bit(block, sfs->state);
+}
+
+/**
+ * sfs_get_last_block_uptodate - find the index of the last uptodate block
+ * within a specified range
+ * @folio: The folio
+ * @first: The starting block of the range to search
+ * @last: The ending block of the range to search
+ *
+ * Return: the last block of the contiguous uptodate run that starts at @first,
+ * clamped to @last, or UINT_MAX if the block at @first is not uptodate.
+ */
+static unsigned int sfs_get_last_block_uptodate(struct folio *folio,
+						unsigned int first,
+						unsigned int last)
+{
+	struct inode *inode = folio->mapping->host;
+	struct shmem_folio_state *sfs = folio->private;
+	unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
+	unsigned int aux = find_next_zero_bit(sfs->state, nr_blocks, first);
+
+	/*
+	 * The block at @first is not uptodate: return UINT_MAX, a value no
+	 * valid block index can take, so it never compares equal to @last.
+	 */
+	if (aux == first)
+		return UINT_MAX;
+
+	return min_t(unsigned int, aux - 1, last);
+}
+
+static void sfs_set_range_uptodate(struct folio *folio,
+				   struct shmem_folio_state *sfs, size_t off,
+				   size_t len)
+{
+	struct inode *inode = folio->mapping->host;
+	unsigned int first_blk = off >> inode->i_blkbits;
+	unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
+	unsigned int nr_blks = last_blk - first_blk + 1;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sfs->state_lock, flags);
+	bitmap_set(sfs->state, first_blk, nr_blks);
+	if (sfs_is_fully_uptodate(folio))
+		folio_mark_uptodate(folio);
+	spin_unlock_irqrestore(&sfs->state_lock, flags);
+}
+
+static struct shmem_folio_state *sfs_alloc(struct inode *inode,
+					   struct folio *folio)
+{
+	struct shmem_folio_state *sfs = folio->private;
+	unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
+	gfp_t gfp = GFP_KERNEL;
+
+	if (sfs || nr_blocks <= 1)
+		return sfs;
+
+	/*
+	 * sfs->state tracks uptodate flag when the block size is smaller
+	 * than the folio size.
+	 */
+	sfs = kzalloc(struct_size(sfs, state, BITS_TO_LONGS(nr_blocks)), gfp);
+	if (!sfs)
+		return sfs;
+
+	spin_lock_init(&sfs->state_lock);
+	if (folio_test_uptodate(folio))
+		bitmap_set(sfs->state, 0, nr_blocks);
+	folio_attach_private(folio, sfs);
+
+	return sfs;
+}
+
+static void sfs_free(struct folio *folio, bool force)
+{
+	if (!folio_test_private(folio))
+		return;
+
+	if (!force)
+		WARN_ON_ONCE(sfs_is_fully_uptodate(folio) !=
+			     folio_test_uptodate(folio));
+
+	kfree(folio_detach_private(folio));
+}
+
+static void shmem_set_range_uptodate(struct folio *folio, size_t off,
+				     size_t len)
+{
+	struct shmem_folio_state *sfs = folio->private;
+
+	if (sfs)
+		sfs_set_range_uptodate(folio, sfs, off, len);
+	else
+		folio_mark_uptodate(folio);
+}
 #ifdef CONFIG_TMPFS
 static unsigned long shmem_default_max_blocks(void)
 {
@@ -1487,7 +1605,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		}
 		folio_zero_range(folio, 0, folio_size(folio));
 		flush_dcache_folio(folio);
-		folio_mark_uptodate(folio);
+		shmem_set_range_uptodate(folio, 0, folio_size(folio));
 	}
 
 	swap = folio_alloc_swap(folio);
@@ -1769,13 +1887,16 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
 	if (!new)
 		return -ENOMEM;
 
+	if (folio_get_private(old))
+		folio_attach_private(new, folio_detach_private(old));
+
 	folio_get(new);
 	folio_copy(new, old);
 	flush_dcache_folio(new);
 
 	__folio_set_locked(new);
 	__folio_set_swapbacked(new);
-	folio_mark_uptodate(new);
+	shmem_set_range_uptodate(new, 0, folio_size(new));
 	new->swap = entry;
 	folio_set_swapcache(new);
 
@@ -2063,6 +2184,12 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 
 alloced:
 	alloced = true;
+
+	if (!sfs_alloc(inode, folio) && folio_test_large(folio)) {
+		error = -ENOMEM;
+		goto unlock;
+	}
+
 	if (folio_test_pmd_mappable(folio) &&
 	    DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
 					folio_next_index(folio) - 1) {
@@ -2104,7 +2231,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 		for (i = 0; i < n; i++)
 			clear_highpage(folio_page(folio, i));
 		flush_dcache_folio(folio);
-		folio_mark_uptodate(folio);
+		shmem_set_range_uptodate(folio, 0, folio_size(folio));
 	}
 
 	/* Perhaps the file has been truncated since we checked */
@@ -2773,8 +2900,8 @@ shmem_write_end(struct file *file, struct address_space *mapping,
 			folio_zero_segments(folio, 0, from,
 					from + copied, folio_size(folio));
 		}
-		folio_mark_uptodate(folio);
 	}
+	shmem_set_range_uptodate(folio, 0, folio_size(folio));
 	folio_mark_dirty(folio);
 	folio_unlock(folio);
 	folio_put(folio);
@@ -2782,6 +2909,59 @@ shmem_write_end(struct file *file, struct address_space *mapping,
 	return copied;
 }
 
+static void shmem_invalidate_folio(struct folio *folio, size_t offset,
+				   size_t len)
+{
+	/*
+	 * If we're invalidating the entire folio, clear the dirty state
+	 * from it and release it to avoid unnecessary buildup of the LRU.
+	 */
+	if (offset == 0 && len == folio_size(folio)) {
+		WARN_ON_ONCE(folio_test_writeback(folio));
+		folio_cancel_dirty(folio);
+		sfs_free(folio, true);
+	}
+}
+
+static bool shmem_release_folio(struct folio *folio, gfp_t gfp_flags)
+{
+	if (folio_test_dirty(folio) && !sfs_is_fully_uptodate(folio))
+		return false;
+
+	sfs_free(folio, false);
+	return true;
+}
+
+/*
+ * shmem_is_partially_uptodate checks whether blocks within a folio are
+ * uptodate or not.
+ *
+ * Returns true if all blocks which correspond to the specified part
+ * of the folio are uptodate.
+ */
+static bool shmem_is_partially_uptodate(struct folio *folio, size_t from,
+					size_t count)
+{
+	struct shmem_folio_state *sfs = folio->private;
+	struct inode *inode = folio->mapping->host;
+	unsigned int first, last;
+
+	if (!sfs)
+		return false;
+
+	/* Caller's range may extend past the end of this folio */
+	count = min(folio_size(folio) - from, count);
+
+	/* First and last blocks in range within folio */
+	first = from >> inode->i_blkbits;
+	last = (from + count - 1) >> inode->i_blkbits;
+
+	if (sfs_get_last_block_uptodate(folio, first, last) != last)
+		return false;
+
+	return true;
+}
+
 static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *file = iocb->ki_filp;
@@ -3533,7 +3713,7 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 			goto out_remove_offset;
 		inode->i_op = &shmem_symlink_inode_operations;
 		memcpy(folio_address(folio), symname, len);
-		folio_mark_uptodate(folio);
+		shmem_set_range_uptodate(folio, 0, folio_size(folio));
 		folio_mark_dirty(folio);
 		folio_unlock(folio);
 		folio_put(folio);
@@ -4523,7 +4703,10 @@ static const struct address_space_operations shmem_aops = {
 #ifdef CONFIG_MIGRATION
 	.migrate_folio	= migrate_folio,
 #endif
-	.error_remove_folio = shmem_error_remove_folio,
+	.error_remove_folio    = shmem_error_remove_folio,
+	.invalidate_folio      = shmem_invalidate_folio,
+	.release_folio         = shmem_release_folio,
+	.is_partially_uptodate = shmem_is_partially_uptodate,
 };
 
 static const struct file_operations shmem_file_operations = {
-- 
2.43.0


  parent reply	other threads:[~2024-05-15  5:57 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20240515055723eucas1p11bf14732f7fac943e688369ff7765f79@eucas1p1.samsung.com>
2024-05-15  5:57 ` [PATCH 00/12] [LSF/MM/BPF RFC] shmem/tmpfs: add large folios support Daniel Gomez
     [not found]   ` <CGME20240515055724eucas1p1c502dbded4dc6ff929c7aff570de80c2@eucas1p1.samsung.com>
2024-05-15  5:57     ` [PATCH 01/12] splice: don't check for uptodate if partially uptodate is impl Daniel Gomez
     [not found]   ` <CGME20240515055726eucas1p2a795fc743373571bfc3349f9e1ef3f9e@eucas1p2.samsung.com>
2024-05-15  5:57     ` Daniel Gomez [this message]
     [not found]   ` <CGME20240515055727eucas1p2413c65b8b227ac0c6007b4600574abd8@eucas1p2.samsung.com>
2024-05-15  5:57     ` [PATCH 03/12] shmem: move folio zero operation to write_begin() Daniel Gomez
     [not found]   ` <CGME20240515055728eucas1p181e0ed81b2663eb0eee6d6134c1c1956@eucas1p1.samsung.com>
2024-05-15  5:57     ` [PATCH 04/12] shmem: exit shmem_get_folio_gfp() if block is uptodate Daniel Gomez
     [not found]   ` <CGME20240515055729eucas1p14e953424ad39bbb923c64163b1bbd4b3@eucas1p1.samsung.com>
2024-05-15  5:57     ` [PATCH 05/12] shmem: clear_highpage() if block is not uptodate Daniel Gomez
     [not found]   ` <CGME20240515055731eucas1p12cbbba88e24a011ef5871f90ff25ae73@eucas1p1.samsung.com>
2024-05-15  5:57     ` [PATCH 06/12] shmem: set folio uptodate when reclaim Daniel Gomez
     [not found]   ` <CGME20240515055732eucas1p2302bbca4d60e2e811a5c59e34f83628d@eucas1p2.samsung.com>
2024-05-15  5:57     ` [PATCH 07/12] shmem: check if a block is uptodate before splice into pipe Daniel Gomez
2024-05-16 13:19       ` kernel test robot
     [not found]   ` <CGME20240515055733eucas1p2804d2fb5f5bf7d6adb460054f6e9f4d8@eucas1p2.samsung.com>
2024-05-15  5:57     ` [PATCH 08/12] shmem: clear uptodate blocks after PUNCH_HOLE Daniel Gomez
     [not found]   ` <CGME20240515055735eucas1p2a967b4eebc8e059588cd62139f006b0d@eucas1p2.samsung.com>
2024-05-15  5:57     ` [PATCH 09/12] shmem: enable per-block uptodate Daniel Gomez
     [not found]   ` <CGME20240515055736eucas1p1bfa9549398e766532d143ba9314bee18@eucas1p1.samsung.com>
2024-05-15  5:57     ` [PATCH 10/12] shmem: add order arg to shmem_alloc_folio() Daniel Gomez
     [not found]   ` <CGME20240515055738eucas1p15335a32c790b731aa5857193bbddf92d@eucas1p1.samsung.com>
2024-05-15  5:57     ` [PATCH 11/12] shmem: add file length arg in shmem_get_folio() path Daniel Gomez
2024-05-15 17:47       ` kernel test robot
2024-05-17 16:17       ` Darrick J. Wong
2024-05-21 11:38         ` Daniel Gomez
2024-05-21 16:36           ` Darrick J. Wong
     [not found]   ` <CGME20240515055740eucas1p1bf112e73a7009a0f9b2bbf09c989a51b@eucas1p1.samsung.com>
2024-05-15  5:57     ` [PATCH 12/12] shmem: add large folio support to the write and fallocate paths Daniel Gomez
2024-05-15 18:59       ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240515055719.32577-3-da.gomez@samsung.com \
    --to=da.gomez@samsung.com \
    --cc=akpm@linux-foundation.org \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=brauner@kernel.org \
    --cc=chandan.babu@oracle.com \
    --cc=dagmcr@gmail.com \
    --cc=david@redhat.com \
    --cc=djwong@kernel.org \
    --cc=hughd@google.com \
    --cc=jack@suse.cz \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=lsf-pc@lists.linux-foundation.org \
    --cc=mcgrof@kernel.org \
    --cc=p.raghav@samsung.com \
    --cc=ritesh.list@gmail.com \
    --cc=willy@infradead.org \
    --cc=yosryahmed@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.