linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: netdev@vger.kernel.org
Cc: David Howells <dhowells@redhat.com>,
	Matthew Wilcox <willy@infradead.org>,
	Dave Chinner <david@fromorbit.com>,
	Matt Whitlock <kernel@mattwhitlock.name>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Jens Axboe <axboe@kernel.dk>,
	linux-fsdevel@kvack.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, Christoph Hellwig <hch@lst.de>,
	linux-fsdevel@vger.kernel.org
Subject: [RFC PATCH 1/4] splice: Fix corruption of spliced data after splice() returns
Date: Thu, 29 Jun 2023 16:54:30 +0100	[thread overview]
Message-ID: <20230629155433.4170837-2-dhowells@redhat.com> (raw)
In-Reply-To: <20230629155433.4170837-1-dhowells@redhat.com>

Splicing data from, say, a file into a pipe currently leaves the source
pages in the pipe after splice() returns - but this means that those pages
can be subsequently modified by shared-writable mmap(), write(),
fallocate(), etc. before they're consumed.

Fix this by stealing the pages in splice() before they're added to the pipe
if no one else is using them or has them mapped and copying them otherwise.

Reported-by: Matt Whitlock <kernel@mattwhitlock.name>
Link: https://lore.kernel.org/r/ec804f26-fa76-4fbe-9b1c-8fbbd829b735@mattwhitlock.name/
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Matthew Wilcox <willy@infradead.org>
cc: Dave Chinner <david@fromorbit.com>
cc: Christoph Hellwig <hch@lst.de>
cc: Jens Axboe <axboe@kernel.dk>
cc: linux-fsdevel@vger.kernel.org
---
 mm/filemap.c  | 92 ++++++++++++++++++++++++++++++++++++++++++++++++---
 mm/internal.h |  4 +--
 mm/shmem.c    |  8 +++--
 3 files changed, 95 insertions(+), 9 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 9e44a49bbd74..a002df515966 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2838,15 +2838,87 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 }
 EXPORT_SYMBOL(generic_file_read_iter);
 
+static inline void copy_folio_to_folio(struct folio *src, size_t src_offset,
+				       struct folio *dst, size_t dst_offset,
+				       size_t size)
+{
+	void *p, *q;
+
+	while (size > 0) {
+		size_t part = min3(PAGE_SIZE - src_offset % PAGE_SIZE,
+				   PAGE_SIZE - dst_offset % PAGE_SIZE,
+				   size);
+
+		p = kmap_local_folio(src, src_offset);
+		q = kmap_local_folio(dst, dst_offset);
+		memcpy(q, p, part);
+		kunmap_local(p);
+		kunmap_local(q);
+		src_offset += part;
+		dst_offset += part;
+		size -= part;
+	}
+}
+
 /*
- * Splice subpages from a folio into a pipe.
+ * Splice data from a folio into a pipe.  The folio is stolen if no one else is
+ * using it and copied otherwise.  We can't put the folio into the pipe still
+ * attached to the pagecache as that allows someone to modify it after the
+ * splice.
  */
-size_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
-			      struct folio *folio, loff_t fpos, size_t size)
+ssize_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
+			       struct folio *folio, loff_t fpos, size_t size)
 {
+	struct address_space *mapping;
+	struct folio *copy = NULL;
 	struct page *page;
+	unsigned int flags = 0;
+	ssize_t ret;
 	size_t spliced = 0, offset = offset_in_folio(folio, fpos);
 
+	folio_lock(folio);
+
+	mapping = folio_mapping(folio);
+	ret = -ENODATA;
+	if (!folio->mapping)
+		goto err_unlock; /* Truncated */
+	ret = -EIO;
+	if (!folio_test_uptodate(folio))
+		goto err_unlock;
+
+	/*
+	 * At least for ext2 with nobh option, we need to wait on writeback
+	 * completing on this folio, since we'll remove it from the pagecache.
+	 * Otherwise truncate wont wait on the folio, allowing the disk blocks
+	 * to be reused by someone else before we actually wrote our data to
+	 * them. fs corruption ensues.
+	 */
+	folio_wait_writeback(folio);
+
+	if (folio_has_private(folio) &&
+	    !filemap_release_folio(folio, GFP_KERNEL))
+		goto need_copy;
+
+	/* If we succeed in removing the mapping, set LRU flag and add it. */
+	if (remove_mapping(mapping, folio)) {
+		folio_unlock(folio);
+		flags = PIPE_BUF_FLAG_LRU;
+		goto add_to_pipe;
+	}
+
+need_copy:
+	folio_unlock(folio);
+
+	copy = folio_alloc(GFP_KERNEL, 0);
+	if (!copy)
+		return -ENOMEM;
+
+	size = min(size, PAGE_SIZE - offset % PAGE_SIZE);
+	copy_folio_to_folio(folio, offset, copy, 0, size);
+	folio = copy;
+	offset = 0;
+
+add_to_pipe:
 	page = folio_page(folio, offset / PAGE_SIZE);
 	size = min(size, folio_size(folio) - offset);
 	offset %= PAGE_SIZE;
@@ -2861,6 +2933,7 @@ size_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
 			.page	= page,
 			.offset	= offset,
 			.len	= part,
+			.flags	= flags,
 		};
 		folio_get(folio);
 		pipe->head++;
@@ -2869,7 +2942,13 @@ size_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
 		offset = 0;
 	}
 
+	if (copy)
+		folio_put(copy);
 	return spliced;
+
+err_unlock:
+	folio_unlock(folio);
+	return ret;
 }
 
 /**
@@ -2947,7 +3026,7 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
 
 		for (i = 0; i < folio_batch_count(&fbatch); i++) {
 			struct folio *folio = fbatch.folios[i];
-			size_t n;
+			ssize_t n;
 
 			if (folio_pos(folio) >= end_offset)
 				goto out;
@@ -2963,8 +3042,11 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
 
 			n = min_t(loff_t, len, isize - *ppos);
 			n = splice_folio_into_pipe(pipe, folio, *ppos, n);
-			if (!n)
+			if (n <= 0) {
+				if (n < 0)
+					error = n;
 				goto out;
+			}
 			len -= n;
 			total_spliced += n;
 			*ppos += n;
diff --git a/mm/internal.h b/mm/internal.h
index a7d9e980429a..ae395e0f31d5 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -881,8 +881,8 @@ struct migration_target_control {
 /*
  * mm/filemap.c
  */
-size_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
-			      struct folio *folio, loff_t fpos, size_t size);
+ssize_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
+			       struct folio *folio, loff_t fpos, size_t size);
 
 /*
  * mm/vmalloc.c
diff --git a/mm/shmem.c b/mm/shmem.c
index 2f2e0e618072..969931b0f00e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2783,7 +2783,8 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
 	struct inode *inode = file_inode(in);
 	struct address_space *mapping = inode->i_mapping;
 	struct folio *folio = NULL;
-	size_t total_spliced = 0, used, npages, n, part;
+	ssize_t n;
+	size_t total_spliced = 0, used, npages, part;
 	loff_t isize;
 	int error = 0;
 
@@ -2844,8 +2845,11 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
 			n = splice_zeropage_into_pipe(pipe, *ppos, len);
 		}
 
-		if (!n)
+		if (n <= 0) {
+			if (n < 0)
+				error = n;
 			break;
+		}
 		len -= n;
 		total_spliced += n;
 		*ppos += n;



  reply	other threads:[~2023-06-29 15:55 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-29 15:54 [RFC PATCH 0/4] splice: Fix corruption in data spliced to pipe David Howells
2023-06-29 15:54 ` David Howells [this message]
2023-07-19 10:17   ` [RFC PATCH 1/4] splice: Fix corruption of spliced data after splice() returns Miklos Szeredi
2023-07-19 17:59     ` Matt Whitlock
2023-07-19 19:35       ` Miklos Szeredi
2023-07-19 19:44         ` Matthew Wilcox
2023-07-19 19:56           ` Miklos Szeredi
2023-07-19 20:04             ` Matthew Wilcox
2023-07-19 20:16           ` Linus Torvalds
2023-07-19 21:02             ` Matt Whitlock
2023-07-19 23:20               ` Linus Torvalds
2023-07-19 23:41                 ` Matt Whitlock
2023-07-20  0:00                   ` Linus Torvalds
2023-07-19 23:48                 ` Linus Torvalds
2023-07-24  9:44           ` David Howells
2023-07-24 13:55             ` Miklos Szeredi
2023-07-24 16:15             ` David Howells
2023-06-29 15:54 ` [RFC PATCH 2/4] splice: Make vmsplice() steal or copy David Howells
2023-06-30 13:44   ` Simon Horman
2023-06-30 15:29   ` David Howells
2023-06-30 17:32     ` Simon Horman
2023-06-29 15:54 ` [RFC PATCH 3/4] splice: Remove some now-unused bits David Howells
2023-06-29 15:54 ` [RFC PATCH 4/4] splice: Record some statistics David Howells
2023-06-29 17:56 ` [RFC PATCH 0/4] splice: Fix corruption in data spliced to pipe Linus Torvalds
2023-06-29 18:05   ` Matt Whitlock
2023-06-29 18:19     ` Linus Torvalds
2023-06-29 18:34       ` Matthew Wilcox
2023-06-29 18:53         ` Linus Torvalds
2023-06-30 16:50         ` David Howells
2023-06-29 18:42       ` Linus Torvalds
2023-06-29 18:16 ` Matt Whitlock
2023-06-30  0:01 ` Jakub Kicinski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230629155433.4170837-2-dhowells@redhat.com \
    --to=dhowells@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=david@fromorbit.com \
    --cc=hch@lst.de \
    --cc=kernel@mattwhitlock.name \
    --cc=linux-fsdevel@kvack.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=netdev@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox