linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: wangtao <tao.wangtao@honor.com>
To: <sumit.semwal@linaro.org>, <christian.koenig@amd.com>,
	<kraxel@redhat.com>, <vivek.kasireddy@intel.com>,
	<viro@zeniv.linux.org.uk>, <brauner@kernel.org>,
	<hughd@google.com>, <akpm@linux-foundation.org>,
	<amir73il@gmail.com>
Cc: <benjamin.gaignard@collabora.com>, <Brian.Starkey@arm.com>,
	<jstultz@google.com>, <tjmercier@google.com>, <jack@suse.cz>,
	<baolin.wang@linux.alibaba.com>, <linux-media@vger.kernel.org>,
	<dri-devel@lists.freedesktop.org>,
	<linaro-mm-sig@lists.linaro.org>, <linux-kernel@vger.kernel.org>,
	<linux-fsdevel@vger.kernel.org>, <linux-mm@kvack.org>,
	<bintian.wang@honor.com>, <yipengxiang@honor.com>,
	<liulu.liu@honor.com>, <feng.han@honor.com>,
	wangtao <tao.wangtao@honor.com>
Subject: [PATCH v4 1/4] fs: allow cross-FS copy_file_range for memory file with direct I/O
Date: Tue, 3 Jun 2025 17:52:42 +0800	[thread overview]
Message-ID: <20250603095245.17478-2-tao.wangtao@honor.com> (raw)
In-Reply-To: <20250603095245.17478-1-tao.wangtao@honor.com>

Memory files can optimize copy performance via copy_file_range callbacks:
- Compared to mmap + read: reduces GUP (get_user_pages) overhead
- Compared to sendfile/splice: eliminates one memory copy
- Supports dma-buf direct I/O zero-copy implementation

Suggested-by: Christian König <christian.koenig@amd.com>
Suggested-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: wangtao <tao.wangtao@honor.com>
---
 fs/read_write.c    | 64 +++++++++++++++++++++++++++++++++++++---------
 include/linux/fs.h |  2 ++
 2 files changed, 54 insertions(+), 12 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index bb0ed26a0b3a..ecb4f753c632 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1469,6 +1469,31 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
 }
 #endif
 
+static const struct file_operations *memory_copy_file_ops(
+			struct file *file_in, struct file *file_out)
+{
+	if ((file_in->f_op->fop_flags & FOP_MEMORY_FILE) &&
+	    (file_in->f_mode & FMODE_CAN_ODIRECT) &&
+	    file_in->f_op->copy_file_range && file_out->f_op->write_iter)
+		return file_in->f_op;
+	else if ((file_out->f_op->fop_flags & FOP_MEMORY_FILE) &&
+		 (file_out->f_mode & FMODE_CAN_ODIRECT) &&
+		 file_in->f_op->read_iter && file_out->f_op->copy_file_range)
+		return file_out->f_op;
+	else
+		return NULL;
+}
+
+static int essential_file_rw_checks(struct file *file_in, struct file *file_out)
+{
+	if (!(file_in->f_mode & FMODE_READ) ||
+	    !(file_out->f_mode & FMODE_WRITE) ||
+	    (file_out->f_flags & O_APPEND))
+		return -EBADF;
+
+	return 0;
+}
+
 /*
  * Performs necessary checks before doing a file copy
  *
@@ -1484,9 +1509,16 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
 	struct inode *inode_out = file_inode(file_out);
 	uint64_t count = *req_count;
 	loff_t size_in;
+	bool splice = flags & COPY_FILE_SPLICE;
+	const struct file_operations *mem_fops;
 	int ret;
 
-	ret = generic_file_rw_checks(file_in, file_out);
+	/* The dma-buf file is not a regular file. */
+	mem_fops = memory_copy_file_ops(file_in, file_out);
+	if (splice || mem_fops == NULL)
+		ret = generic_file_rw_checks(file_in, file_out);
+	else
+		ret = essential_file_rw_checks(file_in, file_out);
 	if (ret)
 		return ret;
 
@@ -1500,8 +1532,10 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
 	 * and several different sets of file_operations, but they all end up
 	 * using the same ->copy_file_range() function pointer.
 	 */
-	if (flags & COPY_FILE_SPLICE) {
+	if (splice) {
 		/* cross sb splice is allowed */
+	} else if (mem_fops != NULL) {
+		/* cross-fs copy is allowed for memory file. */
 	} else if (file_out->f_op->copy_file_range) {
 		if (file_in->f_op->copy_file_range !=
 		    file_out->f_op->copy_file_range)
@@ -1554,6 +1588,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 	ssize_t ret;
 	bool splice = flags & COPY_FILE_SPLICE;
 	bool samesb = file_inode(file_in)->i_sb == file_inode(file_out)->i_sb;
+	const struct file_operations *mem_fops;
 
 	if (flags & ~COPY_FILE_SPLICE)
 		return -EINVAL;
@@ -1574,18 +1609,27 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 	if (len == 0)
 		return 0;
 
+	if (splice)
+		goto do_splice;
+
 	file_start_write(file_out);
 
 	/*
 	 * Cloning is supported by more file systems, so we implement copy on
 	 * same sb using clone, but for filesystems where both clone and copy
 	 * are supported (e.g. nfs,cifs), we only call the copy method.
+	 * For copy to/from memory file, we always call the copy method of the
+	 * memory file.
 	 */
-	if (!splice && file_out->f_op->copy_file_range) {
+	mem_fops = memory_copy_file_ops(file_in, file_out);
+	if (mem_fops) {
+		ret = mem_fops->copy_file_range(file_in, pos_in,
+					file_out, pos_out, len, flags);
+	} else if (file_out->f_op->copy_file_range) {
 		ret = file_out->f_op->copy_file_range(file_in, pos_in,
-						      file_out, pos_out,
-						      len, flags);
-	} else if (!splice && file_in->f_op->remap_file_range && samesb) {
+						file_out, pos_out,
+						len, flags);
+	} else if (file_in->f_op->remap_file_range && samesb) {
 		ret = file_in->f_op->remap_file_range(file_in, pos_in,
 				file_out, pos_out,
 				min_t(loff_t, MAX_RW_COUNT, len),
@@ -1603,6 +1647,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 	if (!splice)
 		goto done;
 
+do_splice:
 	/*
 	 * We can get here for same sb copy of filesystems that do not implement
 	 * ->copy_file_range() in case filesystem does not support clone or in
@@ -1786,12 +1831,7 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out)
 	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
 		return -EINVAL;
 
-	if (!(file_in->f_mode & FMODE_READ) ||
-	    !(file_out->f_mode & FMODE_WRITE) ||
-	    (file_out->f_flags & O_APPEND))
-		return -EBADF;
-
-	return 0;
+	return essential_file_rw_checks(file_in, file_out);
 }
 
 int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 016b0fe1536e..37df1b497418 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2187,6 +2187,8 @@ struct file_operations {
 #define FOP_ASYNC_LOCK		((__force fop_flags_t)(1 << 6))
 /* File system supports uncached read/write buffered IO */
 #define FOP_DONTCACHE		((__force fop_flags_t)(1 << 7))
+/* Supports cross-FS copy_file_range for memory file */
+#define FOP_MEMORY_FILE		((__force fop_flags_t)(1 << 8))
 
 /* Wrap a directory iterator that needs exclusive inode access */
 int wrap_directory_iterator(struct file *, struct dir_context *,
-- 
2.17.1



  reply	other threads:[~2025-06-03  9:54 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-03  9:52 [PATCH v4 0/4] Implement dmabuf direct I/O via copy_file_range wangtao
2025-06-03  9:52 ` wangtao [this message]
2025-06-03 10:56   ` [PATCH v4 1/4] fs: allow cross-FS copy_file_range for memory file with direct I/O Amir Goldstein
2025-06-03 12:38     ` wangtao
2025-06-03 12:43       ` Amir Goldstein
2025-06-03  9:52 ` [PATCH v4 2/4] dmabuf: Implement copy_file_range callback for dmabuf direct I/O prep wangtao
2025-06-03 10:42   ` Christian König
2025-06-03 12:26     ` wangtao
2025-06-03 13:04   ` Christoph Hellwig
2025-06-03  9:52 ` [PATCH v4 3/4] udmabuf: Implement udmabuf direct I/O wangtao
2025-06-03  9:52 ` [PATCH v4 4/4] dmabuf:system_heap Implement system_heap dmabuf " wangtao
2025-06-03 13:00 ` [PATCH v4 0/4] Implement dmabuf direct I/O via copy_file_range Christoph Hellwig
2025-06-03 13:14   ` Christian König
2025-06-03 13:19     ` Christoph Hellwig
2025-06-03 14:18       ` Christian König
2025-06-03 14:28         ` Christoph Hellwig
2025-06-03 15:55           ` Christian König
2025-06-03 16:01             ` Christoph Hellwig
2025-06-06  9:59               ` wangtao
2025-06-06  9:52       ` wangtao
2025-06-06 11:20         ` Christian König
2025-06-09  4:35           ` Christoph Hellwig
2025-06-09  9:32             ` wangtao
2025-06-10 10:52               ` Christian König
2025-06-10 13:37                 ` Christoph Hellwig
2025-06-13  9:43                   ` wangtao
2025-06-16  5:24                     ` Christoph Hellwig
2025-06-10 13:34               ` Christoph Hellwig
2025-06-13  9:33                 ` wangtao
2025-06-16  5:25                   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250603095245.17478-2-tao.wangtao@honor.com \
    --to=tao.wangtao@honor.com \
    --cc=Brian.Starkey@arm.com \
    --cc=akpm@linux-foundation.org \
    --cc=amir73il@gmail.com \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=benjamin.gaignard@collabora.com \
    --cc=bintian.wang@honor.com \
    --cc=brauner@kernel.org \
    --cc=christian.koenig@amd.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=feng.han@honor.com \
    --cc=hughd@google.com \
    --cc=jack@suse.cz \
    --cc=jstultz@google.com \
    --cc=kraxel@redhat.com \
    --cc=linaro-mm-sig@lists.linaro.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-media@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=liulu.liu@honor.com \
    --cc=sumit.semwal@linaro.org \
    --cc=tjmercier@google.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=vivek.kasireddy@intel.com \
    --cc=yipengxiang@honor.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox