linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Miklos Szeredi <miklos@szeredi.hu>
From: Nick Piggin <npiggin@suse.de>
To: akpm@linux-foundation.org
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org, Nick Piggin <npiggin@suse.de>,
	Christoph Hellwig <hch@infradead.org>
Subject: [patch 7/8] fuse: implement perform_write
Date: Mon, 17 Mar 2008 20:19:15 +0100	[thread overview]
Message-ID: <20080317191949.492303025@szeredi.hu> (raw)
In-Reply-To: <20080317191908.123631326@szeredi.hu>

[-- Attachment #1: fuse_perform_write.patch --]
[-- Type: text/plain, Size: 5933 bytes --]

Introduce fuse_perform_write. With fusexmp (a passthrough filesystem), large
(1MB) writes into a backing tmpfs filesystem are sped up by almost 4 times
(256MB/s vs 71MB/s).

[mszeredi@suse.cz]:

 - split into smaller functions
 - testing
 - duplicate generic_file_aio_write(), so that there's no need to add a
   new ->perform_write() a_op.  Comment from hch.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Christoph Hellwig <hch@infradead.org>
---
 fs/fuse/file.c |  194 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 193 insertions(+), 1 deletion(-)

Index: linux/fs/fuse/file.c
===================================================================
--- linux.orig/fs/fuse/file.c	2008-03-17 18:26:28.000000000 +0100
+++ linux/fs/fuse/file.c	2008-03-17 18:35:26.000000000 +0100
@@ -677,6 +677,198 @@ static int fuse_write_end(struct file *f
 	return res;
 }
 
+static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
+				    struct inode *inode, loff_t pos,
+				    size_t count)
+{
+	size_t res;
+	unsigned offset;
+	unsigned i;
+
+	for (i = 0; i < req->num_pages; i++)
+		fuse_wait_on_page_writeback(inode, req->pages[i]->index);
+
+	res = fuse_send_write(req, file, inode, pos, count, NULL);
+
+	offset = req->page_offset;
+	count = res;
+	for (i = 0; i < req->num_pages; i++) {
+		struct page *page = req->pages[i];
+
+		if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE)
+			SetPageUptodate(page);
+
+		if (count > PAGE_CACHE_SIZE - offset)
+			count -= PAGE_CACHE_SIZE - offset;
+		else
+			count = 0;
+		offset = 0;
+
+		unlock_page(page);
+		page_cache_release(page);
+	}
+
+	return res;
+}
+
+static ssize_t fuse_fill_write_pages(struct fuse_req *req,
+			       struct address_space *mapping,
+			       struct iov_iter *ii, loff_t pos)
+{
+	struct fuse_conn *fc = get_fuse_conn(mapping->host);
+	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+	size_t count = 0;
+	int err;
+
+	req->page_offset = offset;
+
+	do {
+		size_t tmp;
+		struct page *page;
+		pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+		size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset,
+				     iov_iter_count(ii));
+
+		bytes = min_t(size_t, bytes, fc->max_write - count);
+
+ again:
+		err = -EFAULT;
+		if (iov_iter_fault_in_readable(ii, bytes))
+			break;
+
+		err = -ENOMEM;
+		page = __grab_cache_page(mapping, index);
+		if (!page)
+			break;
+
+		pagefault_disable();
+		tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
+		pagefault_enable();
+		flush_dcache_page(page);
+
+		if (!tmp) {
+			unlock_page(page);
+			page_cache_release(page);
+			bytes = min(bytes, iov_iter_single_seg_count(ii));
+			goto again;
+		}
+
+		err = 0;
+		req->pages[req->num_pages] = page;
+		req->num_pages++;
+
+		iov_iter_advance(ii, tmp);
+		count += tmp;
+		pos += tmp;
+		offset += tmp;
+		if (offset == PAGE_CACHE_SIZE)
+			offset = 0;
+
+	} while (iov_iter_count(ii) && count < fc->max_write &&
+		 req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0);
+
+	return count > 0 ? count : err;
+}
+
+static ssize_t fuse_perform_write(struct file *file,
+				  struct address_space *mapping,
+				  struct iov_iter *ii, loff_t pos)
+{
+	struct inode *inode = mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	int err = 0;
+	ssize_t res = 0;
+
+	if (is_bad_inode(inode))
+		return -EIO;
+
+	do {
+		struct fuse_req *req;
+		ssize_t count;
+
+		req = fuse_get_req(fc);
+		if (IS_ERR(req)) {
+			err = PTR_ERR(req);
+			break;
+		}
+
+		count = fuse_fill_write_pages(req, mapping, ii, pos);
+		if (count <= 0) {
+			err = count;
+		} else {
+			size_t num_written;
+
+			num_written = fuse_send_write_pages(req, file, inode,
+							    pos, count);
+			err = req->out.h.error;
+			if (!err) {
+				res += num_written;
+				pos += num_written;
+
+				/* break out of the loop on short write */
+				if (num_written != count)
+					err = -EIO;
+			}
+		}
+		fuse_put_request(fc, req);
+	} while (!err && iov_iter_count(ii));
+
+	if (res > 0)
+		fuse_write_update_size(inode, pos);
+
+	fuse_invalidate_attr(inode);
+
+	return res > 0 ? res : err;
+}
+
+static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+				   unsigned long nr_segs, loff_t pos)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	size_t count = 0;
+	ssize_t written = 0;
+	struct inode *inode = mapping->host;
+	ssize_t err;
+	struct iov_iter i;
+
+	WARN_ON(iocb->ki_pos != pos);
+
+	err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
+	if (err)
+		return err;
+
+	mutex_lock(&inode->i_mutex);
+	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+
+	/* We can write back this queue in page reclaim */
+	current->backing_dev_info = mapping->backing_dev_info;
+
+	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
+	if (err)
+		goto out;
+
+	if (count == 0)
+		goto out;
+
+	err = remove_suid(file->f_path.dentry);
+	if (err)
+		goto out;
+
+	file_update_time(file);
+
+	iov_iter_init(&i, iov, nr_segs, count, 0);
+	written = fuse_perform_write(file, mapping, &i, pos);
+	if (written >= 0)
+		iocb->ki_pos = pos + written;
+
+out:
+	current->backing_dev_info = NULL;
+	mutex_unlock(&inode->i_mutex);
+
+	return written ? written : err;
+}
+
 static void fuse_release_user_pages(struct fuse_req *req, int write)
 {
 	unsigned i;
@@ -1202,7 +1394,7 @@ static const struct file_operations fuse
 	.read		= do_sync_read,
 	.aio_read	= fuse_file_aio_read,
 	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.aio_write	= fuse_file_aio_write,
 	.mmap		= fuse_file_mmap,
 	.open		= fuse_open,
 	.flush		= fuse_flush,

--

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2008-03-17 19:19 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-03-17 19:19 [patch 0/8] fuse: writable mmap + batched write Miklos Szeredi
2008-03-17 19:19 ` [patch 1/8] mm: bdi: export bdi_writeout_inc() Miklos Szeredi, Miklos Szeredi
2008-03-18 11:27   ` Peter Zijlstra
2008-03-18 11:46     ` Miklos Szeredi
2008-03-17 19:19 ` [patch 2/8] mm: Add NR_WRITEBACK_TEMP counter Miklos Szeredi, Miklos Szeredi
2008-03-18  5:05   ` Andrew Morton
2008-03-17 19:19 ` [patch 3/8] mm: rotate_reclaimable_page() cleanup Miklos Szeredi, Miklos Szeredi
2008-03-18 11:31   ` Peter Zijlstra
2008-03-18 11:56     ` Miklos Szeredi
2008-03-18 16:45       ` Andrew Morton
2008-03-17 19:19 ` [patch 4/8] mm: allow not updating BDI stats in end_page_writeback() Miklos Szeredi, Miklos Szeredi
2008-03-18  5:04   ` Andrew Morton
2008-03-18  8:11     ` Miklos Szeredi
2008-03-18  8:18       ` Andrew Morton
2008-03-18 11:33   ` Peter Zijlstra
2008-03-18 11:59     ` Miklos Szeredi
2008-03-18 12:29       ` Peter Zijlstra
2008-03-18 12:51         ` Miklos Szeredi
2008-03-18 13:08           ` Peter Zijlstra
2008-03-18 13:58             ` Miklos Szeredi
2008-03-18 13:59               ` Peter Zijlstra
2008-03-18 15:53                 ` Miklos Szeredi
2008-03-18 16:49                   ` Andrew Morton
2008-03-17 19:19 ` [patch 5/8] fuse: support writable mmap Miklos Szeredi, Miklos Szeredi
2008-03-17 19:19 ` [patch 6/8] fuse: clean up setting i_size in write Miklos Szeredi, Miklos Szeredi
2008-03-18  5:08   ` Andrew Morton
2008-03-18  8:16     ` Miklos Szeredi
2008-03-17 19:19 ` Miklos Szeredi, Nick Piggin [this message]
2008-03-17 19:19 ` [patch 8/8] fuse: update file size on short read Miklos Szeredi, Miklos Szeredi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080317191949.492303025@szeredi.hu \
    --to=miklos@szeredi.hu \
    --cc=akpm@linux-foundation.org \
    --cc=hch@infradead.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=npiggin@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox