linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Oren Laadan <orenl@cs.columbia.edu>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@osdl.org>,
	containers@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-api@vger.kernel.org, Serge Hallyn <serue@us.ibm.com>,
	Dave Hansen <dave@linux.vnet.ibm.com>,
	Ingo Molnar <mingo@elte.hu>, "H. Peter Anvin" <hpa@zytor.com>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Pavel Emelyanov <xemul@openvz.org>,
	Alexey Dobriyan <adobriyan@gmail.com>,
	Oren Laadan <orenl@cs.columbia.edu>
Subject: [RFC v16][PATCH 26/43] splice: added support for pipe-to-pipe splice()
Date: Wed, 27 May 2009 13:32:52 -0400	[thread overview]
Message-ID: <1243445589-32388-27-git-send-email-orenl@cs.columbia.edu> (raw)
In-Reply-To: <1243445589-32388-1-git-send-email-orenl@cs.columbia.edu>

This patch is a modified version of Max Kellerman's patch that allows
splice() between pipes (see http://patchwork.kernel.org/patch/21042).
By refactoring link_pipe(), do_tee() and do_splice_pipes() shrink
considerably. Below is Max's original description:

--
This patch enables the splice() system call to copy buffers from one
pipe to another.  This obvious and trivial use case for splice() was
not supported until now.

It reuses the functions link_ipipe_prep() and link_opipe_prep() from
the tee() system call implementation.
--

Signed-off-by: Oren Laadan <orenl@cs.columbia.edu>
---
 fs/splice.c |  203 ++++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 166 insertions(+), 37 deletions(-)

diff --git a/fs/splice.c b/fs/splice.c
index 92dd63c..96e0d58 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -903,13 +903,95 @@ ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
 EXPORT_SYMBOL(generic_splice_sendpage);
 
 /*
+ * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
+ * location, so checking ->i_pipe is not enough to verify that this is a
+ * pipe.
+ */
+static inline struct pipe_inode_info *pipe_info(struct inode *inode)
+{
+	if (S_ISFIFO(inode->i_mode))
+		return inode->i_pipe;
+
+	return NULL;
+}
+
+static int link_pipe_prep(struct pipe_inode_info *ipipe,
+			  struct pipe_inode_info *opipe,
+			  unsigned int flags);
+static long do_link_pipe(struct pipe_inode_info *ipipe,
+			 struct pipe_inode_info *opipe,
+			 size_t len, unsigned int flags, int move);
+
+/**
+ * do_splice_pipes - splice pages from one pipe to another
+ *
+ * @ipipe: the input pipe
+ * @opipe: the output pipe
+ * @len: the maximum number of bytes to move
+ * @flags: splice modifier flags
+ */
+static long do_splice_pipes(struct pipe_inode_info *ipipe,
+			    struct pipe_inode_info *opipe,
+			    size_t len, unsigned int flags)
+{
+	int do_wakeup;
+	long ret;
+
+	if (ipipe == opipe)
+		return -EINVAL;
+
+	ret = link_pipe_prep(ipipe, opipe, flags);
+	if (ret < 0)
+		return ret;
+
+	/* both pipes are now locked */
+
+	do_wakeup = ipipe->nrbufs;
+	ret = do_link_pipe(ipipe, opipe, len, flags, 1);
+	do_wakeup -= ipipe->nrbufs;
+
+	pipe_unlock(ipipe);
+	pipe_unlock(opipe);
+
+	if (do_wakeup) {
+		/* at least one buffer was removed from the
+		   input pipe: wake up potential writers */
+		smp_mb();
+		if (waitqueue_active(&ipipe->wait))
+			wake_up_interruptible(&ipipe->wait);
+		kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT);
+	}
+
+	/*
+	 * If we put data in the output pipe, wakeup any potential
+	 * readers.
+	 */
+	if (ret > 0) {
+		smp_mb();
+		if (waitqueue_active(&opipe->wait))
+			wake_up_interruptible(&opipe->wait);
+		kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
+	}
+
+	return ret;
+}
+
+/*
  * Attempt to initiate a splice from pipe to file.
  */
 static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
 			   loff_t *ppos, size_t len, unsigned int flags)
 {
+	struct pipe_inode_info *opipe;
 	int ret;
 
+	opipe = pipe_info(out->f_dentry->d_inode);
+	if (opipe) {
+		if (unlikely(!(out->f_mode & FMODE_WRITE)))
+			return -EBADF;
+		return do_splice_pipes(pipe, opipe, len, flags);
+	}
+
 	if (unlikely(!out->f_op || !out->f_op->splice_write))
 		return -EINVAL;
 
@@ -933,8 +1015,16 @@ static long do_splice_to(struct file *in, loff_t *ppos,
 			 struct pipe_inode_info *pipe, size_t len,
 			 unsigned int flags)
 {
+	struct pipe_inode_info *ipipe;
 	int ret;
 
+	ipipe = pipe_info(in->f_dentry->d_inode);
+	if (ipipe) {
+		if (unlikely(!(in->f_mode & FMODE_READ)))
+			return -EBADF;
+		return do_splice_pipes(ipipe, pipe, len, flags);
+	}
+
 	if (unlikely(!in->f_op || !in->f_op->splice_read))
 		return -EINVAL;
 
@@ -1113,19 +1203,6 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 }
 
 /*
- * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
- * location, so checking ->i_pipe is not enough to verify that this is a
- * pipe.
- */
-static inline struct pipe_inode_info *pipe_info(struct inode *inode)
-{
-	if (S_ISFIFO(inode->i_mode))
-		return inode->i_pipe;
-
-	return NULL;
-}
-
-/*
  * Determine where to splice to/from.
  */
 static long do_splice(struct file *in, loff_t __user *off_in,
@@ -1140,7 +1217,10 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 	if (pipe) {
 		if (off_in)
 			return -ESPIPE;
+
 		if (off_out) {
+			if (pipe_info(out->f_path.dentry->d_inode))
+				return -ESPIPE;
 			if (out->f_op->llseek == no_llseek)
 				return -EINVAL;
 			if (copy_from_user(&offset, off_out, sizeof(loff_t)))
@@ -1639,21 +1719,36 @@ static int link_pipe_prep(struct pipe_inode_info *ipipe,
 	return 0;
 }
 
-/*
- * Link contents of ipipe to opipe.
- */
-static int link_pipe(struct pipe_inode_info *ipipe,
-		     struct pipe_inode_info *opipe,
-		     size_t len, unsigned int flags)
+/**
+ * pipe_buffer_at - return the nth pipe buffer after the current one
+ * @pipe: the pipe whose buffer ring is indexed
+ * @i: the buffer index, relative to the current one
+ */
+static inline struct pipe_buffer *
+pipe_buffer_at(struct pipe_inode_info *pipe, unsigned i)
 {
-	struct pipe_buffer *ibuf, *obuf;
-	int ret, i = 0, nbuf;
+	BUG_ON(i >= PIPE_BUFFERS);
 
-	ret = link_pipe_prep(ipipe, opipe, flags);
-	if (ret < 0)
-		return ret;
+	return pipe->bufs + ((pipe->curbuf + i) & (PIPE_BUFFERS - 1));
+}
 
-	/* pipes are now locked */
+/**
+ * do_link_pipe - copy or move (splice) contents of ipipe to opipe.
+ * @ipipe: the input pipe
+ * @opipe: the output pipe
+ * @len: the maximum number of bytes to copy/move
+ * @flags: splice modifier flags
+ * @move: indicates move operation (otherwise copy)
+ *
+ * expects both pipes to be locked
+ */
+static long do_link_pipe(struct pipe_inode_info *ipipe,
+			struct pipe_inode_info *opipe,
+			size_t len, unsigned int flags, int move)
+{
+	struct pipe_buffer *ibuf, *obuf;
+	long ret = 0;
+	int i = 0;
 
 	do {
 		if (!opipe->readers) {
@@ -1670,16 +1765,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 		if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
 			break;
 
-		ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
-		nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
-
-		/*
-		 * Get a reference to this pipe buffer,
-		 * so we can copy the contents over.
-		 */
-		ibuf->ops->get(ipipe, ibuf);
-
-		obuf = opipe->bufs + nbuf;
+		ibuf = pipe_buffer_at(ipipe, i);
+		obuf = pipe_buffer_at(opipe, opipe->nrbufs);
 		*obuf = *ibuf;
 
 		/*
@@ -1688,13 +1775,35 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 		 */
 		obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
 
-		if (obuf->len > len)
+		/* increase the reference count */
+		obuf->ops->get(opipe, obuf);
+
+		/* partial or complete copy/move ? */
+		if (obuf->len > len) {
 			obuf->len = len;
+			if (move) {
+				/* remove portion from ibuf */
+				ibuf->offset += len;
+				ibuf->len -= len;
+			}
+		} else {
+			if (move) {
+				/* remove entirely from ibuf */
+				ibuf->ops->release(ipipe, ibuf);
+				ibuf->ops = NULL;
+
+				ipipe->curbuf = (ipipe->curbuf + 1) &
+					(PIPE_BUFFERS - 1);
+				ipipe->nrbufs--;
+			} else {
+				/* advance pointer in ibuf */
+				i++;
+			}
+		}
 
 		opipe->nrbufs++;
 		ret += obuf->len;
 		len -= obuf->len;
-		i++;
 	} while (len);
 
 	/*
@@ -1704,6 +1813,26 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 	if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK))
 		ret = -EAGAIN;
 
+	return ret;
+}
+
+/*
+ * Link contents of ipipe to opipe.
+ */
+long link_pipe(struct pipe_inode_info *ipipe,
+	       struct pipe_inode_info *opipe,
+	       size_t len, unsigned int flags)
+{
+	long ret;
+
+	ret = link_pipe_prep(ipipe, opipe, flags);
+	if (ret < 0)
+		return ret;
+
+	/* pipes are now locked */
+
+	ret = do_link_pipe(ipipe, opipe, len, flags, 0);
+
 	pipe_unlock(ipipe);
 	pipe_unlock(opipe);
 
-- 
1.6.0.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2009-05-27 17:43 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-05-27 17:32 [RFC v16][PATCH 00/43] Kernel based checkpoint/restart Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 01/43] c/r: extend arch_setup_additional_pages() Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 02/43] c/r: make file_pos_read/write() public Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 03/43] c/r: create syscalls: sys_checkpoint, sys_restart Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 04/43] c/r: documentation Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 05/43] c/r: basic infrastructure for checkpoint/restart Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 06/43] c/r: x86_32 support " Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 07/43] c/r: infrastructure for shared objects Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 08/43] c/r: introduce '->checkpoint()' method in 'struct file_operations' Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 09/43] c/r: dump open file descriptors Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 10/43] c/r: restore " Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 11/43] c/r: add generic '->checkpoint' f_op to ext fses Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 12/43] c/r: add generic '->checkpoint()' f_op to simple devices Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 13/43] c/r: introduce method '->checkpoint()' in struct vm_operations_struct Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 14/43] c/r: dump memory address space (private memory) Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 15/43] c/r: restore " Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 16/43] c/r: export shmem_getpage() to support shared memory Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 17/43] c/r: dump anonymous- and file-mapped- " Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 18/43] c/r: restore " Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 19/43] c/r: external checkpoint of a task other than ourself Oren Laadan
2009-05-27 21:19   ` Alexey Dobriyan
2009-05-27 22:32     ` Oren Laadan
2009-05-28 16:33       ` Alexey Dobriyan
2009-05-27 17:32 ` [RFC v16][PATCH 20/43] c/r: export functionality used in next patch for restart-blocks Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 21/43] c/r: restart-blocks Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 22/43] c/r: checkpoint multiple processes Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 23/43] c/r: restart " Oren Laadan
2009-05-27 19:37   ` Alexey Dobriyan
2009-05-27 21:38     ` Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 24/43] c/r: detect resource leaks for whole-container checkpoint Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 25/43] tee: don't return 0 when another task drains/fills a pipe Oren Laadan
2009-05-27 17:32 ` Oren Laadan [this message]
2009-05-27 17:32 ` [RFC v16][PATCH 27/43] c/r: support for open pipes Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 28/43] c/r: make ckpt_may_checkpoint_task() check each namespace individually Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 29/43] c/r: support for UTS namespace Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 30/43] c/r: stub implementation for IPC namespace Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 31/43] deferqueue: generic queue to defer work Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 32/43] c/r (ipc): allow allocation of a desired ipc identifier Oren Laadan
2009-05-27 17:32 ` [RFC v16][PATCH 33/43] c/r (ipc): helpers to save and restore kern_ipc_perm structures Oren Laadan
2009-05-27 17:33 ` [RFC v16][PATCH 34/43] c/r: save and restore ipc namespace basics Oren Laadan
2009-05-27 17:33 ` [RFC v16][PATCH 35/43] c/r (ipc): export interface from ipc/shm.c to delete ipc shm Oren Laadan
2009-05-27 17:33 ` [RFC v16][PATCH 36/43] c/r: support share-memory sysv-ipc Oren Laadan
2009-05-27 17:33 ` [RFC v16][PATCH 37/43] c/r (ipc): make 'struct msg_msgseg' visible in ipc/util.h Oren Laadan
2009-05-27 17:33 ` [RFC v16][PATCH 38/43] c/r: support message-queues sysv-ipc Oren Laadan
2009-05-27 17:33 ` [RFC v16][PATCH 39/43] c/r (ipc): export interface from ipc/sem.c to cleanup ipc sem Oren Laadan
2009-05-27 17:33 ` [RFC v16][PATCH 40/43] c/r: support semaphore sysv-ipc Oren Laadan
2009-05-27 17:33 ` [RFC v16][PATCH 41/43] c/r: (s390): expose a constant for the number of words (CRs) Oren Laadan
2009-05-27 18:39   ` Alexey Dobriyan
2009-05-27 17:33 ` [RFC v16][PATCH 42/43] c/r: add CKPT_COPY() macro Oren Laadan
2009-05-27 17:33 ` [RFC v16][PATCH 43/43] c/r: define s390-specific checkpoint-restart code Oren Laadan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1243445589-32388-27-git-send-email-orenl@cs.columbia.edu \
    --to=orenl@cs.columbia.edu \
    --cc=adobriyan@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=containers@lists.linux-foundation.org \
    --cc=dave@linux.vnet.ibm.com \
    --cc=hpa@zytor.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@elte.hu \
    --cc=serue@us.ibm.com \
    --cc=torvalds@osdl.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=xemul@openvz.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox