linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v6 2/7] vfs: Define new syscalls preadv2,pwritev2
       [not found] <cover.1415636409.git.milosz@adfin.com>
@ 2014-11-10 16:40 ` Milosz Tanski
  2014-11-11 21:09   ` Jeff Moyer
  2014-11-12 13:18   ` mohanty bhagaban
  2014-11-10 16:40 ` [PATCH v6 4/7] vfs: RWF_NONBLOCK flag for preadv2 Milosz Tanski
  2014-11-10 16:40 ` [PATCH v6 7/7] fs: add a flag for per-operation O_DSYNC semantics Milosz Tanski
  2 siblings, 2 replies; 5+ messages in thread
From: Milosz Tanski @ 2014-11-10 16:40 UTC (permalink / raw)
  To: linux-kernel
  Cc: Christoph Hellwig, linux-fsdevel, linux-aio, Mel Gorman,
	Volker Lendecke, Tejun Heo, Jeff Moyer, Theodore Ts'o,
	Al Viro, linux-api, Michael Kerrisk, linux-arch, linux-mm

New syscalls that take a flags argument. This change does not add any specific
flags.

Signed-off-by: Milosz Tanski <milosz@adfin.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/read_write.c                   | 172 ++++++++++++++++++++++++++++++--------
 include/linux/compat.h            |   6 ++
 include/linux/syscalls.h          |   6 ++
 include/uapi/asm-generic/unistd.h |   6 +-
 mm/filemap.c                      |   5 +-
 5 files changed, 156 insertions(+), 39 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 94b2d34..b1b4bc8 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -866,6 +866,8 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 		return -EBADF;
 	if (!(file->f_mode & FMODE_CAN_READ))
 		return -EINVAL;
+	if (flags & ~0)
+		return -EINVAL;
 
 	return do_readv_writev(READ, file, vec, vlen, pos, flags);
 }
@@ -879,21 +881,23 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
 		return -EBADF;
 	if (!(file->f_mode & FMODE_CAN_WRITE))
 		return -EINVAL;
+	if (flags & ~0)
+		return -EINVAL;
 
 	return do_readv_writev(WRITE, file, vec, vlen, pos, flags);
 }
 
 EXPORT_SYMBOL(vfs_writev);
 
-SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
-		unsigned long, vlen)
+static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
+			unsigned long vlen, int flags)
 {
 	struct fd f = fdget_pos(fd);
 	ssize_t ret = -EBADF;
 
 	if (f.file) {
 		loff_t pos = file_pos_read(f.file);
-		ret = vfs_readv(f.file, vec, vlen, &pos, 0);
+		ret = vfs_readv(f.file, vec, vlen, &pos, flags);
 		if (ret >= 0)
 			file_pos_write(f.file, pos);
 		fdput_pos(f);
@@ -905,15 +909,15 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
 	return ret;
 }
 
-SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
-		unsigned long, vlen)
+static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
+			 unsigned long vlen, int flags)
 {
 	struct fd f = fdget_pos(fd);
 	ssize_t ret = -EBADF;
 
 	if (f.file) {
 		loff_t pos = file_pos_read(f.file);
-		ret = vfs_writev(f.file, vec, vlen, &pos, 0);
+		ret = vfs_writev(f.file, vec, vlen, &pos, flags);
 		if (ret >= 0)
 			file_pos_write(f.file, pos);
 		fdput_pos(f);
@@ -931,10 +935,9 @@ static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
 	return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
 }
 
-SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
-		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
+static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
+			 unsigned long vlen, loff_t pos, int flags)
 {
-	loff_t pos = pos_from_hilo(pos_h, pos_l);
 	struct fd f;
 	ssize_t ret = -EBADF;
 
@@ -945,7 +948,7 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
 	if (f.file) {
 		ret = -ESPIPE;
 		if (f.file->f_mode & FMODE_PREAD)
-			ret = vfs_readv(f.file, vec, vlen, &pos, 0);
+			ret = vfs_readv(f.file, vec, vlen, &pos, flags);
 		fdput(f);
 	}
 
@@ -955,10 +958,9 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
 	return ret;
 }
 
-SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
-		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
+static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
+			  unsigned long vlen, loff_t pos, int flags)
 {
-	loff_t pos = pos_from_hilo(pos_h, pos_l);
 	struct fd f;
 	ssize_t ret = -EBADF;
 
@@ -969,7 +971,7 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
 	if (f.file) {
 		ret = -ESPIPE;
 		if (f.file->f_mode & FMODE_PWRITE)
-			ret = vfs_writev(f.file, vec, vlen, &pos, 0);
+			ret = vfs_writev(f.file, vec, vlen, &pos, flags);
 		fdput(f);
 	}
 
@@ -979,11 +981,63 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
 	return ret;
 }
 
+SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
+		unsigned long, vlen)
+{
+	return do_readv(fd, vec, vlen, 0);
+}
+
+SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
+		unsigned long, vlen)
+{
+	return do_writev(fd, vec, vlen, 0);
+}
+
+SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
+		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
+{
+	loff_t pos = pos_from_hilo(pos_h, pos_l);
+
+	return do_preadv(fd, vec, vlen, pos, 0);
+}
+
+SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
+		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
+		int, flags)
+{
+	loff_t pos = pos_from_hilo(pos_h, pos_l);
+
+	if (pos == -1)
+		return do_readv(fd, vec, vlen, flags);
+
+	return do_preadv(fd, vec, vlen, pos, flags);
+}
+
+SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
+		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
+{
+	loff_t pos = pos_from_hilo(pos_h, pos_l);
+
+	return do_pwritev(fd, vec, vlen, pos, 0);
+}
+
+SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
+		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
+		int, flags)
+{
+	loff_t pos = pos_from_hilo(pos_h, pos_l);
+
+	if (pos == -1)
+		return do_writev(fd, vec, vlen, flags);
+
+	return do_pwritev(fd, vec, vlen, pos, flags);
+}
+
 #ifdef CONFIG_COMPAT
 
 static ssize_t compat_do_readv_writev(int type, struct file *file,
 			       const struct compat_iovec __user *uvector,
-			       unsigned long nr_segs, loff_t *pos)
+			       unsigned long nr_segs, loff_t *pos, int flags)
 {
 	compat_ssize_t tot_len;
 	struct iovec iovstack[UIO_FASTIOV];
@@ -1017,7 +1071,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 
 	if (iter_fn)
 		ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
-						pos, iter_fn, 0);
+						pos, iter_fn, flags);
 	else if (fnv)
 		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 						pos, fnv);
@@ -1041,7 +1095,7 @@ out:
 
 static size_t compat_readv(struct file *file,
 			   const struct compat_iovec __user *vec,
-			   unsigned long vlen, loff_t *pos)
+			   unsigned long vlen, loff_t *pos, int flags)
 {
 	ssize_t ret = -EBADF;
 
@@ -1051,8 +1105,10 @@ static size_t compat_readv(struct file *file,
 	ret = -EINVAL;
 	if (!(file->f_mode & FMODE_CAN_READ))
 		goto out;
+	if (flags & ~0)
+		goto out;
 
-	ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
+	ret = compat_do_readv_writev(READ, file, vec, vlen, pos, flags);
 
 out:
 	if (ret > 0)
@@ -1061,9 +1117,9 @@ out:
 	return ret;
 }
 
-COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
-		const struct compat_iovec __user *,vec,
-		compat_ulong_t, vlen)
+static size_t __compat_sys_readv(compat_ulong_t fd,
+				 const struct compat_iovec __user *vec,
+				 compat_ulong_t vlen, int flags)
 {
 	struct fd f = fdget_pos(fd);
 	ssize_t ret;
@@ -1072,16 +1128,24 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
 	if (!f.file)
 		return -EBADF;
 	pos = f.file->f_pos;
-	ret = compat_readv(f.file, vec, vlen, &pos);
+	ret = compat_readv(f.file, vec, vlen, &pos, flags);
 	if (ret >= 0)
 		f.file->f_pos = pos;
 	fdput_pos(f);
 	return ret;
+
+}
+
+COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
+		const struct compat_iovec __user *,vec,
+		compat_ulong_t, vlen)
+{
+	return __compat_sys_readv(fd, vec, vlen, 0);
 }
 
 static long __compat_sys_preadv64(unsigned long fd,
 				  const struct compat_iovec __user *vec,
-				  unsigned long vlen, loff_t pos)
+				  unsigned long vlen, loff_t pos, int flags)
 {
 	struct fd f;
 	ssize_t ret;
@@ -1093,7 +1157,7 @@ static long __compat_sys_preadv64(unsigned long fd,
 		return -EBADF;
 	ret = -ESPIPE;
 	if (f.file->f_mode & FMODE_PREAD)
-		ret = compat_readv(f.file, vec, vlen, &pos);
+		ret = compat_readv(f.file, vec, vlen, &pos, flags);
 	fdput(f);
 	return ret;
 }
@@ -1103,7 +1167,7 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
 		const struct compat_iovec __user *,vec,
 		unsigned long, vlen, loff_t, pos)
 {
-	return __compat_sys_preadv64(fd, vec, vlen, pos);
+	return __compat_sys_preadv64(fd, vec, vlen, pos, 0);
 }
 #endif
 
@@ -1113,12 +1177,25 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
 {
 	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
 
-	return __compat_sys_preadv64(fd, vec, vlen, pos);
+	return __compat_sys_preadv64(fd, vec, vlen, pos, 0);
+}
+
+COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
+		const struct compat_iovec __user *,vec,
+		compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
+		int, flags)
+{
+	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+
+	if (pos == -1)
+		return __compat_sys_readv(fd, vec, vlen, flags);
+
+	return __compat_sys_preadv64(fd, vec, vlen, pos, flags);
 }
 
 static size_t compat_writev(struct file *file,
 			    const struct compat_iovec __user *vec,
-			    unsigned long vlen, loff_t *pos)
+			    unsigned long vlen, loff_t *pos, int flags)
 {
 	ssize_t ret = -EBADF;
 
@@ -1128,8 +1205,10 @@ static size_t compat_writev(struct file *file,
 	ret = -EINVAL;
 	if (!(file->f_mode & FMODE_CAN_WRITE))
 		goto out;
+	if (flags & ~0)
+		goto out;
 
-	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
+	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, flags);
 
 out:
 	if (ret > 0)
@@ -1138,9 +1217,9 @@ out:
 	return ret;
 }
 
-COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
-		const struct compat_iovec __user *, vec,
-		compat_ulong_t, vlen)
+static size_t __compat_sys_writev(compat_ulong_t fd,
+				  const struct compat_iovec __user* vec,
+				  compat_ulong_t vlen, int flags)
 {
 	struct fd f = fdget_pos(fd);
 	ssize_t ret;
@@ -1149,28 +1228,36 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
 	if (!f.file)
 		return -EBADF;
 	pos = f.file->f_pos;
-	ret = compat_writev(f.file, vec, vlen, &pos);
+	ret = compat_writev(f.file, vec, vlen, &pos, flags);
 	if (ret >= 0)
 		f.file->f_pos = pos;
 	fdput_pos(f);
 	return ret;
 }
 
+COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
+		const struct compat_iovec __user *, vec,
+		compat_ulong_t, vlen)
+{
+	return __compat_sys_writev(fd, vec, vlen, 0);
+}
+
 static long __compat_sys_pwritev64(unsigned long fd,
 				   const struct compat_iovec __user *vec,
-				   unsigned long vlen, loff_t pos)
+				   unsigned long vlen, loff_t pos, int flags)
 {
 	struct fd f;
 	ssize_t ret;
 
 	if (pos < 0)
 		return -EINVAL;
+
 	f = fdget(fd);
 	if (!f.file)
 		return -EBADF;
 	ret = -ESPIPE;
 	if (f.file->f_mode & FMODE_PWRITE)
-		ret = compat_writev(f.file, vec, vlen, &pos);
+		ret = compat_writev(f.file, vec, vlen, &pos, flags);
 	fdput(f);
 	return ret;
 }
@@ -1180,7 +1267,7 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
 		const struct compat_iovec __user *,vec,
 		unsigned long, vlen, loff_t, pos)
 {
-	return __compat_sys_pwritev64(fd, vec, vlen, pos);
+	return __compat_sys_pwritev64(fd, vec, vlen, pos, 0);
 }
 #endif
 
@@ -1190,8 +1277,21 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
 {
 	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
 
-	return __compat_sys_pwritev64(fd, vec, vlen, pos);
+	return __compat_sys_pwritev64(fd, vec, vlen, pos, 0);
+}
+
+COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
+		const struct compat_iovec __user *,vec,
+		compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags)
+{
+	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+
+	if (pos == -1)
+		return __compat_sys_writev(fd, vec, vlen, flags);
+
+	return __compat_sys_pwritev64(fd, vec, vlen, pos, flags);
 }
+
 #endif
 
 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
diff --git a/include/linux/compat.h b/include/linux/compat.h
index e649426..63a94e2 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -340,6 +340,12 @@ asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd,
 asmlinkage ssize_t compat_sys_pwritev(compat_ulong_t fd,
 		const struct compat_iovec __user *vec,
 		compat_ulong_t vlen, u32 pos_low, u32 pos_high);
+asmlinkage ssize_t compat_sys_preadv2(compat_ulong_t fd,
+		const struct compat_iovec __user *vec,
+		compat_ulong_t vlen, u32 pos_low, u32 pos_high, int flags);
+asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd,
+		const struct compat_iovec __user *vec,
+		compat_ulong_t vlen, u32 pos_low, u32 pos_high, int flags);
 
 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
 asmlinkage long compat_sys_preadv64(unsigned long fd,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index bda9b81..cedc22e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -571,8 +571,14 @@ asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf,
 			     size_t count, loff_t pos);
 asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec,
 			   unsigned long vlen, unsigned long pos_l, unsigned long pos_h);
+asmlinkage long sys_preadv2(unsigned long fd, const struct iovec __user *vec,
+			    unsigned long vlen, unsigned long pos_l, unsigned long pos_h,
+			    int flags);
 asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec,
 			    unsigned long vlen, unsigned long pos_l, unsigned long pos_h);
+asmlinkage long sys_pwritev2(unsigned long fd, const struct iovec __user *vec,
+			    unsigned long vlen, unsigned long pos_l, unsigned long pos_h,
+			    int flags);
 asmlinkage long sys_getcwd(char __user *buf, unsigned long size);
 asmlinkage long sys_mkdir(const char __user *pathname, umode_t mode);
 asmlinkage long sys_chdir(const char __user *filename);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 22749c1..9406018 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -213,6 +213,10 @@ __SC_COMP(__NR_pwrite64, sys_pwrite64, compat_sys_pwrite64)
 __SC_COMP(__NR_preadv, sys_preadv, compat_sys_preadv)
 #define __NR_pwritev 70
 __SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev)
+#define __NR_preadv2 281
+__SC_COMP(__NR_preadv2, sys_preadv2, compat_sys_preadv2)
+#define __NR_pwritev2 282
+__SC_COMP(__NR_pwritev2, sys_pwritev2, compat_sys_pwritev2)
 
 /* fs/sendfile.c */
 #define __NR3264_sendfile 71
@@ -709,7 +713,7 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create)
 __SYSCALL(__NR_bpf, sys_bpf)
 
 #undef __NR_syscalls
-#define __NR_syscalls 281
+#define __NR_syscalls 283
 
 /*
  * All syscalls below here should go away really,
diff --git a/mm/filemap.c b/mm/filemap.c
index 14b4642..530c263 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1457,6 +1457,7 @@ static void shrink_readahead_size_eio(struct file *filp,
  * @ppos:	current file position
  * @iter:	data destination
  * @written:	already copied
+ * @flags:	optional flags
  *
  * This is a generic file read routine, and uses the
  * mapping->a_ops->readpage() function for the actual low-level stuff.
@@ -1465,7 +1466,7 @@ static void shrink_readahead_size_eio(struct file *filp,
  * of the logic when it comes to error handling etc.
  */
 static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
-		struct iov_iter *iter, ssize_t written)
+		struct iov_iter *iter, ssize_t written, int flags)
 {
 	struct address_space *mapping = filp->f_mapping;
 	struct inode *inode = mapping->host;
@@ -1735,7 +1736,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 		}
 	}
 
-	retval = do_generic_file_read(file, ppos, iter, retval);
+	retval = do_generic_file_read(file, ppos, iter, retval, iocb->ki_rwflags);
 out:
 	return retval;
 }
-- 
1.9.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v6 4/7] vfs: RWF_NONBLOCK flag for preadv2
       [not found] <cover.1415636409.git.milosz@adfin.com>
  2014-11-10 16:40 ` [PATCH v6 2/7] vfs: Define new syscalls preadv2,pwritev2 Milosz Tanski
@ 2014-11-10 16:40 ` Milosz Tanski
  2014-11-10 16:40 ` [PATCH v6 7/7] fs: add a flag for per-operation O_DSYNC semantics Milosz Tanski
  2 siblings, 0 replies; 5+ messages in thread
From: Milosz Tanski @ 2014-11-10 16:40 UTC (permalink / raw)
  To: linux-kernel
  Cc: Christoph Hellwig, linux-fsdevel, linux-aio, Mel Gorman,
	Volker Lendecke, Tejun Heo, Jeff Moyer, Theodore Ts'o,
	Al Viro, linux-api, Michael Kerrisk, linux-arch, ceph-devel,
	linux-cifs, samba-technical, linux-nfs, linux-xfs, ocfs2-devel,
	linux-mm

generic_file_read_iter() supports a new flag RWF_NONBLOCK which says that we
only want to read the data if it's already in the page cache.

Additionally, there are a few filesystems where we have to bail out early
if RWF_NONBLOCK is set, because the operation would block. Christoph Hellwig
contributed this code.

Signed-off-by: Milosz Tanski <milosz@adfin.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Acked-by: Sage Weil <sage@redhat.com>
---
 fs/ceph/file.c     |  2 ++
 fs/cifs/file.c     |  6 ++++++
 fs/nfs/file.c      |  5 ++++-
 fs/ocfs2/file.c    |  6 ++++++
 fs/pipe.c          |  3 ++-
 fs/read_write.c    | 44 ++++++++++++++++++++++++++++++--------------
 fs/xfs/xfs_file.c  |  4 ++++
 include/linux/fs.h |  3 +++
 mm/filemap.c       | 18 ++++++++++++++++++
 mm/shmem.c         |  4 ++++
 10 files changed, 79 insertions(+), 16 deletions(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d7e0da8..b798b5c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -822,6 +822,8 @@ again:
 	if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
 	    (iocb->ki_filp->f_flags & O_DIRECT) ||
 	    (fi->flags & CEPH_F_SYNC)) {
+		if (iocb->ki_rwflags & O_NONBLOCK)
+			return -EAGAIN;
 
 		dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n",
 		     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 3e4d00a..c485afa 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3005,6 +3005,9 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
 	struct cifs_readdata *rdata, *tmp;
 	struct list_head rdata_list;
 
+	if (iocb->ki_rwflags & RWF_NONBLOCK)
+		return -EAGAIN;
+
 	len = iov_iter_count(to);
 	if (!len)
 		return 0;
@@ -3123,6 +3126,9 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
 		return generic_file_read_iter(iocb, to);
 
+	if (iocb->ki_rwflags & RWF_NONBLOCK)
+		return -EAGAIN;
+
 	/*
 	 * We need to hold the sem to be sure nobody modifies lock list
 	 * with a brlock that prevents reading.
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 2ab6f00..aa9046f 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -171,8 +171,11 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
 	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t result;
 
-	if (iocb->ki_filp->f_flags & O_DIRECT)
+	if (iocb->ki_filp->f_flags & O_DIRECT) {
+		if (iocb->ki_rwflags & O_NONBLOCK)
+			return -EAGAIN;
 		return nfs_file_direct_read(iocb, to, iocb->ki_pos);
+	}
 
 	dprintk("NFS: read(%pD2, %zu@%lu)\n",
 		iocb->ki_filp,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 324dc93..bb66ca4 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2472,6 +2472,12 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
 			filp->f_path.dentry->d_name.name,
 			to->nr_segs);	/* GRRRRR */
 
+	/*
+	 * No non-blocking reads for ocfs2 for now.  Might be doable with
+	 * non-blocking cluster lock helpers.
+	 */
+	if (iocb->ki_rwflags & RWF_NONBLOCK)
+		return -EAGAIN;
 
 	if (!inode) {
 		ret = -EINVAL;
diff --git a/fs/pipe.c b/fs/pipe.c
index 21981e5..212bf68 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -302,7 +302,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 			 */
 			if (ret)
 				break;
-			if (filp->f_flags & O_NONBLOCK) {
+			if ((filp->f_flags & O_NONBLOCK) ||
+			    (iocb->ki_rwflags & RWF_NONBLOCK)) {
 				ret = -EAGAIN;
 				break;
 			}
diff --git a/fs/read_write.c b/fs/read_write.c
index b1b4bc8..adf85ab 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -835,14 +835,19 @@ static ssize_t do_readv_writev(int type, struct file *file,
 		file_start_write(file);
 	}
 
-	if (iter_fn)
+	if (iter_fn) {
 		ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
 						pos, iter_fn, flags);
-	else if (fnv)
-		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
-						pos, fnv);
-	else
-		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+	} else {
+		if (type == READ && (flags & RWF_NONBLOCK))
+			return -EAGAIN;
+
+		if (fnv)
+			ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
+							pos, fnv);
+		else
+			ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+	}
 
 	if (type != READ)
 		file_end_write(file);
@@ -866,8 +871,10 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 		return -EBADF;
 	if (!(file->f_mode & FMODE_CAN_READ))
 		return -EINVAL;
-	if (flags & ~0)
+	if (flags & ~RWF_NONBLOCK)
 		return -EINVAL;
+	if ((file->f_flags & O_DIRECT) && (flags & RWF_NONBLOCK))
+		return -EAGAIN;
 
 	return do_readv_writev(READ, file, vec, vlen, pos, flags);
 }
@@ -1069,14 +1076,19 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 		file_start_write(file);
 	}
 
-	if (iter_fn)
+	if (iter_fn) {
 		ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
 						pos, iter_fn, flags);
-	else if (fnv)
-		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
-						pos, fnv);
-	else
-		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+	} else {
+		if (type == READ && (flags & RWF_NONBLOCK))
+			return -EAGAIN;
+
+		if (fnv)
+			ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
+							pos, fnv);
+		else
+			ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+	}
 
 	if (type != READ)
 		file_end_write(file);
@@ -1105,7 +1117,11 @@ static size_t compat_readv(struct file *file,
 	ret = -EINVAL;
 	if (!(file->f_mode & FMODE_CAN_READ))
 		goto out;
-	if (flags & ~0)
+	if (flags & ~RWF_NONBLOCK)
+		goto out;
+
+	ret = -EAGAIN;
+	if ((file->f_flags & O_DIRECT) && (flags & RWF_NONBLOCK))
 		goto out;
 
 	ret = compat_do_readv_writev(READ, file, vec, vlen, pos, flags);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index eb596b4..b1f6334 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -246,6 +246,10 @@ xfs_file_read_iter(
 
 	XFS_STATS_INC(xs_read_calls);
 
+	/* XXX: need a non-blocking iolock helper, shouldn't be too hard */
+	if (iocb->ki_rwflags & RWF_NONBLOCK)
+		return -EAGAIN;
+
 	if (unlikely(file->f_flags & O_DIRECT))
 		ioflags |= XFS_IO_ISDIRECT;
 	if (file->f_mode & FMODE_NOCMTIME)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9ed5711..eaebd99 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1459,6 +1459,9 @@ struct block_device_operations;
 #define HAVE_COMPAT_IOCTL 1
 #define HAVE_UNLOCKED_IOCTL 1
 
+/* These flags are used for the readv/writev syscalls with flags. */
+#define RWF_NONBLOCK 0x00000001
+
 struct iov_iter;
 
 struct file_operations {
diff --git a/mm/filemap.c b/mm/filemap.c
index 530c263..09d3af3 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1494,6 +1494,8 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
 find_page:
 		page = find_get_page(mapping, index);
 		if (!page) {
+			if (flags & RWF_NONBLOCK)
+				goto would_block;
 			page_cache_sync_readahead(mapping,
 					ra, filp,
 					index, last_index - index);
@@ -1585,6 +1587,11 @@ page_ok:
 		continue;
 
 page_not_up_to_date:
+		if (flags & RWF_NONBLOCK) {
+			page_cache_release(page);
+			goto would_block;
+		}
+
 		/* Get exclusive access to the page ... */
 		error = lock_page_killable(page);
 		if (unlikely(error))
@@ -1604,6 +1611,12 @@ page_not_up_to_date_locked:
 			goto page_ok;
 		}
 
+		if (flags & RWF_NONBLOCK) {
+			unlock_page(page);
+			page_cache_release(page);
+			goto would_block;
+		}
+
 readpage:
 		/*
 		 * A previous I/O error may have been due to temporary
@@ -1674,6 +1687,8 @@ no_cached_page:
 		goto readpage;
 	}
 
+would_block:
+	error = -EAGAIN;
 out:
 	ra->prev_pos = prev_index;
 	ra->prev_pos <<= PAGE_CACHE_SHIFT;
@@ -1707,6 +1722,9 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 		size_t count = iov_iter_count(iter);
 		loff_t size;
 
+		if (iocb->ki_rwflags & RWF_NONBLOCK)
+			return -EAGAIN;
+
 		if (!count)
 			goto out; /* skip atime */
 		size = i_size_read(inode);
diff --git a/mm/shmem.c b/mm/shmem.c
index cd6fc75..5c30f04 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1531,6 +1531,10 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	ssize_t retval = 0;
 	loff_t *ppos = &iocb->ki_pos;
 
+	/* XXX: should be easily supportable */
+	if (iocb->ki_rwflags & RWF_NONBLOCK)
+		return -EAGAIN;
+
 	/*
 	 * Might this read be for a stacking filesystem?  Then when reading
 	 * holes of a sparse file, we actually need to allocate those pages,
-- 
1.9.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v6 7/7] fs: add a flag for per-operation O_DSYNC semantics
       [not found] <cover.1415636409.git.milosz@adfin.com>
  2014-11-10 16:40 ` [PATCH v6 2/7] vfs: Define new syscalls preadv2,pwritev2 Milosz Tanski
  2014-11-10 16:40 ` [PATCH v6 4/7] vfs: RWF_NONBLOCK flag for preadv2 Milosz Tanski
@ 2014-11-10 16:40 ` Milosz Tanski
  2 siblings, 0 replies; 5+ messages in thread
From: Milosz Tanski @ 2014-11-10 16:40 UTC (permalink / raw)
  To: linux-kernel
  Cc: Christoph Hellwig, Christoph Hellwig, linux-fsdevel, linux-aio,
	Mel Gorman, Volker Lendecke, Tejun Heo, Jeff Moyer,
	Theodore Ts'o, Al Viro, linux-api, Michael Kerrisk,
	linux-arch, ceph-devel, fuse-devel, linux-nfs, ocfs2-devel,
	linux-mm

From: Christoph Hellwig <hch@lst.de>

With the new read/write with flags syscalls we can support a flag
to enable O_DSYNC semantics on a per-operation basis.  This is
useful to implement protocols like SMB, NFS or SCSI that have such
per-operation flags.

Example program below:

cat > pwritev2.c << EOF

#define LO_HI_LONG(val)                           \
        (off_t) val,                              \
        (off_t) ((((uint64_t) (val)) >> (sizeof (long) * 4)) >> (sizeof (long) * 4))

/*
 * Userspace wrapper that issues the raw pwritev2 system call.
 *
 * LO_HI_LONG(offset) supplies the position arguments (the offset value
 * followed by its upper part shifted down by the width of a long), and
 * flags is passed as the final argument.  The value returned by syscall()
 * is handed back to the caller unchanged.
 */
static ssize_t
pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags)
{
	ssize_t written;

	written = syscall(__NR_pwritev2, fd, iov, iovcnt, LO_HI_LONG(offset),
			  flags);
	return written;
}

/*
 * Example driver: opens (creating/truncating) the file named by argv[1] and
 * writes a single 1024-byte buffer filled with 0xfe at offset 0 through
 * pwritev2() with the RWF_DSYNC flag.
 *
 * Returns 0 when the write succeeds, and 1 on a missing argument or when
 * open() or pwritev2() fails, so that callers and scripts can detect errors.
 */
int main(int argc, char **argv)
{
	char buf[1024];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	ssize_t ret;
	int fd;

	/* Refuse to run without a target path instead of passing NULL to open(). */
	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n",
			argc > 0 ? argv[0] : "pwritev2");
		return 1;
	}

	fd = open(argv[1], O_WRONLY|O_CREAT|O_TRUNC, 0666);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(buf, 0xfe, sizeof(buf));

	/* Keep the full ssize_t result; do not narrow it to int. */
	ret = pwritev2(fd, &iov, 1, 0, RWF_DSYNC);
	if (ret < 0) {
		perror("pwritev2");
		return 1;
	}

	printf("ret = %zd\n", ret);
	return 0;
}
EOF

Signed-off-by: Christoph Hellwig <hch@lst.de>
[milosz@adfin.com: compat syscall changes for RWF_DSYNC]
Signed-off-by: Milosz Tanski <milosz@adfin.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Acked-by: Sage Weil <sage@redhat.com>
---
 fs/ceph/file.c     |  4 +++-
 fs/fuse/file.c     |  2 ++
 fs/nfs/file.c      | 10 ++++++----
 fs/ocfs2/file.c    |  6 ++++--
 fs/read_write.c    |  8 ++++++--
 include/linux/fs.h |  3 ++-
 mm/filemap.c       |  4 +++-
 7 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index b798b5c..2d4e15a 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -983,7 +983,9 @@ retry_snap:
 	ceph_put_cap_refs(ci, got);
 
 	if (written >= 0 &&
-	    ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) ||
+	    ((file->f_flags & O_SYNC) ||
+	     IS_SYNC(file->f_mapping->host) ||
+	     (iocb->ki_rwflags & RWF_DSYNC) ||
 	     ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
 		err = vfs_fsync_range(file, pos, pos + written - 1, 1);
 		if (err < 0)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index caa8d95..bb4fb23 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1248,6 +1248,8 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		written += written_buffered;
 		iocb->ki_pos = pos + written_buffered;
 	} else {
+		if (iocb->ki_rwflags & RWF_DSYNC)
+			return -EINVAL;
 		written = fuse_perform_write(file, mapping, from, pos);
 		if (written >= 0)
 			iocb->ki_pos = pos + written;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index aa9046f..c59b0b7 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -652,13 +652,15 @@ static const struct vm_operations_struct nfs_file_vm_ops = {
 	.remap_pages = generic_file_remap_pages,
 };
 
-static int nfs_need_sync_write(struct file *filp, struct inode *inode)
+static int nfs_need_sync_write(struct kiocb *iocb, struct inode *inode)
 {
 	struct nfs_open_context *ctx;
 
-	if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC))
+	if (IS_SYNC(inode) ||
+	    (iocb->ki_filp->f_flags & O_DSYNC) ||
+	    (iocb->ki_rwflags & RWF_DSYNC))
 		return 1;
-	ctx = nfs_file_open_context(filp);
+	ctx = nfs_file_open_context(iocb->ki_filp);
 	if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) ||
 	    nfs_ctx_key_to_expire(ctx))
 		return 1;
@@ -705,7 +707,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 		written = result;
 
 	/* Return error values for O_DSYNC and IS_SYNC() */
-	if (result >= 0 && nfs_need_sync_write(file, inode)) {
+	if (result >= 0 && nfs_need_sync_write(iocb, inode)) {
 		int err = vfs_fsync(file, 0);
 		if (err < 0)
 			result = err;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index bb66ca4..8f9a86b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2374,8 +2374,10 @@ out_dio:
 	/* buffered aio wouldn't have proper lock coverage today */
 	BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
 
-	if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
-	    ((file->f_flags & O_DIRECT) && !direct_io)) {
+	if (((file->f_flags & O_DSYNC) && !direct_io) ||
+	    IS_SYNC(inode) ||
+	    ((file->f_flags & O_DIRECT) && !direct_io) ||
+	    (iocb->ki_rwflags & RWF_DSYNC)) {
 		ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
 					       *ppos + count - 1);
 		if (ret < 0)
diff --git a/fs/read_write.c b/fs/read_write.c
index adf85ab..c2e3c0a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -841,6 +841,8 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	} else {
 		if (type == READ && (flags & RWF_NONBLOCK))
 			return -EAGAIN;
+		if (type == WRITE && (flags & RWF_DSYNC))
+			return -EINVAL;
 
 		if (fnv)
 			ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
@@ -888,7 +890,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
 		return -EBADF;
 	if (!(file->f_mode & FMODE_CAN_WRITE))
 		return -EINVAL;
-	if (flags & ~0)
+	if (flags & ~RWF_DSYNC)
 		return -EINVAL;
 
 	return do_readv_writev(WRITE, file, vec, vlen, pos, flags);
@@ -1082,6 +1084,8 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 	} else {
 		if (type == READ && (flags & RWF_NONBLOCK))
 			return -EAGAIN;
+		if (type == WRITE && (flags & RWF_DSYNC))
+			return -EINVAL;
 
 		if (fnv)
 			ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
@@ -1221,7 +1225,7 @@ static size_t compat_writev(struct file *file,
 	ret = -EINVAL;
 	if (!(file->f_mode & FMODE_CAN_WRITE))
 		goto out;
-	if (flags & ~0)
+	if (flags & ~RWF_DSYNC)
 		goto out;
 
 	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, flags);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7d0e116..7786b88 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1460,7 +1460,8 @@ struct block_device_operations;
 #define HAVE_UNLOCKED_IOCTL 1
 
 /* These flags are used for the readv/writev syscalls with flags. */
-#define RWF_NONBLOCK 0x00000001
+#define RWF_NONBLOCK	0x00000001
+#define RWF_DSYNC	0x00000002
 
 struct iov_iter;
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 535967b..8c50d35 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2669,7 +2669,9 @@ int generic_write_sync(struct kiocb *iocb, loff_t count)
 	struct file *file = iocb->ki_filp;
 
 	if (count > 0 &&
-	    ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host))) {
+	    ((file->f_flags & O_DSYNC) ||
+	     (iocb->ki_rwflags & RWF_DSYNC) ||
+	     IS_SYNC(file->f_mapping->host))) {
 		bool fdatasync = !(file->f_flags & __O_SYNC);
 		ssize_t ret;
 
-- 
1.9.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v6 2/7] vfs: Define new syscalls preadv2,pwritev2
  2014-11-10 16:40 ` [PATCH v6 2/7] vfs: Define new syscalls preadv2,pwritev2 Milosz Tanski
@ 2014-11-11 21:09   ` Jeff Moyer
  2014-11-12 13:18   ` mohanty bhagaban
  1 sibling, 0 replies; 5+ messages in thread
From: Jeff Moyer @ 2014-11-11 21:09 UTC (permalink / raw)
  To: Milosz Tanski
  Cc: linux-kernel, Christoph Hellwig, linux-fsdevel, linux-aio,
	Mel Gorman, Volker Lendecke, Tejun Heo, Theodore Ts'o,
	Al Viro, linux-api, Michael Kerrisk, linux-arch, linux-mm

Milosz Tanski <milosz@adfin.com> writes:

> New syscalls that take a flag argument. This change does not add any specific
> flags.

Looks good.

Reviewed-by: Jeff Moyer <jmoyer@redhat.com>

>
> Signed-off-by: Milosz Tanski <milosz@adfin.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/read_write.c                   | 172 ++++++++++++++++++++++++++++++--------
>  include/linux/compat.h            |   6 ++
>  include/linux/syscalls.h          |   6 ++
>  include/uapi/asm-generic/unistd.h |   6 +-
>  mm/filemap.c                      |   5 +-
>  5 files changed, 156 insertions(+), 39 deletions(-)
>
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 94b2d34..b1b4bc8 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -866,6 +866,8 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
>  		return -EBADF;
>  	if (!(file->f_mode & FMODE_CAN_READ))
>  		return -EINVAL;
> +	if (flags & ~0)
> +		return -EINVAL;
>  
>  	return do_readv_writev(READ, file, vec, vlen, pos, flags);
>  }
> @@ -879,21 +881,23 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
>  		return -EBADF;
>  	if (!(file->f_mode & FMODE_CAN_WRITE))
>  		return -EINVAL;
> +	if (flags & ~0)
> +		return -EINVAL;
>  
>  	return do_readv_writev(WRITE, file, vec, vlen, pos, flags);
>  }
>  
>  EXPORT_SYMBOL(vfs_writev);
>  
> -SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
> -		unsigned long, vlen)
> +static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
> +			unsigned long vlen, int flags)
>  {
>  	struct fd f = fdget_pos(fd);
>  	ssize_t ret = -EBADF;
>  
>  	if (f.file) {
>  		loff_t pos = file_pos_read(f.file);
> -		ret = vfs_readv(f.file, vec, vlen, &pos, 0);
> +		ret = vfs_readv(f.file, vec, vlen, &pos, flags);
>  		if (ret >= 0)
>  			file_pos_write(f.file, pos);
>  		fdput_pos(f);
> @@ -905,15 +909,15 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
>  	return ret;
>  }
>  
> -SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
> -		unsigned long, vlen)
> +static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
> +			 unsigned long vlen, int flags)
>  {
>  	struct fd f = fdget_pos(fd);
>  	ssize_t ret = -EBADF;
>  
>  	if (f.file) {
>  		loff_t pos = file_pos_read(f.file);
> -		ret = vfs_writev(f.file, vec, vlen, &pos, 0);
> +		ret = vfs_writev(f.file, vec, vlen, &pos, flags);
>  		if (ret >= 0)
>  			file_pos_write(f.file, pos);
>  		fdput_pos(f);
> @@ -931,10 +935,9 @@ static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
>  	return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
>  }
>  
> -SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
> -		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
> +static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
> +			 unsigned long vlen, loff_t pos, int flags)
>  {
> -	loff_t pos = pos_from_hilo(pos_h, pos_l);
>  	struct fd f;
>  	ssize_t ret = -EBADF;
>  
> @@ -945,7 +948,7 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
>  	if (f.file) {
>  		ret = -ESPIPE;
>  		if (f.file->f_mode & FMODE_PREAD)
> -			ret = vfs_readv(f.file, vec, vlen, &pos, 0);
> +			ret = vfs_readv(f.file, vec, vlen, &pos, flags);
>  		fdput(f);
>  	}
>  
> @@ -955,10 +958,9 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
>  	return ret;
>  }
>  
> -SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
> -		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
> +static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
> +			  unsigned long vlen, loff_t pos, int flags)
>  {
> -	loff_t pos = pos_from_hilo(pos_h, pos_l);
>  	struct fd f;
>  	ssize_t ret = -EBADF;
>  
> @@ -969,7 +971,7 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
>  	if (f.file) {
>  		ret = -ESPIPE;
>  		if (f.file->f_mode & FMODE_PWRITE)
> -			ret = vfs_writev(f.file, vec, vlen, &pos, 0);
> +			ret = vfs_writev(f.file, vec, vlen, &pos, flags);
>  		fdput(f);
>  	}
>  
> @@ -979,11 +981,63 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
>  	return ret;
>  }
>  
> +SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
> +		unsigned long, vlen)
> +{
> +	return do_readv(fd, vec, vlen, 0);
> +}
> +
> +SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
> +		unsigned long, vlen)
> +{
> +	return do_writev(fd, vec, vlen, 0);
> +}
> +
> +SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
> +		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
> +{
> +	loff_t pos = pos_from_hilo(pos_h, pos_l);
> +
> +	return do_preadv(fd, vec, vlen, pos, 0);
> +}
> +
> +SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
> +		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
> +		int, flags)
> +{
> +	loff_t pos = pos_from_hilo(pos_h, pos_l);
> +
> +	if (pos == -1)
> +		return do_readv(fd, vec, vlen, flags);
> +
> +	return do_preadv(fd, vec, vlen, pos, flags);
> +}
> +
> +SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
> +		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
> +{
> +	loff_t pos = pos_from_hilo(pos_h, pos_l);
> +
> +	return do_pwritev(fd, vec, vlen, pos, 0);
> +}
> +
> +SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
> +		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
> +		int, flags)
> +{
> +	loff_t pos = pos_from_hilo(pos_h, pos_l);
> +
> +	if (pos == -1)
> +		return do_writev(fd, vec, vlen, flags);
> +
> +	return do_pwritev(fd, vec, vlen, pos, flags);
> +}
> +
>  #ifdef CONFIG_COMPAT
>  
>  static ssize_t compat_do_readv_writev(int type, struct file *file,
>  			       const struct compat_iovec __user *uvector,
> -			       unsigned long nr_segs, loff_t *pos)
> +			       unsigned long nr_segs, loff_t *pos, int flags)
>  {
>  	compat_ssize_t tot_len;
>  	struct iovec iovstack[UIO_FASTIOV];
> @@ -1017,7 +1071,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
>  
>  	if (iter_fn)
>  		ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
> -						pos, iter_fn, 0);
> +						pos, iter_fn, flags);
>  	else if (fnv)
>  		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
>  						pos, fnv);
> @@ -1041,7 +1095,7 @@ out:
>  
>  static size_t compat_readv(struct file *file,
>  			   const struct compat_iovec __user *vec,
> -			   unsigned long vlen, loff_t *pos)
> +			   unsigned long vlen, loff_t *pos, int flags)
>  {
>  	ssize_t ret = -EBADF;
>  
> @@ -1051,8 +1105,10 @@ static size_t compat_readv(struct file *file,
>  	ret = -EINVAL;
>  	if (!(file->f_mode & FMODE_CAN_READ))
>  		goto out;
> +	if (flags & ~0)
> +		goto out;
>  
> -	ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
> +	ret = compat_do_readv_writev(READ, file, vec, vlen, pos, flags);
>  
>  out:
>  	if (ret > 0)
> @@ -1061,9 +1117,9 @@ out:
>  	return ret;
>  }
>  
> -COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
> -		const struct compat_iovec __user *,vec,
> -		compat_ulong_t, vlen)
> +static size_t __compat_sys_readv(compat_ulong_t fd,
> +				 const struct compat_iovec __user *vec,
> +				 compat_ulong_t vlen, int flags)
>  {
>  	struct fd f = fdget_pos(fd);
>  	ssize_t ret;
> @@ -1072,16 +1128,24 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
>  	if (!f.file)
>  		return -EBADF;
>  	pos = f.file->f_pos;
> -	ret = compat_readv(f.file, vec, vlen, &pos);
> +	ret = compat_readv(f.file, vec, vlen, &pos, flags);
>  	if (ret >= 0)
>  		f.file->f_pos = pos;
>  	fdput_pos(f);
>  	return ret;
> +
> +}
> +
> +COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
> +		const struct compat_iovec __user *,vec,
> +		compat_ulong_t, vlen)
> +{
> +	return __compat_sys_readv(fd, vec, vlen, 0);
>  }
>  
>  static long __compat_sys_preadv64(unsigned long fd,
>  				  const struct compat_iovec __user *vec,
> -				  unsigned long vlen, loff_t pos)
> +				  unsigned long vlen, loff_t pos, int flags)
>  {
>  	struct fd f;
>  	ssize_t ret;
> @@ -1093,7 +1157,7 @@ static long __compat_sys_preadv64(unsigned long fd,
>  		return -EBADF;
>  	ret = -ESPIPE;
>  	if (f.file->f_mode & FMODE_PREAD)
> -		ret = compat_readv(f.file, vec, vlen, &pos);
> +		ret = compat_readv(f.file, vec, vlen, &pos, flags);
>  	fdput(f);
>  	return ret;
>  }
> @@ -1103,7 +1167,7 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
>  		const struct compat_iovec __user *,vec,
>  		unsigned long, vlen, loff_t, pos)
>  {
> -	return __compat_sys_preadv64(fd, vec, vlen, pos);
> +	return __compat_sys_preadv64(fd, vec, vlen, pos, 0);
>  }
>  #endif
>  
> @@ -1113,12 +1177,25 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
>  {
>  	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
>  
> -	return __compat_sys_preadv64(fd, vec, vlen, pos);
> +	return __compat_sys_preadv64(fd, vec, vlen, pos, 0);
> +}
> +
> +COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
> +		const struct compat_iovec __user *,vec,
> +		compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
> +		int, flags)
> +{
> +	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
> +
> +	if (pos == -1)
> +		return __compat_sys_readv(fd, vec, vlen, flags);
> +
> +	return __compat_sys_preadv64(fd, vec, vlen, pos, flags);
>  }
>  
>  static size_t compat_writev(struct file *file,
>  			    const struct compat_iovec __user *vec,
> -			    unsigned long vlen, loff_t *pos)
> +			    unsigned long vlen, loff_t *pos, int flags)
>  {
>  	ssize_t ret = -EBADF;
>  
> @@ -1128,8 +1205,10 @@ static size_t compat_writev(struct file *file,
>  	ret = -EINVAL;
>  	if (!(file->f_mode & FMODE_CAN_WRITE))
>  		goto out;
> +	if (flags & ~0)
> +		goto out;
>  
> -	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
> +	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, flags);
>  
>  out:
>  	if (ret > 0)
> @@ -1138,9 +1217,9 @@ out:
>  	return ret;
>  }
>  
> -COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
> -		const struct compat_iovec __user *, vec,
> -		compat_ulong_t, vlen)
> +static size_t __compat_sys_writev(compat_ulong_t fd,
> +				  const struct compat_iovec __user* vec,
> +				  compat_ulong_t vlen, int flags)
>  {
>  	struct fd f = fdget_pos(fd);
>  	ssize_t ret;
> @@ -1149,28 +1228,36 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
>  	if (!f.file)
>  		return -EBADF;
>  	pos = f.file->f_pos;
> -	ret = compat_writev(f.file, vec, vlen, &pos);
> +	ret = compat_writev(f.file, vec, vlen, &pos, flags);
>  	if (ret >= 0)
>  		f.file->f_pos = pos;
>  	fdput_pos(f);
>  	return ret;
>  }
>  
> +COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
> +		const struct compat_iovec __user *, vec,
> +		compat_ulong_t, vlen)
> +{
> +	return __compat_sys_writev(fd, vec, vlen, 0);
> +}
> +
>  static long __compat_sys_pwritev64(unsigned long fd,
>  				   const struct compat_iovec __user *vec,
> -				   unsigned long vlen, loff_t pos)
> +				   unsigned long vlen, loff_t pos, int flags)
>  {
>  	struct fd f;
>  	ssize_t ret;
>  
>  	if (pos < 0)
>  		return -EINVAL;
> +
>  	f = fdget(fd);
>  	if (!f.file)
>  		return -EBADF;
>  	ret = -ESPIPE;
>  	if (f.file->f_mode & FMODE_PWRITE)
> -		ret = compat_writev(f.file, vec, vlen, &pos);
> +		ret = compat_writev(f.file, vec, vlen, &pos, flags);
>  	fdput(f);
>  	return ret;
>  }
> @@ -1180,7 +1267,7 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
>  		const struct compat_iovec __user *,vec,
>  		unsigned long, vlen, loff_t, pos)
>  {
> -	return __compat_sys_pwritev64(fd, vec, vlen, pos);
> +	return __compat_sys_pwritev64(fd, vec, vlen, pos, 0);
>  }
>  #endif
>  
> @@ -1190,8 +1277,21 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
>  {
>  	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
>  
> -	return __compat_sys_pwritev64(fd, vec, vlen, pos);
> +	return __compat_sys_pwritev64(fd, vec, vlen, pos, 0);
> +}
> +
> +COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
> +		const struct compat_iovec __user *,vec,
> +		compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags)
> +{
> +	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
> +
> +	if (pos == -1)
> +		return __compat_sys_writev(fd, vec, vlen, flags);
> +
> +	return __compat_sys_pwritev64(fd, vec, vlen, pos, flags);
>  }
> +
>  #endif
>  
>  static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
> diff --git a/include/linux/compat.h b/include/linux/compat.h
> index e649426..63a94e2 100644
> --- a/include/linux/compat.h
> +++ b/include/linux/compat.h
> @@ -340,6 +340,12 @@ asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd,
>  asmlinkage ssize_t compat_sys_pwritev(compat_ulong_t fd,
>  		const struct compat_iovec __user *vec,
>  		compat_ulong_t vlen, u32 pos_low, u32 pos_high);
> +asmlinkage ssize_t compat_sys_preadv2(compat_ulong_t fd,
> +		const struct compat_iovec __user *vec,
> +		compat_ulong_t vlen, u32 pos_low, u32 pos_high, int flags);
> +asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd,
> +		const struct compat_iovec __user *vec,
> +		compat_ulong_t vlen, u32 pos_low, u32 pos_high, int flags);
>  
>  #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
>  asmlinkage long compat_sys_preadv64(unsigned long fd,
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index bda9b81..cedc22e 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -571,8 +571,14 @@ asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf,
>  			     size_t count, loff_t pos);
>  asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec,
>  			   unsigned long vlen, unsigned long pos_l, unsigned long pos_h);
> +asmlinkage long sys_preadv2(unsigned long fd, const struct iovec __user *vec,
> +			    unsigned long vlen, unsigned long pos_l, unsigned long pos_h,
> +			    int flags);
>  asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec,
>  			    unsigned long vlen, unsigned long pos_l, unsigned long pos_h);
> +asmlinkage long sys_pwritev2(unsigned long fd, const struct iovec __user *vec,
> +			    unsigned long vlen, unsigned long pos_l, unsigned long pos_h,
> +			    int flags);
>  asmlinkage long sys_getcwd(char __user *buf, unsigned long size);
>  asmlinkage long sys_mkdir(const char __user *pathname, umode_t mode);
>  asmlinkage long sys_chdir(const char __user *filename);
> diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
> index 22749c1..9406018 100644
> --- a/include/uapi/asm-generic/unistd.h
> +++ b/include/uapi/asm-generic/unistd.h
> @@ -213,6 +213,10 @@ __SC_COMP(__NR_pwrite64, sys_pwrite64, compat_sys_pwrite64)
>  __SC_COMP(__NR_preadv, sys_preadv, compat_sys_preadv)
>  #define __NR_pwritev 70
>  __SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev)
> +#define __NR_preadv2 281
> +__SC_COMP(__NR_preadv2, sys_preadv2, compat_sys_preadv2)
> +#define __NR_pwritev2 282
> +__SC_COMP(__NR_pwritev2, sys_pwritev2, compat_sys_pwritev2)
>  
>  /* fs/sendfile.c */
>  #define __NR3264_sendfile 71
> @@ -709,7 +713,7 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create)
>  __SYSCALL(__NR_bpf, sys_bpf)
>  
>  #undef __NR_syscalls
> -#define __NR_syscalls 281
> +#define __NR_syscalls 283
>  
>  /*
>   * All syscalls below here should go away really,
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 14b4642..530c263 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1457,6 +1457,7 @@ static void shrink_readahead_size_eio(struct file *filp,
>   * @ppos:	current file position
>   * @iter:	data destination
>   * @written:	already copied
> + * @flags:	optional flags
>   *
>   * This is a generic file read routine, and uses the
>   * mapping->a_ops->readpage() function for the actual low-level stuff.
> @@ -1465,7 +1466,7 @@ static void shrink_readahead_size_eio(struct file *filp,
>   * of the logic when it comes to error handling etc.
>   */
>  static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
> -		struct iov_iter *iter, ssize_t written)
> +		struct iov_iter *iter, ssize_t written, int flags)
>  {
>  	struct address_space *mapping = filp->f_mapping;
>  	struct inode *inode = mapping->host;
> @@ -1735,7 +1736,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
>  		}
>  	}
>  
> -	retval = do_generic_file_read(file, ppos, iter, retval);
> +	retval = do_generic_file_read(file, ppos, iter, retval, iocb->ki_rwflags);
>  out:
>  	return retval;
>  }

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v6 2/7] vfs: Define new syscalls preadv2,pwritev2
  2014-11-10 16:40 ` [PATCH v6 2/7] vfs: Define new syscalls preadv2,pwritev2 Milosz Tanski
  2014-11-11 21:09   ` Jeff Moyer
@ 2014-11-12 13:18   ` mohanty bhagaban
  1 sibling, 0 replies; 5+ messages in thread
From: mohanty bhagaban @ 2014-11-12 13:18 UTC (permalink / raw)
  To: russel.david100
  Cc: linux-kernel, Christoph Hellwig, linux-fsdevel, linux-aio,
	Mel Gorman, Volker Lendecke, Tejun Heo, Jeff Moyer,
	Theodore Ts'o, Al Viro, linux-api, Michael Kerrisk,
	linux-arch, linux-mm

[-- Attachment #1: Type: text/plain, Size: 19994 bytes --]

Russel,

Will this new flag affect any io_vector or any buffer cache?

+SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *,
vec,
+               unsigned long, vlen, unsigned long, pos_l, unsigned long,
pos_h,
+               int flags)
+{
+       loff_t pos = pos_from_hilo(pos_h, pos_l);
+
+       if (pos == -1)
+               return do_readv(fd, vec, vlen, flags);
+
+       return do_preadv(fd, vec, vlen, pos, flags);
+}
+

Bhagaban





On Mon, Nov 10, 2014 at 10:10 PM, Milosz Tanski <milosz@adfin.com> wrote:

> New syscalls that take a flag argument. This change does not add any
> specific flags.
>
> Signed-off-by: Milosz Tanski <milosz@adfin.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/read_write.c                   | 172
> ++++++++++++++++++++++++++++++--------
>  include/linux/compat.h            |   6 ++
>  include/linux/syscalls.h          |   6 ++
>  include/uapi/asm-generic/unistd.h |   6 +-
>  mm/filemap.c                      |   5 +-
>  5 files changed, 156 insertions(+), 39 deletions(-)
>
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 94b2d34..b1b4bc8 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -866,6 +866,8 @@ ssize_t vfs_readv(struct file *file, const struct
> iovec __user *vec,
>                 return -EBADF;
>         if (!(file->f_mode & FMODE_CAN_READ))
>                 return -EINVAL;
> +       if (flags & ~0)
> +               return -EINVAL;
>
>         return do_readv_writev(READ, file, vec, vlen, pos, flags);
>  }
> @@ -879,21 +881,23 @@ ssize_t vfs_writev(struct file *file, const struct
> iovec __user *vec,
>                 return -EBADF;
>         if (!(file->f_mode & FMODE_CAN_WRITE))
>                 return -EINVAL;
> +       if (flags & ~0)
> +               return -EINVAL;
>
>         return do_readv_writev(WRITE, file, vec, vlen, pos, flags);
>  }
>
>  EXPORT_SYMBOL(vfs_writev);
>
> -SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *,
> vec,
> -               unsigned long, vlen)
> +static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
> +                       unsigned long vlen, int flags)
>  {
>         struct fd f = fdget_pos(fd);
>         ssize_t ret = -EBADF;
>
>         if (f.file) {
>                 loff_t pos = file_pos_read(f.file);
> -               ret = vfs_readv(f.file, vec, vlen, &pos, 0);
> +               ret = vfs_readv(f.file, vec, vlen, &pos, flags);
>                 if (ret >= 0)
>                         file_pos_write(f.file, pos);
>                 fdput_pos(f);
> @@ -905,15 +909,15 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const
> struct iovec __user *, vec,
>         return ret;
>  }
>
> -SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *,
> vec,
> -               unsigned long, vlen)
> +static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
> +                        unsigned long vlen, int flags)
>  {
>         struct fd f = fdget_pos(fd);
>         ssize_t ret = -EBADF;
>
>         if (f.file) {
>                 loff_t pos = file_pos_read(f.file);
> -               ret = vfs_writev(f.file, vec, vlen, &pos, 0);
> +               ret = vfs_writev(f.file, vec, vlen, &pos, flags);
>                 if (ret >= 0)
>                         file_pos_write(f.file, pos);
>                 fdput_pos(f);
> @@ -931,10 +935,9 @@ static inline loff_t pos_from_hilo(unsigned long
> high, unsigned long low)
>         return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
>  }
>
> -SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *,
> vec,
> -               unsigned long, vlen, unsigned long, pos_l, unsigned long,
> pos_h)
> +static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
> +                        unsigned long vlen, loff_t pos, int flags)
>  {
> -       loff_t pos = pos_from_hilo(pos_h, pos_l);
>         struct fd f;
>         ssize_t ret = -EBADF;
>
> @@ -945,7 +948,7 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const
> struct iovec __user *, vec,
>         if (f.file) {
>                 ret = -ESPIPE;
>                 if (f.file->f_mode & FMODE_PREAD)
> -                       ret = vfs_readv(f.file, vec, vlen, &pos, 0);
> +                       ret = vfs_readv(f.file, vec, vlen, &pos, flags);
>                 fdput(f);
>         }
>
> @@ -955,10 +958,9 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const
> struct iovec __user *, vec,
>         return ret;
>  }
>
> -SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *,
> vec,
> -               unsigned long, vlen, unsigned long, pos_l, unsigned long,
> pos_h)
> +static ssize_t do_pwritev(unsigned long fd, const struct iovec __user
> *vec,
> +                         unsigned long vlen, loff_t pos, int flags)
>  {
> -       loff_t pos = pos_from_hilo(pos_h, pos_l);
>         struct fd f;
>         ssize_t ret = -EBADF;
>
> @@ -969,7 +971,7 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const
> struct iovec __user *, vec,
>         if (f.file) {
>                 ret = -ESPIPE;
>                 if (f.file->f_mode & FMODE_PWRITE)
> -                       ret = vfs_writev(f.file, vec, vlen, &pos, 0);
> +                       ret = vfs_writev(f.file, vec, vlen, &pos, flags);
>                 fdput(f);
>         }
>
> @@ -979,11 +981,63 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const
> struct iovec __user *, vec,
>         return ret;
>  }
>
> +SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *,
> vec,
> +               unsigned long, vlen)
> +{
> +       return do_readv(fd, vec, vlen, 0);
> +}
> +
> +SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *,
> vec,
> +               unsigned long, vlen)
> +{
> +       return do_writev(fd, vec, vlen, 0);
> +}
> +
> +SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *,
> vec,
> +               unsigned long, vlen, unsigned long, pos_l, unsigned long,
> pos_h)
> +{
> +       loff_t pos = pos_from_hilo(pos_h, pos_l);
> +
> +       return do_preadv(fd, vec, vlen, pos, 0);
> +}
> +
> +SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *,
> vec,
> +               unsigned long, vlen, unsigned long, pos_l, unsigned long,
> pos_h,
> +               int, flags)
> +{
> +       loff_t pos = pos_from_hilo(pos_h, pos_l);
> +
> +       if (pos == -1)
> +               return do_readv(fd, vec, vlen, flags);
> +
> +       return do_preadv(fd, vec, vlen, pos, flags);
> +}
> +
> +SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *,
> vec,
> +               unsigned long, vlen, unsigned long, pos_l, unsigned long,
> pos_h)
> +{
> +       loff_t pos = pos_from_hilo(pos_h, pos_l);
> +
> +       return do_pwritev(fd, vec, vlen, pos, 0);
> +}
> +
> +SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *,
> vec,
> +               unsigned long, vlen, unsigned long, pos_l, unsigned long,
> pos_h,
> +               int, flags)
> +{
> +       loff_t pos = pos_from_hilo(pos_h, pos_l);
> +
> +       if (pos == -1)
> +               return do_writev(fd, vec, vlen, flags);
> +
> +       return do_pwritev(fd, vec, vlen, pos, flags);
> +}
> +
>  #ifdef CONFIG_COMPAT
>
>  static ssize_t compat_do_readv_writev(int type, struct file *file,
>                                const struct compat_iovec __user *uvector,
> -                              unsigned long nr_segs, loff_t *pos)
> +                              unsigned long nr_segs, loff_t *pos, int
> flags)
>  {
>         compat_ssize_t tot_len;
>         struct iovec iovstack[UIO_FASTIOV];
> @@ -1017,7 +1071,7 @@ static ssize_t compat_do_readv_writev(int type,
> struct file *file,
>
>         if (iter_fn)
>                 ret = do_iter_readv_writev(file, type, iov, nr_segs,
> tot_len,
> -                                               pos, iter_fn, 0);
> +                                               pos, iter_fn, flags);
>         else if (fnv)
>                 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
>                                                 pos, fnv);
> @@ -1041,7 +1095,7 @@ out:
>
>  static size_t compat_readv(struct file *file,
>                            const struct compat_iovec __user *vec,
> -                          unsigned long vlen, loff_t *pos)
> +                          unsigned long vlen, loff_t *pos, int flags)
>  {
>         ssize_t ret = -EBADF;
>
> @@ -1051,8 +1105,10 @@ static size_t compat_readv(struct file *file,
>         ret = -EINVAL;
>         if (!(file->f_mode & FMODE_CAN_READ))
>                 goto out;
> +       if (flags & ~0)
> +               goto out;
>
> -       ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
> +       ret = compat_do_readv_writev(READ, file, vec, vlen, pos, flags);
>
>  out:
>         if (ret > 0)
> @@ -1061,9 +1117,9 @@ out:
>         return ret;
>  }
>
> -COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
> -               const struct compat_iovec __user *,vec,
> -               compat_ulong_t, vlen)
> +static size_t __compat_sys_readv(compat_ulong_t fd,
> +                                const struct compat_iovec __user *vec,
> +                                compat_ulong_t vlen, int flags)
>  {
>         struct fd f = fdget_pos(fd);
>         ssize_t ret;
> @@ -1072,16 +1128,24 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
>         if (!f.file)
>                 return -EBADF;
>         pos = f.file->f_pos;
> -       ret = compat_readv(f.file, vec, vlen, &pos);
> +       ret = compat_readv(f.file, vec, vlen, &pos, flags);
>         if (ret >= 0)
>                 f.file->f_pos = pos;
>         fdput_pos(f);
>         return ret;
> +
> +}
> +
> +COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
> +               const struct compat_iovec __user *,vec,
> +               compat_ulong_t, vlen)
> +{
> +       return __compat_sys_readv(fd, vec, vlen, 0);
>  }
>
>  static long __compat_sys_preadv64(unsigned long fd,
>                                   const struct compat_iovec __user *vec,
> -                                 unsigned long vlen, loff_t pos)
> +                                 unsigned long vlen, loff_t pos, int
> flags)
>  {
>         struct fd f;
>         ssize_t ret;
> @@ -1093,7 +1157,7 @@ static long __compat_sys_preadv64(unsigned long fd,
>                 return -EBADF;
>         ret = -ESPIPE;
>         if (f.file->f_mode & FMODE_PREAD)
> -               ret = compat_readv(f.file, vec, vlen, &pos);
> +               ret = compat_readv(f.file, vec, vlen, &pos, flags);
>         fdput(f);
>         return ret;
>  }
> @@ -1103,7 +1167,7 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
>                 const struct compat_iovec __user *,vec,
>                 unsigned long, vlen, loff_t, pos)
>  {
> -       return __compat_sys_preadv64(fd, vec, vlen, pos);
> +       return __compat_sys_preadv64(fd, vec, vlen, pos, 0);
>  }
>  #endif
>
> @@ -1113,12 +1177,25 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
>  {
>         loff_t pos = ((loff_t)pos_high << 32) | pos_low;
>
> -       return __compat_sys_preadv64(fd, vec, vlen, pos);
> +       return __compat_sys_preadv64(fd, vec, vlen, pos, 0);
> +}
> +
> +COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
> +               const struct compat_iovec __user *,vec,
> +               compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
> +               int, flags)
> +{
> +       loff_t pos = ((loff_t)pos_high << 32) | pos_low;
> +
> +       if (pos == -1)
> +               return __compat_sys_readv(fd, vec, vlen, flags);
> +
> +       return __compat_sys_preadv64(fd, vec, vlen, pos, flags);
>  }
>
>  static size_t compat_writev(struct file *file,
>                             const struct compat_iovec __user *vec,
> -                           unsigned long vlen, loff_t *pos)
> +                           unsigned long vlen, loff_t *pos, int flags)
>  {
>         ssize_t ret = -EBADF;
>
> @@ -1128,8 +1205,10 @@ static size_t compat_writev(struct file *file,
>         ret = -EINVAL;
>         if (!(file->f_mode & FMODE_CAN_WRITE))
>                 goto out;
> +       if (flags & ~0)
> +               goto out;
>
> -       ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
> +       ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, flags);
>
>  out:
>         if (ret > 0)
> @@ -1138,9 +1217,9 @@ out:
>         return ret;
>  }
>
> -COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
> -               const struct compat_iovec __user *, vec,
> -               compat_ulong_t, vlen)
> +static size_t __compat_sys_writev(compat_ulong_t fd,
> +                                 const struct compat_iovec __user* vec,
> +                                 compat_ulong_t vlen, int flags)
>  {
>         struct fd f = fdget_pos(fd);
>         ssize_t ret;
> @@ -1149,28 +1228,36 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
>         if (!f.file)
>                 return -EBADF;
>         pos = f.file->f_pos;
> -       ret = compat_writev(f.file, vec, vlen, &pos);
> +       ret = compat_writev(f.file, vec, vlen, &pos, flags);
>         if (ret >= 0)
>                 f.file->f_pos = pos;
>         fdput_pos(f);
>         return ret;
>  }
>
> +COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
> +               const struct compat_iovec __user *, vec,
> +               compat_ulong_t, vlen)
> +{
> +       return __compat_sys_writev(fd, vec, vlen, 0);
> +}
> +
>  static long __compat_sys_pwritev64(unsigned long fd,
>                                    const struct compat_iovec __user *vec,
> -                                  unsigned long vlen, loff_t pos)
> +                                  unsigned long vlen, loff_t pos, int flags)
>  {
>         struct fd f;
>         ssize_t ret;
>
>         if (pos < 0)
>                 return -EINVAL;
> +
>         f = fdget(fd);
>         if (!f.file)
>                 return -EBADF;
>         ret = -ESPIPE;
>         if (f.file->f_mode & FMODE_PWRITE)
> -               ret = compat_writev(f.file, vec, vlen, &pos);
> +               ret = compat_writev(f.file, vec, vlen, &pos, flags);
>         fdput(f);
>         return ret;
>  }
> @@ -1180,7 +1267,7 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
>                 const struct compat_iovec __user *,vec,
>                 unsigned long, vlen, loff_t, pos)
>  {
> -       return __compat_sys_pwritev64(fd, vec, vlen, pos);
> +       return __compat_sys_pwritev64(fd, vec, vlen, pos, 0);
>  }
>  #endif
>
> @@ -1190,8 +1277,21 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
>  {
>         loff_t pos = ((loff_t)pos_high << 32) | pos_low;
>
> -       return __compat_sys_pwritev64(fd, vec, vlen, pos);
> +       return __compat_sys_pwritev64(fd, vec, vlen, pos, 0);
> +}
> +
> +COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
> +               const struct compat_iovec __user *,vec,
> +               compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags)
> +{
> +       loff_t pos = ((loff_t)pos_high << 32) | pos_low;
> +
> +       if (pos == -1)
> +               return __compat_sys_writev(fd, vec, vlen, flags);
> +
> +       return __compat_sys_pwritev64(fd, vec, vlen, pos, flags);
>  }
> +
>  #endif
>
>  static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
> diff --git a/include/linux/compat.h b/include/linux/compat.h
> index e649426..63a94e2 100644
> --- a/include/linux/compat.h
> +++ b/include/linux/compat.h
> @@ -340,6 +340,12 @@ asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd,
>  asmlinkage ssize_t compat_sys_pwritev(compat_ulong_t fd,
>                 const struct compat_iovec __user *vec,
>                 compat_ulong_t vlen, u32 pos_low, u32 pos_high);
> +asmlinkage ssize_t compat_sys_preadv2(compat_ulong_t fd,
> +               const struct compat_iovec __user *vec,
> +               compat_ulong_t vlen, u32 pos_low, u32 pos_high, int flags);
> +asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd,
> +               const struct compat_iovec __user *vec,
> +               compat_ulong_t vlen, u32 pos_low, u32 pos_high, int flags);
>
>  #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
>  asmlinkage long compat_sys_preadv64(unsigned long fd,
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index bda9b81..cedc22e 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -571,8 +571,14 @@ asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf,
>                              size_t count, loff_t pos);
>  asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec,
>                            unsigned long vlen, unsigned long pos_l, unsigned long pos_h);
> +asmlinkage long sys_preadv2(unsigned long fd, const struct iovec __user *vec,
> +                           unsigned long vlen, unsigned long pos_l, unsigned long pos_h,
> +                           int flags);
>  asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec,
>                             unsigned long vlen, unsigned long pos_l, unsigned long pos_h);
> +asmlinkage long sys_pwritev2(unsigned long fd, const struct iovec __user *vec,
> +                           unsigned long vlen, unsigned long pos_l, unsigned long pos_h,
> +                           int flags);
>  asmlinkage long sys_getcwd(char __user *buf, unsigned long size);
>  asmlinkage long sys_mkdir(const char __user *pathname, umode_t mode);
>  asmlinkage long sys_chdir(const char __user *filename);
> diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
> index 22749c1..9406018 100644
> --- a/include/uapi/asm-generic/unistd.h
> +++ b/include/uapi/asm-generic/unistd.h
> @@ -213,6 +213,10 @@ __SC_COMP(__NR_pwrite64, sys_pwrite64, compat_sys_pwrite64)
>  __SC_COMP(__NR_preadv, sys_preadv, compat_sys_preadv)
>  #define __NR_pwritev 70
>  __SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev)
> +#define __NR_preadv2 281
> +__SC_COMP(__NR_preadv2, sys_preadv2, compat_sys_preadv2)
> +#define __NR_pwritev2 282
> +__SC_COMP(__NR_pwritev2, sys_pwritev2, compat_sys_pwritev2)
>
>  /* fs/sendfile.c */
>  #define __NR3264_sendfile 71
> @@ -709,7 +713,7 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create)
>  __SYSCALL(__NR_bpf, sys_bpf)
>
>  #undef __NR_syscalls
> -#define __NR_syscalls 281
> +#define __NR_syscalls 283
>
>  /*
>   * All syscalls below here should go away really,
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 14b4642..530c263 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1457,6 +1457,7 @@ static void shrink_readahead_size_eio(struct file *filp,
>   * @ppos:      current file position
>   * @iter:      data destination
>   * @written:   already copied
> + * @flags:     optional flags
>   *
>   * This is a generic file read routine, and uses the
>   * mapping->a_ops->readpage() function for the actual low-level stuff.
> @@ -1465,7 +1466,7 @@ static void shrink_readahead_size_eio(struct file *filp,
>   * of the logic when it comes to error handling etc.
>   */
>  static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
> -               struct iov_iter *iter, ssize_t written)
> +               struct iov_iter *iter, ssize_t written, int flags)
>  {
>         struct address_space *mapping = filp->f_mapping;
>         struct inode *inode = mapping->host;
> @@ -1735,7 +1736,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
>                 }
>         }
>
> -       retval = do_generic_file_read(file, ppos, iter, retval);
> +       retval = do_generic_file_read(file, ppos, iter, retval, iocb->ki_rwflags);
>  out:
>         return retval;
>  }
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

[-- Attachment #2: Type: text/html, Size: 25572 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2014-11-12 13:18 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <cover.1415636409.git.milosz@adfin.com>
2014-11-10 16:40 ` [PATCH v6 2/7] vfs: Define new syscalls preadv2,pwritev2 Milosz Tanski
2014-11-11 21:09   ` Jeff Moyer
2014-11-12 13:18   ` mohanty bhagaban
2014-11-10 16:40 ` [PATCH v6 4/7] vfs: RWF_NONBLOCK flag for preadv2 Milosz Tanski
2014-11-10 16:40 ` [PATCH v6 7/7] fs: add a flag for per-operation O_DSYNC semantics Milosz Tanski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox