linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/5] shmemfs stable directory cookies
@ 2023-05-05 18:37 Chuck Lever
  2023-05-05 18:38 ` [PATCH v2 1/5] shmem: Refactor shmem_symlink() Chuck Lever
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Chuck Lever @ 2023-05-05 18:37 UTC (permalink / raw)
  To: hughd, akpm; +Cc: linux-mm, linux-fsdevel

The following series is for continued discussion of the need for
and implementation of stable directory cookies for shmemfs/tmpfs.

Based on one of Andrew's review comments, I've split this one patch
into a series to (hopefully) reduce its complexity and make it
easier to analyze the changes.

Although the patch(es) have been passing functional tests for
several weeks, there have been some reports of performance
regressions that we still need to get to the bottom of.

We might consider a simpler lseek/readdir implementation, as using
an xarray is effective but a bit of overkill. I'd like to avoid a
linked list implementation as that is known to have significant
performance impact past a dozen or so list entries.

Changes since v1:
- Break the patch up into a series

Changes since RFC:
- Destroy xarray in shmem_destroy_inode() instead of free_in_core_inode()
- A few cosmetic updates

---

Chuck Lever (5):
      shmem: Refactor shmem_symlink()
      shmem: Add dir_operations specific to tmpfs
      shmem: Add a per-directory xarray
      shmem: Add a shmem-specific dir_emit helper
      shmem: stable directory cookies


 include/linux/shmem_fs.h |   2 +
 mm/shmem.c               | 213 ++++++++++++++++++++++++++++++++++++---
 2 files changed, 201 insertions(+), 14 deletions(-)

--
Chuck Lever


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 1/5] shmem: Refactor shmem_symlink()
  2023-05-05 18:37 [PATCH v2 0/5] shmemfs stable directory cookies Chuck Lever
@ 2023-05-05 18:38 ` Chuck Lever
  2023-05-05 18:38 ` [PATCH v2 2/5] shmem: Add dir_operations specific to tmpfs Chuck Lever
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Chuck Lever @ 2023-05-05 18:38 UTC (permalink / raw)
  To: hughd, akpm; +Cc: linux-mm, linux-fsdevel

From: Chuck Lever <chuck.lever@oracle.com>

De-duplicate the error handling paths. No change in behavior is
expected.

Suggested-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 mm/shmem.c |   19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index e40a08c5c6d7..721f9fd064aa 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3161,26 +3161,22 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 
 	error = security_inode_init_security(inode, dir, &dentry->d_name,
 					     shmem_initxattrs, NULL);
-	if (error && error != -EOPNOTSUPP) {
-		iput(inode);
-		return error;
-	}
+	if (error && error != -EOPNOTSUPP)
+		goto out_iput;
 
 	inode->i_size = len-1;
 	if (len <= SHORT_SYMLINK_LEN) {
 		inode->i_link = kmemdup(symname, len, GFP_KERNEL);
 		if (!inode->i_link) {
-			iput(inode);
-			return -ENOMEM;
+			error = -ENOMEM;
+			goto out_iput;
 		}
 		inode->i_op = &shmem_short_symlink_operations;
 	} else {
 		inode_nohighmem(inode);
 		error = shmem_get_folio(inode, 0, &folio, SGP_WRITE);
-		if (error) {
-			iput(inode);
-			return error;
-		}
+		if (error)
+			goto out_iput;
 		inode->i_mapping->a_ops = &shmem_aops;
 		inode->i_op = &shmem_symlink_inode_operations;
 		memcpy(folio_address(folio), symname, len);
@@ -3195,6 +3191,9 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 	d_instantiate(dentry, inode);
 	dget(dentry);
 	return 0;
+out_iput:
+	iput(inode);
+	return error;
 }
 
 static void shmem_put_link(void *arg)



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 2/5] shmem: Add dir_operations specific to tmpfs
  2023-05-05 18:37 [PATCH v2 0/5] shmemfs stable directory cookies Chuck Lever
  2023-05-05 18:38 ` [PATCH v2 1/5] shmem: Refactor shmem_symlink() Chuck Lever
@ 2023-05-05 18:38 ` Chuck Lever
  2023-05-05 18:39 ` [PATCH v2 3/5] shmem: Add a per-directory xarray Chuck Lever
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Chuck Lever @ 2023-05-05 18:38 UTC (permalink / raw)
  To: hughd, akpm; +Cc: linux-mm, linux-fsdevel

From: Chuck Lever <chuck.lever@oracle.com>

Copy the simple directory operations out of fs/libfs.c to create a
set of dir_operations specific to tmpfs. These will be modified by
a subsequent patch to replace the cursor-based directory entry
offset mechanism.

No behavior change is expected.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 mm/shmem.c |  143 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 142 insertions(+), 1 deletion(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 721f9fd064aa..e48a0947bcaf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -235,6 +235,7 @@ static const struct super_operations shmem_ops;
 const struct address_space_operations shmem_aops;
 static const struct file_operations shmem_file_operations;
 static const struct inode_operations shmem_inode_operations;
+static const struct file_operations shmem_dir_operations;
 static const struct inode_operations shmem_dir_inode_operations;
 static const struct inode_operations shmem_special_inode_operations;
 static const struct vm_operations_struct shmem_vm_ops;
@@ -2410,7 +2411,7 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block
 			/* Some things misbehave if size == 0 on a directory */
 			inode->i_size = 2 * BOGO_DIRENT_SIZE;
 			inode->i_op = &shmem_dir_inode_operations;
-			inode->i_fop = &simple_dir_operations;
+			inode->i_fop = &shmem_dir_operations;
 			break;
 		case S_IFLNK:
 			/*
@@ -3235,6 +3236,137 @@ static const char *shmem_get_link(struct dentry *dentry,
 	return folio_address(folio);
 }
 
+static struct dentry *scan_positives(struct dentry *cursor,
+					struct list_head *p,
+					loff_t count,
+					struct dentry *last)
+{
+	struct dentry *dentry = cursor->d_parent, *found = NULL;
+
+	spin_lock(&dentry->d_lock);
+	while ((p = p->next) != &dentry->d_subdirs) {
+		struct dentry *d = list_entry(p, struct dentry, d_child);
+		// we must at least skip cursors, to avoid livelocks
+		if (d->d_flags & DCACHE_DENTRY_CURSOR)
+			continue;
+		if (simple_positive(d) && !--count) {
+			spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
+			if (simple_positive(d))
+				found = dget_dlock(d);
+			spin_unlock(&d->d_lock);
+			if (likely(found))
+				break;
+			count = 1;
+		}
+		if (need_resched()) {
+			list_move(&cursor->d_child, p);
+			p = &cursor->d_child;
+			spin_unlock(&dentry->d_lock);
+			cond_resched();
+			spin_lock(&dentry->d_lock);
+		}
+	}
+	spin_unlock(&dentry->d_lock);
+	dput(last);
+	return found;
+}
+
+static loff_t shmem_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct dentry *dentry = file->f_path.dentry;
+	switch (whence) {
+		case 1:
+			offset += file->f_pos;
+			fallthrough;
+		case 0:
+			if (offset >= 0)
+				break;
+			fallthrough;
+		default:
+			return -EINVAL;
+	}
+	if (offset != file->f_pos) {
+		struct dentry *cursor = file->private_data;
+		struct dentry *to = NULL;
+
+		inode_lock_shared(dentry->d_inode);
+
+		if (offset > 2)
+			to = scan_positives(cursor, &dentry->d_subdirs,
+					    offset - 2, NULL);
+		spin_lock(&dentry->d_lock);
+		if (to)
+			list_move(&cursor->d_child, &to->d_child);
+		else
+			list_del_init(&cursor->d_child);
+		spin_unlock(&dentry->d_lock);
+		dput(to);
+
+		file->f_pos = offset;
+
+		inode_unlock_shared(dentry->d_inode);
+	}
+	return offset;
+}
+
+/**
+ * shmem_readdir - Emit entries starting at offset @ctx->pos
+ * @file: an open directory to iterate over
+ * @ctx: directory iteration context
+ *
+ * Caller must hold @file's i_rwsem to prevent insertion or removal of
+ * entries during this call.
+ *
+ * On entry, @ctx->pos contains an offset that represents the first entry
+ * to be read from the directory.
+ *
+ * The operation continues until there are no more entries to read, or
+ * until the ctx->actor indicates there is no more space in the caller's
+ * output buffer.
+ *
+ * On return, @ctx->pos contains an offset that will read the next entry
+ * in this directory when shmem_readdir() is called again with @ctx.
+ *
+ * Return values:
+ *   %0 - Complete
+ */
+static int shmem_readdir(struct file *file, struct dir_context *ctx)
+{
+	struct dentry *dentry = file->f_path.dentry;
+	struct dentry *cursor = file->private_data;
+	struct list_head *anchor = &dentry->d_subdirs;
+	struct dentry *next = NULL;
+	struct list_head *p;
+
+	if (!dir_emit_dots(file, ctx))
+		return 0;
+
+	if (ctx->pos == 2)
+		p = anchor;
+	else if (!list_empty(&cursor->d_child))
+		p = &cursor->d_child;
+	else
+		return 0;
+
+	while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
+		if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
+			      d_inode(next)->i_ino,
+			      fs_umode_to_dtype(d_inode(next)->i_mode)))
+			break;
+		ctx->pos++;
+		p = &next->d_child;
+	}
+	spin_lock(&dentry->d_lock);
+	if (next)
+		list_move_tail(&cursor->d_child, &next->d_child);
+	else
+		list_del_init(&cursor->d_child);
+	spin_unlock(&dentry->d_lock);
+	dput(next);
+
+	return 0;
+}
+
 #ifdef CONFIG_TMPFS_XATTR
 
 static int shmem_fileattr_get(struct dentry *dentry, struct fileattr *fa)
@@ -3987,6 +4119,15 @@ static const struct inode_operations shmem_inode_operations = {
 #endif
 };
 
+static const struct file_operations shmem_dir_operations = {
+#ifdef CONFIG_TMPFS
+	.llseek		= shmem_dir_llseek,
+	.iterate_shared	= shmem_readdir,
+#endif
+	.read		= generic_read_dir,
+	.fsync		= noop_fsync,
+};
+
 static const struct inode_operations shmem_dir_inode_operations = {
 #ifdef CONFIG_TMPFS
 	.getattr	= shmem_getattr,



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 3/5] shmem: Add a per-directory xarray
  2023-05-05 18:37 [PATCH v2 0/5] shmemfs stable directory cookies Chuck Lever
  2023-05-05 18:38 ` [PATCH v2 1/5] shmem: Refactor shmem_symlink() Chuck Lever
  2023-05-05 18:38 ` [PATCH v2 2/5] shmem: Add dir_operations specific to tmpfs Chuck Lever
@ 2023-05-05 18:39 ` Chuck Lever
  2023-05-05 18:39 ` [PATCH v2 4/5] shmem: Add a shmem-specific dir_emit helper Chuck Lever
  2023-05-05 18:39 ` [PATCH v2 5/5] shmem: stable directory cookies Chuck Lever
  4 siblings, 0 replies; 6+ messages in thread
From: Chuck Lever @ 2023-05-05 18:39 UTC (permalink / raw)
  To: hughd, akpm; +Cc: linux-mm, linux-fsdevel

From: Chuck Lever <chuck.lever@oracle.com>

Add the infrastructure for managing a per-directory
directory-offset-to-dentry map. For the moment it is unused.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/shmem_fs.h |    2 ++
 mm/shmem.c               |   28 ++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 9029abd29b1c..c1a12eac778d 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -27,6 +27,8 @@ struct shmem_inode_info {
 	atomic_t		stop_eviction;	/* hold when working on inode */
 	struct timespec64	i_crtime;	/* file creation time */
 	unsigned int		fsflags;	/* flags for FS_IOC_[SG]ETFLAGS */
+	struct xarray		doff_map;	/* dir offset to entry mapping */
+	u32			next_doff;
 	struct inode		vfs_inode;
 };
 
diff --git a/mm/shmem.c b/mm/shmem.c
index e48a0947bcaf..b78253996108 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -40,6 +40,8 @@
 #include <linux/fs_parser.h>
 #include <linux/swapfile.h>
 #include <linux/iversion.h>
+#include <linux/xarray.h>
+
 #include "swap.h"
 
 static struct vfsmount *shm_mnt;
@@ -2412,6 +2414,8 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block
 			inode->i_size = 2 * BOGO_DIRENT_SIZE;
 			inode->i_op = &shmem_dir_inode_operations;
 			inode->i_fop = &shmem_dir_operations;
+			xa_init_flags(&info->doff_map, XA_FLAGS_ALLOC1);
+			info->next_doff = 0;
 			break;
 		case S_IFLNK:
 			/*
@@ -2930,6 +2934,22 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
+static struct xarray *shmem_doff_map(struct inode *dir)
+{
+	return &SHMEM_I(dir)->doff_map;
+}
+
+/*
+ * During fs teardown (eg. umount), a directory's doff_map might still
+ * contain entries. xa_destroy() cleans out anything that remains.
+ */
+static void shmem_doff_map_destroy(struct inode *inode)
+{
+	struct xarray *xa = shmem_doff_map(inode);
+
+	xa_destroy(xa);
+}
+
 /*
  * File creation. Allocate an inode, and we're done..
  */
@@ -3905,6 +3925,12 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 	return 0;
 }
 
+#else /* CONFIG_TMPFS */
+
+static inline void shmem_doff_map_destroy(struct inode *dir)
+{
+}
+
 #endif /* CONFIG_TMPFS */
 
 static void shmem_put_super(struct super_block *sb)
@@ -4052,6 +4078,8 @@ static void shmem_destroy_inode(struct inode *inode)
 {
 	if (S_ISREG(inode->i_mode))
 		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
+	if (S_ISDIR(inode->i_mode))
+		shmem_doff_map_destroy(inode);
 }
 
 static void shmem_init_inode(void *foo)



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 4/5] shmem: Add a shmem-specific dir_emit helper
  2023-05-05 18:37 [PATCH v2 0/5] shmemfs stable directory cookies Chuck Lever
                   ` (2 preceding siblings ...)
  2023-05-05 18:39 ` [PATCH v2 3/5] shmem: Add a per-directory xarray Chuck Lever
@ 2023-05-05 18:39 ` Chuck Lever
  2023-05-05 18:39 ` [PATCH v2 5/5] shmem: stable directory cookies Chuck Lever
  4 siblings, 0 replies; 6+ messages in thread
From: Chuck Lever @ 2023-05-05 18:39 UTC (permalink / raw)
  To: hughd, akpm; +Cc: linux-mm, linux-fsdevel

From: Chuck Lever <chuck.lever@oracle.com>

Clean up to improve the readability of shmem_readdir().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 mm/shmem.c |   13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index b78253996108..733b98ca8517 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3329,6 +3329,15 @@ static loff_t shmem_dir_llseek(struct file *file, loff_t offset, int whence)
 	return offset;
 }
 
+static bool shmem_dir_emit(struct dir_context *ctx, struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+
+	return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len,
+			  ctx->pos, inode->i_ino,
+			  fs_umode_to_dtype(inode->i_mode));
+}
+
 /**
  * shmem_readdir - Emit entries starting at offset @ctx->pos
  * @file: an open directory to iterate over
@@ -3369,9 +3378,7 @@ static int shmem_readdir(struct file *file, struct dir_context *ctx)
 		return 0;
 
 	while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
-		if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
-			      d_inode(next)->i_ino,
-			      fs_umode_to_dtype(d_inode(next)->i_mode)))
+		if (!shmem_dir_emit(ctx, dentry))
 			break;
 		ctx->pos++;
 		p = &next->d_child;



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 5/5] shmem: stable directory cookies
  2023-05-05 18:37 [PATCH v2 0/5] shmemfs stable directory cookies Chuck Lever
                   ` (3 preceding siblings ...)
  2023-05-05 18:39 ` [PATCH v2 4/5] shmem: Add a shmem-specific dir_emit helper Chuck Lever
@ 2023-05-05 18:39 ` Chuck Lever
  4 siblings, 0 replies; 6+ messages in thread
From: Chuck Lever @ 2023-05-05 18:39 UTC (permalink / raw)
  To: hughd, akpm; +Cc: linux-mm, linux-fsdevel

From: Chuck Lever <chuck.lever@oracle.com>

The current cursor-based directory cookie mechanism doesn't work
when a tmpfs filesystem is exported via NFS. This is because NFS
clients do not open directories. Each server-side READDIR operation
has to open the directory, read it, then close it. The cursor state
for that directory, being associated strictly with the opened
struct file, is discarded after each READDIR operation.

Directory cookies are cached not only by NFS clients, but also by
user space libraries on those clients. Essentially there is no way
to invalidate those caches when the offset-to-dentry mapping
changes on an NFS server. Thus the whole application stack depends
on unchanging directory cookies.

The solution we've come up with is to make the directory cookie for
each file in a tmpfs filesystem stable for the life of the directory
entry it represents.

Use the per-directory xarray added earlier in this series.
shmem_readdir() uses it to map each directory offset (an loff_t
integer) to the memory address of a struct dentry.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 mm/shmem.c |  202 +++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 106 insertions(+), 96 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 733b98ca8517..35eb2f1368dd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2939,6 +2939,55 @@ static struct xarray *shmem_doff_map(struct inode *dir)
 	return &SHMEM_I(dir)->doff_map;
 }
 
+static int shmem_doff_add(struct inode *dir, struct dentry *dentry)
+{
+	struct shmem_inode_info *info = SHMEM_I(dir);
+	struct xa_limit limit = XA_LIMIT(2, U32_MAX);
+	u32 offset;
+	int ret;
+
+	if (dentry->d_fsdata)
+		return -EBUSY;
+
+	offset = 0;
+	ret = xa_alloc_cyclic(shmem_doff_map(dir), &offset, dentry, limit,
+			      &info->next_doff, GFP_KERNEL);
+	if (ret < 0)
+		return ret;
+
+	dentry->d_fsdata = (void *)(unsigned long)offset;
+	return 0;
+}
+
+static struct dentry *shmem_doff_find_after(struct dentry *dir,
+					    unsigned long *offset)
+{
+	struct xarray *xa = shmem_doff_map(d_inode(dir));
+	struct dentry *d, *found = NULL;
+
+	spin_lock(&dir->d_lock);
+	d = xa_find_after(xa, offset, ULONG_MAX, XA_PRESENT);
+	if (d) {
+		spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
+		if (simple_positive(d))
+			found = dget_dlock(d);
+		spin_unlock(&d->d_lock);
+	}
+	spin_unlock(&dir->d_lock);
+	return found;
+}
+
+static void shmem_doff_remove(struct inode *dir, struct dentry *dentry)
+{
+	u32 offset = (u32)(unsigned long)dentry->d_fsdata;
+
+	if (!offset)
+		return;
+
+	xa_erase(shmem_doff_map(dir), offset);
+	dentry->d_fsdata = NULL;
+}
+
 /*
  * During fs teardown (eg. umount), a directory's doff_map might still
  * contain entries. xa_destroy() cleans out anything that remains.
@@ -2971,6 +3020,10 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
 		if (error && error != -EOPNOTSUPP)
 			goto out_iput;
 
+		error = shmem_doff_add(dir, dentry);
+		if (error)
+			goto out_iput;
+
 		error = 0;
 		dir->i_size += BOGO_DIRENT_SIZE;
 		dir->i_ctime = dir->i_mtime = current_time(dir);
@@ -3048,6 +3101,10 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
 			goto out;
 	}
 
+	ret = shmem_doff_add(dir, dentry);
+	if (ret)
+		goto out;
+
 	dir->i_size += BOGO_DIRENT_SIZE;
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
 	inode_inc_iversion(dir);
@@ -3066,6 +3123,8 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
 	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
 		shmem_free_inode(inode->i_sb);
 
+	shmem_doff_remove(dir, dentry);
+
 	dir->i_size -= BOGO_DIRENT_SIZE;
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
 	inode_inc_iversion(dir);
@@ -3124,24 +3183,37 @@ static int shmem_rename2(struct mnt_idmap *idmap,
 {
 	struct inode *inode = d_inode(old_dentry);
 	int they_are_dirs = S_ISDIR(inode->i_mode);
+	int error;
 
 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
 		return -EINVAL;
 
-	if (flags & RENAME_EXCHANGE)
+	if (flags & RENAME_EXCHANGE) {
+		shmem_doff_remove(old_dir, old_dentry);
+		shmem_doff_remove(new_dir, new_dentry);
+		error = shmem_doff_add(new_dir, old_dentry);
+		if (error)
+			return error;
+		error = shmem_doff_add(old_dir, new_dentry);
+		if (error)
+			return error;
 		return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
+	}
 
 	if (!simple_empty(new_dentry))
 		return -ENOTEMPTY;
 
 	if (flags & RENAME_WHITEOUT) {
-		int error;
-
 		error = shmem_whiteout(idmap, old_dir, old_dentry);
 		if (error)
 			return error;
 	}
 
+	shmem_doff_remove(old_dir, old_dentry);
+	error = shmem_doff_add(new_dir, old_dentry);
+	if (error)
+		return error;
+
 	if (d_really_is_positive(new_dentry)) {
 		(void) shmem_unlink(new_dir, new_dentry);
 		if (they_are_dirs) {
@@ -3206,6 +3278,11 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 		folio_unlock(folio);
 		folio_put(folio);
 	}
+
+	error = shmem_doff_add(dir, dentry);
+	if (error)
+		goto out_iput;
+
 	dir->i_size += BOGO_DIRENT_SIZE;
 	dir->i_ctime = dir->i_mtime = current_time(dir);
 	inode_inc_iversion(dir);
@@ -3256,77 +3333,20 @@ static const char *shmem_get_link(struct dentry *dentry,
 	return folio_address(folio);
 }
 
-static struct dentry *scan_positives(struct dentry *cursor,
-					struct list_head *p,
-					loff_t count,
-					struct dentry *last)
-{
-	struct dentry *dentry = cursor->d_parent, *found = NULL;
-
-	spin_lock(&dentry->d_lock);
-	while ((p = p->next) != &dentry->d_subdirs) {
-		struct dentry *d = list_entry(p, struct dentry, d_child);
-		// we must at least skip cursors, to avoid livelocks
-		if (d->d_flags & DCACHE_DENTRY_CURSOR)
-			continue;
-		if (simple_positive(d) && !--count) {
-			spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
-			if (simple_positive(d))
-				found = dget_dlock(d);
-			spin_unlock(&d->d_lock);
-			if (likely(found))
-				break;
-			count = 1;
-		}
-		if (need_resched()) {
-			list_move(&cursor->d_child, p);
-			p = &cursor->d_child;
-			spin_unlock(&dentry->d_lock);
-			cond_resched();
-			spin_lock(&dentry->d_lock);
-		}
-	}
-	spin_unlock(&dentry->d_lock);
-	dput(last);
-	return found;
-}
-
 static loff_t shmem_dir_llseek(struct file *file, loff_t offset, int whence)
 {
-	struct dentry *dentry = file->f_path.dentry;
 	switch (whence) {
-		case 1:
-			offset += file->f_pos;
-			fallthrough;
-		case 0:
-			if (offset >= 0)
-				break;
-			fallthrough;
-		default:
-			return -EINVAL;
-	}
-	if (offset != file->f_pos) {
-		struct dentry *cursor = file->private_data;
-		struct dentry *to = NULL;
-
-		inode_lock_shared(dentry->d_inode);
-
-		if (offset > 2)
-			to = scan_positives(cursor, &dentry->d_subdirs,
-					    offset - 2, NULL);
-		spin_lock(&dentry->d_lock);
-		if (to)
-			list_move(&cursor->d_child, &to->d_child);
-		else
-			list_del_init(&cursor->d_child);
-		spin_unlock(&dentry->d_lock);
-		dput(to);
-
-		file->f_pos = offset;
-
-		inode_unlock_shared(dentry->d_inode);
+	case SEEK_CUR:
+		offset += file->f_pos;
+		fallthrough;
+	case SEEK_SET:
+		if (offset >= 0)
+			break;
+		fallthrough;
+	default:
+		return -EINVAL;
 	}
-	return offset;
+	return vfs_setpos(file, offset, U32_MAX);
 }
 
 static bool shmem_dir_emit(struct dir_context *ctx, struct dentry *dentry)
@@ -3334,7 +3354,7 @@ static bool shmem_dir_emit(struct dir_context *ctx, struct dentry *dentry)
 	struct inode *inode = d_inode(dentry);
 
 	return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len,
-			  ctx->pos, inode->i_ino,
+			  (loff_t)dentry->d_fsdata, inode->i_ino,
 			  fs_umode_to_dtype(inode->i_mode));
 }
 
@@ -3361,36 +3381,26 @@ static bool shmem_dir_emit(struct dir_context *ctx, struct dentry *dentry)
  */
 static int shmem_readdir(struct file *file, struct dir_context *ctx)
 {
-	struct dentry *dentry = file->f_path.dentry;
-	struct dentry *cursor = file->private_data;
-	struct list_head *anchor = &dentry->d_subdirs;
-	struct dentry *next = NULL;
-	struct list_head *p;
-
-	if (!dir_emit_dots(file, ctx))
-		return 0;
+	struct dentry *dentry, *dir = file->f_path.dentry;
+	unsigned long offset;
 
-	if (ctx->pos == 2)
-		p = anchor;
-	else if (!list_empty(&cursor->d_child))
-		p = &cursor->d_child;
-	else
-		return 0;
+	lockdep_assert_held(&d_inode(dir)->i_rwsem);
 
-	while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
-		if (!shmem_dir_emit(ctx, dentry))
+	if (!dir_emit_dots(file, ctx))
+		goto out;
+	for (offset = ctx->pos - 1; offset < ULONG_MAX - 1;) {
+		dentry = shmem_doff_find_after(dir, &offset);
+		if (!dentry)
 			break;
-		ctx->pos++;
-		p = &next->d_child;
+		if (!shmem_dir_emit(ctx, dentry)) {
+			dput(dentry);
+			break;
+		}
+		ctx->pos = offset + 1;
+		dput(dentry);
 	}
-	spin_lock(&dentry->d_lock);
-	if (next)
-		list_move_tail(&cursor->d_child, &next->d_child);
-	else
-		list_del_init(&cursor->d_child);
-	spin_unlock(&dentry->d_lock);
-	dput(next);
 
+out:
 	return 0;
 }
 



^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-05-05 18:40 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-05 18:37 [PATCH v2 0/5] shmemfs stable directory cookies Chuck Lever
2023-05-05 18:38 ` [PATCH v2 1/5] shmem: Refactor shmem_symlink() Chuck Lever
2023-05-05 18:38 ` [PATCH v2 2/5] shmem: Add dir_operations specific to tmpfs Chuck Lever
2023-05-05 18:39 ` [PATCH v2 3/5] shmem: Add a per-directory xarray Chuck Lever
2023-05-05 18:39 ` [PATCH v2 4/5] shmem: Add a shmem-specific dir_emit helper Chuck Lever
2023-05-05 18:39 ` [PATCH v2 5/5] shmem: stable directory cookies Chuck Lever

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox