From: Chuck Lever <cel@kernel.org>
To: viro@zeniv.linux.org.uk, brauner@kernel.org, hughd@google.com,
akpm@linux-foundation.org
Cc: Chuck Lever <chuck.lever@oracle.com>,
jlayton@redhat.com, linux-mm@kvack.org,
linux-fsdevel@vger.kernel.org
Subject: [PATCH v4 3/3] shmem: stable directory offsets
Date: Mon, 26 Jun 2023 14:21:40 -0400 [thread overview]
Message-ID: <168780370085.2142.4461798321197359310.stgit@manet.1015granger.net> (raw)
In-Reply-To: <168780354647.2142.537463116658872680.stgit@manet.1015granger.net>
From: Chuck Lever <chuck.lever@oracle.com>
The current cursor-based directory offset mechanism doesn't work
when a tmpfs filesystem is exported via NFS. This is because NFS
clients do not open directories. Each server-side READDIR operation
has to open the directory, read it, then close it. The cursor state
for that directory, being associated strictly with the opened
struct file, is thus discarded after each NFS READDIR operation.
Directory offsets are cached not only by NFS clients, but also by
user space libraries on those clients. Essentially there is no way
to invalidate those caches when the offset-to-dentry mapping changes
on an NFS server. Thus the whole application stack depends on
unchanging directory offsets.
The solution we've come up with is to make the directory offset for
each file in a tmpfs filesystem stable for the life of the directory
entry it represents.
tmpfs directories now use stable_dir_operations, whose readdir and
llseek methods use an xarray to map each directory offset (an loff_t
integer) to the memory address of a struct dentry.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
mm/shmem.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 47 insertions(+), 7 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 721f9fd064aa..89012f3583b1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2410,7 +2410,8 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block
/* Some things misbehave if size == 0 on a directory */
inode->i_size = 2 * BOGO_DIRENT_SIZE;
inode->i_op = &shmem_dir_inode_operations;
- inode->i_fop = &simple_dir_operations;
+ inode->i_fop = &stable_dir_operations;
+ stable_offset_init(inode);
break;
case S_IFLNK:
/*
@@ -2950,7 +2951,10 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
if (error && error != -EOPNOTSUPP)
goto out_iput;
- error = 0;
+ error = stable_offset_add(dir, dentry);
+ if (error)
+ goto out_iput;
+
dir->i_size += BOGO_DIRENT_SIZE;
dir->i_ctime = dir->i_mtime = current_time(dir);
inode_inc_iversion(dir);
@@ -3027,6 +3031,13 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
goto out;
}
+ ret = stable_offset_add(dir, dentry);
+ if (ret) {
+ if (inode->i_nlink)
+ shmem_free_inode(inode->i_sb);
+ goto out;
+ }
+
dir->i_size += BOGO_DIRENT_SIZE;
inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
inode_inc_iversion(dir);
@@ -3045,6 +3056,8 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
shmem_free_inode(inode->i_sb);
+ stable_offset_remove(dir, dentry);
+
dir->i_size -= BOGO_DIRENT_SIZE;
inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
inode_inc_iversion(dir);
@@ -3103,24 +3116,41 @@ static int shmem_rename2(struct mnt_idmap *idmap,
{
struct inode *inode = d_inode(old_dentry);
int they_are_dirs = S_ISDIR(inode->i_mode);
+ int error;
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
- if (flags & RENAME_EXCHANGE)
+ if (flags & RENAME_EXCHANGE) {
+ error = stable_offset_add(new_dir, old_dentry);
+ if (error)
+ return error;
+ error = stable_offset_add(old_dir, new_dentry);
+ if (error) {
+ stable_offset_remove(new_dir, old_dentry);
+ return error;
+ }
+ stable_offset_remove(old_dir, old_dentry);
+ stable_offset_remove(new_dir, new_dentry);
+
+ /* Always returns zero */
return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
+ }
if (!simple_empty(new_dentry))
return -ENOTEMPTY;
if (flags & RENAME_WHITEOUT) {
- int error;
-
error = shmem_whiteout(idmap, old_dir, old_dentry);
if (error)
return error;
}
+ stable_offset_remove(old_dir, old_dentry);
+ error = stable_offset_add(new_dir, old_dentry);
+ if (error)
+ return error;
+
if (d_really_is_positive(new_dentry)) {
(void) shmem_unlink(new_dir, new_dentry);
if (they_are_dirs) {
@@ -3164,19 +3194,23 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
if (error && error != -EOPNOTSUPP)
goto out_iput;
+ error = stable_offset_add(dir, dentry);
+ if (error)
+ goto out_iput;
+
inode->i_size = len-1;
if (len <= SHORT_SYMLINK_LEN) {
inode->i_link = kmemdup(symname, len, GFP_KERNEL);
if (!inode->i_link) {
error = -ENOMEM;
- goto out_iput;
+ goto out_remove_offset;
}
inode->i_op = &shmem_short_symlink_operations;
} else {
inode_nohighmem(inode);
error = shmem_get_folio(inode, 0, &folio, SGP_WRITE);
if (error)
- goto out_iput;
+ goto out_remove_offset;
inode->i_mapping->a_ops = &shmem_aops;
inode->i_op = &shmem_symlink_inode_operations;
memcpy(folio_address(folio), symname, len);
@@ -3185,12 +3219,16 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
folio_unlock(folio);
folio_put(folio);
}
+
dir->i_size += BOGO_DIRENT_SIZE;
dir->i_ctime = dir->i_mtime = current_time(dir);
inode_inc_iversion(dir);
d_instantiate(dentry, inode);
dget(dentry);
return 0;
+
+out_remove_offset:
+ stable_offset_remove(dir, dentry);
out_iput:
iput(inode);
return error;
@@ -3920,6 +3958,8 @@ static void shmem_destroy_inode(struct inode *inode)
{
if (S_ISREG(inode->i_mode))
mpol_free_shared_policy(&SHMEM_I(inode)->policy);
+ if (S_ISDIR(inode->i_mode))
+ stable_offset_destroy(inode);
}
static void shmem_init_inode(void *foo)
next prev parent reply other threads:[~2023-06-26 18:21 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-06-26 18:21 [PATCH v4 0/3] shmemfs " Chuck Lever
2023-06-26 18:21 ` [PATCH v4 1/3] libfs: Add directory operations for stable offsets Chuck Lever
2023-06-27 6:44 ` Christoph Hellwig
2023-06-27 8:52 ` Christian Brauner
2023-06-27 14:04 ` Chuck Lever III
2023-06-27 14:19 ` Jeff Layton
2023-06-26 18:21 ` [PATCH v4 2/3] shmem: Refactor shmem_symlink() Chuck Lever
2023-06-27 6:44 ` Christoph Hellwig
2023-06-26 18:21 ` Chuck Lever [this message]
2023-06-27 14:06 ` [PATCH v4 3/3] shmem: stable directory offsets Bernd Schubert
2023-06-27 14:11 ` Chuck Lever III
2023-06-27 14:48 ` Bernd Schubert
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=168780370085.2142.4461798321197359310.stgit@manet.1015granger.net \
--to=cel@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=brauner@kernel.org \
--cc=chuck.lever@oracle.com \
--cc=hughd@google.com \
--cc=jlayton@redhat.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox