From: James Gowans <jgowans@amazon.com>
To: <linux-kernel@vger.kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>,
<kexec@lists.infradead.org>, "Joerg Roedel" <joro@8bytes.org>,
Will Deacon <will@kernel.org>, <iommu@lists.linux.dev>,
Alexander Viro <viro@zeniv.linux.org.uk>,
"Christian Brauner" <brauner@kernel.org>,
<linux-fsdevel@vger.kernel.org>,
Paolo Bonzini <pbonzini@redhat.com>,
Sean Christopherson <seanjc@google.com>, <kvm@vger.kernel.org>,
Andrew Morton <akpm@linux-foundation.org>, <linux-mm@kvack.org>,
Alexander Graf <graf@amazon.com>,
David Woodhouse <dwmw@amazon.co.uk>,
"Jan H . Schoenherr" <jschoenh@amazon.de>,
Usama Arif <usama.arif@bytedance.com>,
Anthony Yznaga <anthony.yznaga@oracle.com>,
Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>,
<madvenka@linux.microsoft.com>, <steven.sistare@oracle.com>,
<yuleixzhang@tencent.com>
Subject: [RFC 04/18] pkernfs: support file truncation
Date: Mon, 5 Feb 2024 12:01:49 +0000 [thread overview]
Message-ID: <20240205120203.60312-5-jgowans@amazon.com> (raw)
In-Reply-To: <20240205120203.60312-1-jgowans@amazon.com>
In the previous commit a block allocator was added. Now use that block
allocator to allocate blocks for files when ftruncate is run on them.
To do that an inode_operations is added on the file inodes with a setattr
callback handling the ATTR_SIZE attribute. When this is invoked blocks
are allocated, the indexes of which are put into a mappings block.
The mappings block is an array with the index being the file offset
block and the value at that index being the pkernfs block backing that
file offset.
---
fs/pkernfs/Makefile | 2 +-
fs/pkernfs/allocator.c | 24 +++++++++++++++++++
fs/pkernfs/file.c | 53 ++++++++++++++++++++++++++++++++++++++++++
fs/pkernfs/inode.c | 27 ++++++++++++++++++---
fs/pkernfs/pkernfs.h | 7 ++++++
5 files changed, 109 insertions(+), 4 deletions(-)
create mode 100644 fs/pkernfs/file.c
diff --git a/fs/pkernfs/Makefile b/fs/pkernfs/Makefile
index d8b92a74fbc6..e41f06cc490f 100644
--- a/fs/pkernfs/Makefile
+++ b/fs/pkernfs/Makefile
@@ -3,4 +3,4 @@
# Makefile for persistent kernel filesystem
#
-obj-$(CONFIG_PKERNFS_FS) += pkernfs.o inode.o allocator.o dir.o
+obj-$(CONFIG_PKERNFS_FS) += pkernfs.o inode.o allocator.o dir.o file.o
diff --git a/fs/pkernfs/allocator.c b/fs/pkernfs/allocator.c
index 1d4aac9c4545..3905ce92b4a9 100644
--- a/fs/pkernfs/allocator.c
+++ b/fs/pkernfs/allocator.c
@@ -25,3 +25,27 @@ void pkernfs_zero_allocations(struct super_block *sb)
/* Second page is inode store */
set_bit(1, pkernfs_allocations_bitmap(sb));
}
+
+/*
+ * Allocate one 2 MiB block and return its block index.
+ * Index is 2 MiB chunk index.
+ *
+ * Returns 0 when no free block could be found; callers must check for
+ * this before using the index.
+ *
+ * NOTE(review): 0 is used as the failure value on the assumption that
+ * block 0 (which holds this bitmap) is always marked allocated and so can
+ * never be a valid result - confirm in pkernfs_zero_allocations().
+ * NOTE(review): the find + set pair below is not atomic; confirm what
+ * serialises concurrent allocators.
+ */
+unsigned long pkernfs_alloc_block(struct super_block *sb)
+{
+	/* Allocations is 2nd half of first page */
+	void *allocations_mem = pkernfs_allocations_bitmap(sb);
+	const unsigned long nbits = PMD_SIZE / 2;
+	unsigned long free_bit;
+
+	free_bit = bitmap_find_next_zero_area(allocations_mem,
+			nbits,	/* Size */
+			0,	/* Start */
+			1,	/* Number of zeroed bits to look for */
+			0);	/* Alignment mask - none required. */
+	/*
+	 * When nothing is free the search returns an index beyond the
+	 * bitmap; setting that bit would write outside the bitmap.
+	 */
+	if (free_bit >= nbits)
+		return 0;
+	bitmap_set(allocations_mem, free_bit, 1);
+	return free_bit;
+}
+
+/*
+ * Translate a block index into an address: the result is block_idx
+ * PMD_SIZE-sized steps past pkernfs_mem. @sb is not consulted.
+ */
+void *pkernfs_addr_for_block(struct super_block *sb, int block_idx)
+{
+	return pkernfs_mem + block_idx * PMD_SIZE;
+}
diff --git a/fs/pkernfs/file.c b/fs/pkernfs/file.c
new file mode 100644
index 000000000000..27a637423178
--- /dev/null
+++ b/fs/pkernfs/file.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "pkernfs.h"
+
+/*
+ * Make sure @inode has enough blocks mapped to cover @newsize bytes, then
+ * record @newsize as the inode size.
+ *
+ * Only file offset blocks that are not mapped yet get a new block, so
+ * calling this again on a file keeps the existing mappings instead of
+ * overwriting them. Shrinking only updates the size; no blocks are
+ * released here.
+ *
+ * Returns 0 on success, -ENOMEM if the allocator has no free block, or
+ * -EFBIG if @newsize needs more entries than fit in the mappings block.
+ * On failure the inode size is left untouched; blocks allocated before the
+ * failure stay recorded in the mappings block and are reused by a retry.
+ *
+ * NOTE(review): assumes num_mappings is the count of valid entries at the
+ * start of the mappings block and is 0 for a new file - confirm.
+ */
+static int truncate(struct inode *inode, loff_t newsize)
+{
+	/* The mappings array lives in a single 2 MiB block. */
+	const unsigned long max_mappings = PMD_SIZE / sizeof(unsigned long);
+	struct pkernfs_inode *pkernfs_inode;
+	unsigned long *mappings;
+	unsigned long free_block;
+	unsigned long block_idx;
+
+	pkernfs_inode = pkernfs_get_persisted_inode(inode->i_sb, inode->i_ino);
+	mappings = (unsigned long *)pkernfs_addr_for_block(inode->i_sb,
+			pkernfs_inode->mappings_block);
+
+	for (block_idx = pkernfs_inode->num_mappings;
+	     block_idx * PMD_SIZE < newsize; ++block_idx) {
+		if (block_idx >= max_mappings)
+			return -EFBIG;
+		free_block = pkernfs_alloc_block(inode->i_sb);
+		/* The allocator's result is unsigned; 0 means failure. */
+		if (!free_block)
+			return -ENOMEM;
+		mappings[block_idx] = free_block;
+		++pkernfs_inode->num_mappings;
+	}
+
+	/* Publish the new size only once every block is in place. */
+	i_size_write(inode, newsize);
+	return 0;
+}
+
+/*
+ * setattr callback for pkernfs files: validates the request with
+ * setattr_prepare(), resizes the file via truncate() when ATTR_SIZE is
+ * set, then copies the attributes into the inode and marks it dirty.
+ * Returns 0 on success or the first error encountered.
+ */
+static int inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr)
+{
+	struct inode *vfs_inode = dentry->d_inode;
+	int ret = setattr_prepare(idmap, dentry, iattr);
+
+	if (!ret && (iattr->ia_valid & ATTR_SIZE))
+		ret = truncate(vfs_inode, iattr->ia_size);
+	if (ret)
+		return ret;
+
+	setattr_copy(idmap, vfs_inode, iattr);
+	mark_inode_dirty(vfs_inode);
+	return 0;
+}
+
+/* Inode operations for regular pkernfs files. */
+const struct inode_operations pkernfs_file_inode_operations = {
+	.getattr	= simple_getattr,
+	.setattr	= inode_setattr,	/* handles ATTR_SIZE (ftruncate) */
+};
+
+/* File operations for regular pkernfs files; only the owner is set here. */
+const struct file_operations pkernfs_file_fops = {
+	.owner		= THIS_MODULE,
+};
diff --git a/fs/pkernfs/inode.c b/fs/pkernfs/inode.c
index f6584c8b8804..7fe4e7b220cc 100644
--- a/fs/pkernfs/inode.c
+++ b/fs/pkernfs/inode.c
@@ -15,14 +15,28 @@ struct pkernfs_inode *pkernfs_get_persisted_inode(struct super_block *sb, int in
struct inode *pkernfs_inode_get(struct super_block *sb, unsigned long ino)
{
+ struct pkernfs_inode *pkernfs_inode;
struct inode *inode = iget_locked(sb, ino);
/* If this inode is cached it is already populated; just return */
if (!(inode->i_state & I_NEW))
return inode;
- inode->i_op = &pkernfs_dir_inode_operations;
+ pkernfs_inode = pkernfs_get_persisted_inode(sb, ino);
inode->i_sb = sb;
- inode->i_mode = S_IFREG;
+ if (pkernfs_inode->flags & PKERNFS_INODE_FLAG_DIR) {
+ inode->i_op = &pkernfs_dir_inode_operations;
+ inode->i_mode = S_IFDIR;
+ } else {
+ inode->i_op = &pkernfs_file_inode_operations;
+ inode->i_mode = S_IFREG;
+ inode->i_fop = &pkernfs_file_fops;
+ }
+
+ inode->i_atime = inode->i_mtime = current_time(inode);
+ inode_set_ctime_current(inode);
+ set_nlink(inode, 1);
+
+ /* Switch based on file type */
unlock_new_inode(inode);
return inode;
}
@@ -79,6 +93,8 @@ static int pkernfs_create(struct mnt_idmap *id, struct inode *dir,
pkernfs_get_persisted_inode(dir->i_sb, dir->i_ino)->child_ino = free_inode;
strscpy(pkernfs_inode->filename, dentry->d_name.name, PKERNFS_FILENAME_LEN);
pkernfs_inode->flags = PKERNFS_INODE_FLAG_FILE;
+ pkernfs_inode->mappings_block = pkernfs_alloc_block(dir->i_sb);
+ memset(pkernfs_addr_for_block(dir->i_sb, pkernfs_inode->mappings_block), 0, (2 << 20));
vfs_inode = pkernfs_inode_get(dir->i_sb, free_inode);
d_instantiate(dentry, vfs_inode);
@@ -90,6 +106,7 @@ static struct dentry *pkernfs_lookup(struct inode *dir,
unsigned int flags)
{
struct pkernfs_inode *pkernfs_inode;
+ struct inode *vfs_inode;
unsigned long ino;
pkernfs_inode = pkernfs_get_persisted_inode(dir->i_sb, dir->i_ino);
@@ -97,7 +114,10 @@ static struct dentry *pkernfs_lookup(struct inode *dir,
while (ino) {
pkernfs_inode = pkernfs_get_persisted_inode(dir->i_sb, ino);
if (!strncmp(pkernfs_inode->filename, dentry->d_name.name, PKERNFS_FILENAME_LEN)) {
- d_add(dentry, pkernfs_inode_get(dir->i_sb, ino));
+ vfs_inode = pkernfs_inode_get(dir->i_sb, ino);
+ mark_inode_dirty(dir);
+ dir->i_atime = current_time(dir);
+ d_add(dentry, vfs_inode);
break;
}
ino = pkernfs_inode->sibling_ino;
@@ -146,3 +166,4 @@ const struct inode_operations pkernfs_dir_inode_operations = {
.lookup = pkernfs_lookup,
.unlink = pkernfs_unlink,
};
+
diff --git a/fs/pkernfs/pkernfs.h b/fs/pkernfs/pkernfs.h
index 4655780f31f2..8b4fee8c5b2e 100644
--- a/fs/pkernfs/pkernfs.h
+++ b/fs/pkernfs/pkernfs.h
@@ -34,8 +34,15 @@ struct pkernfs_inode {
};
void pkernfs_initialise_inode_store(struct super_block *sb);
+
void pkernfs_zero_allocations(struct super_block *sb);
+unsigned long pkernfs_alloc_block(struct super_block *sb);
struct inode *pkernfs_inode_get(struct super_block *sb, unsigned long ino);
+void *pkernfs_addr_for_block(struct super_block *sb, int block_idx);
+
struct pkernfs_inode *pkernfs_get_persisted_inode(struct super_block *sb, int ino);
+
extern const struct file_operations pkernfs_dir_fops;
+extern const struct file_operations pkernfs_file_fops;
+extern const struct inode_operations pkernfs_file_inode_operations;
--
2.40.1
next prev parent reply other threads:[~2024-02-05 12:03 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-05 12:01 [RFC 00/18] Pkernfs: Support persistence for live update James Gowans
2024-02-05 12:01 ` [RFC 01/18] pkernfs: Introduce filesystem skeleton James Gowans
2024-02-05 12:01 ` [RFC 02/18] pkernfs: Add persistent inodes hooked into directies James Gowans
2024-02-05 12:01 ` [RFC 03/18] pkernfs: Define an allocator for persistent pages James Gowans
2024-02-05 12:01 ` James Gowans [this message]
2024-02-05 12:01 ` [RFC 05/18] pkernfs: add file mmap callback James Gowans
2024-02-05 23:34 ` Dave Chinner
2024-02-05 12:01 ` [RFC 06/18] init: Add liveupdate cmdline param James Gowans
2024-02-05 12:01 ` [RFC 07/18] pkernfs: Add file type for IOMMU root pgtables James Gowans
2024-02-05 12:01 ` [RFC 08/18] iommu: Add allocator for pgtables from persistent region James Gowans
2024-02-05 12:01 ` [RFC 09/18] intel-iommu: Use pkernfs for root/context pgtable pages James Gowans
2024-02-05 12:01 ` [RFC 10/18] iommu/intel: zap context table entries on kexec James Gowans
2024-02-05 12:01 ` [RFC 11/18] dma-iommu: Always enable deferred attaches for liveupdate James Gowans
2024-02-05 17:45 ` Jason Gunthorpe
2024-02-05 12:01 ` [RFC 12/18] pkernfs: Add IOMMU domain pgtables file James Gowans
2024-02-05 12:01 ` [RFC 13/18] vfio: add ioctl to define persistent pgtables on container James Gowans
2024-02-05 17:08 ` Jason Gunthorpe
2024-02-05 12:01 ` [RFC 14/18] intel-iommu: Allocate domain pgtable pages from pkernfs James Gowans
2024-02-05 17:12 ` Jason Gunthorpe
2024-02-05 12:02 ` [RFC 15/18] pkernfs: register device memory for IOMMU domain pgtables James Gowans
2024-02-05 12:02 ` [RFC 16/18] vfio: support not mapping IOMMU pgtables on live-update James Gowans
2024-02-05 12:02 ` [RFC 17/18] pci: Don't clear bus master is persistence enabled James Gowans
2024-02-05 12:02 ` [RFC 18/18] vfio-pci: Assume device working after liveupdate James Gowans
2024-02-05 17:10 ` [RFC 00/18] Pkernfs: Support persistence for live update Alex Williamson
2024-02-07 14:56 ` Gowans, James
2024-02-07 15:28 ` Jason Gunthorpe
2024-02-05 17:42 ` Jason Gunthorpe
2024-02-07 14:45 ` Gowans, James
2024-02-07 15:22 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240205120203.60312-5-jgowans@amazon.com \
--to=jgowans@amazon.com \
--cc=akpm@linux-foundation.org \
--cc=anthony.yznaga@oracle.com \
--cc=brauner@kernel.org \
--cc=dwmw@amazon.co.uk \
--cc=ebiederm@xmission.com \
--cc=graf@amazon.com \
--cc=iommu@lists.linux.dev \
--cc=joro@8bytes.org \
--cc=jschoenh@amazon.de \
--cc=kexec@lists.infradead.org \
--cc=kvm@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=madvenka@linux.microsoft.com \
--cc=pbonzini@redhat.com \
--cc=seanjc@google.com \
--cc=skinsburskii@linux.microsoft.com \
--cc=steven.sistare@oracle.com \
--cc=usama.arif@bytedance.com \
--cc=viro@zeniv.linux.org.uk \
--cc=will@kernel.org \
--cc=yuleixzhang@tencent.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox