linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: James Gowans <jgowans@amazon.com>
To: <linux-kernel@vger.kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>,
	<kexec@lists.infradead.org>, "Joerg Roedel" <joro@8bytes.org>,
	Will Deacon <will@kernel.org>, <iommu@lists.linux.dev>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	"Christian Brauner" <brauner@kernel.org>,
	<linux-fsdevel@vger.kernel.org>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Sean Christopherson <seanjc@google.com>, <kvm@vger.kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>, <linux-mm@kvack.org>,
	Alexander Graf <graf@amazon.com>,
	David Woodhouse <dwmw@amazon.co.uk>,
	"Jan H . Schoenherr" <jschoenh@amazon.de>,
	Usama Arif <usama.arif@bytedance.com>,
	Anthony Yznaga <anthony.yznaga@oracle.com>,
	Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>,
	<madvenka@linux.microsoft.com>, <steven.sistare@oracle.com>,
	<yuleixzhang@tencent.com>
Subject: [RFC 04/18] pkernfs: support file truncation
Date: Mon, 5 Feb 2024 12:01:49 +0000	[thread overview]
Message-ID: <20240205120203.60312-5-jgowans@amazon.com> (raw)
In-Reply-To: <20240205120203.60312-1-jgowans@amazon.com>

In the previous commit a block allocator was added. Now use that block
allocator to allocate blocks for files when ftruncate is run on them.

To do that, an inode_operations is added on the file inodes with a setattr
callback handling the ATTR_SIZE attribute. When this is invoked pages
are allocated, the indexes of which are put into a mappings block.
The mappings block is an array with the index being the file offset
block and the value at that index being the pkernfs block backing that
file offset.
---
 fs/pkernfs/Makefile    |  2 +-
 fs/pkernfs/allocator.c | 24 +++++++++++++++++++
 fs/pkernfs/file.c      | 53 ++++++++++++++++++++++++++++++++++++++++++
 fs/pkernfs/inode.c     | 27 ++++++++++++++++++---
 fs/pkernfs/pkernfs.h   |  7 ++++++
 5 files changed, 109 insertions(+), 4 deletions(-)
 create mode 100644 fs/pkernfs/file.c

diff --git a/fs/pkernfs/Makefile b/fs/pkernfs/Makefile
index d8b92a74fbc6..e41f06cc490f 100644
--- a/fs/pkernfs/Makefile
+++ b/fs/pkernfs/Makefile
@@ -3,4 +3,4 @@
 # Makefile for persistent kernel filesystem
 #
 
-obj-$(CONFIG_PKERNFS_FS) += pkernfs.o inode.o allocator.o dir.o
+obj-$(CONFIG_PKERNFS_FS) += pkernfs.o inode.o allocator.o dir.o file.o
diff --git a/fs/pkernfs/allocator.c b/fs/pkernfs/allocator.c
index 1d4aac9c4545..3905ce92b4a9 100644
--- a/fs/pkernfs/allocator.c
+++ b/fs/pkernfs/allocator.c
@@ -25,3 +25,27 @@ void pkernfs_zero_allocations(struct super_block *sb)
 	/* Second page is inode store */
 	set_bit(1, pkernfs_allocations_bitmap(sb));
 }
+
+/*
+ * Allocs one 2 MiB block, and returns the block index.
+ * Index is 2 MiB chunk index.
+ */
+unsigned long pkernfs_alloc_block(struct super_block *sb)
+{
+	unsigned long free_bit;
+
+	/* Allocations is 2nd half of first page */
+	void *allocations_mem = pkernfs_allocations_bitmap(sb);
+	free_bit = bitmap_find_next_zero_area(allocations_mem,
+			PMD_SIZE / 2, /* Size */
+			0, /* Start */
+			1, /* Number of zeroed bits to look for */
+			0); /* Alignment mask - none required. */
+	bitmap_set(allocations_mem, free_bit, 1);
+	return free_bit;
+}
+
+void *pkernfs_addr_for_block(struct super_block *sb, int block_idx)
+{
+	return pkernfs_mem + (block_idx * PMD_SIZE);
+}
diff --git a/fs/pkernfs/file.c b/fs/pkernfs/file.c
new file mode 100644
index 000000000000..27a637423178
--- /dev/null
+++ b/fs/pkernfs/file.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "pkernfs.h"
+
+static int truncate(struct inode *inode, loff_t newsize)
+{
+	unsigned long free_block;
+	struct pkernfs_inode *pkernfs_inode;
+	unsigned long *mappings;
+
+	pkernfs_inode = pkernfs_get_persisted_inode(inode->i_sb, inode->i_ino);
+	mappings = (unsigned long *)pkernfs_addr_for_block(inode->i_sb,
+		pkernfs_inode->mappings_block);
+	i_size_write(inode, newsize);
+	for (int block_idx = 0; block_idx * PMD_SIZE < newsize; ++block_idx) {
+		free_block = pkernfs_alloc_block(inode->i_sb);
+		if (free_block <= 0)
+			/* TODO: roll back allocations. */
+			return -ENOMEM;
+		*(mappings + block_idx) = free_block;
+		++pkernfs_inode->num_mappings;
+	}
+	return 0;
+}
+
+static int inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = setattr_prepare(idmap, dentry, iattr);
+	if (error)
+		return error;
+
+	if (iattr->ia_valid & ATTR_SIZE) {
+		error = truncate(inode, iattr->ia_size);
+		if (error)
+			return error;
+	}
+	setattr_copy(idmap, inode, iattr);
+	mark_inode_dirty(inode);
+	return 0;
+}
+
+const struct inode_operations pkernfs_file_inode_operations = {
+	.setattr = inode_setattr,
+	.getattr = simple_getattr,
+};
+
+const struct file_operations pkernfs_file_fops = {
+	.owner = THIS_MODULE,
+	.iterate_shared = NULL,
+};
diff --git a/fs/pkernfs/inode.c b/fs/pkernfs/inode.c
index f6584c8b8804..7fe4e7b220cc 100644
--- a/fs/pkernfs/inode.c
+++ b/fs/pkernfs/inode.c
@@ -15,14 +15,28 @@ struct pkernfs_inode *pkernfs_get_persisted_inode(struct super_block *sb, int in
 
 struct inode *pkernfs_inode_get(struct super_block *sb, unsigned long ino)
 {
+	struct pkernfs_inode *pkernfs_inode;
 	struct inode *inode = iget_locked(sb, ino);
 
 	/* If this inode is cached it is already populated; just return */
 	if (!(inode->i_state & I_NEW))
 		return inode;
-	inode->i_op = &pkernfs_dir_inode_operations;
+	pkernfs_inode = pkernfs_get_persisted_inode(sb, ino);
 	inode->i_sb = sb;
-	inode->i_mode = S_IFREG;
+	if (pkernfs_inode->flags & PKERNFS_INODE_FLAG_DIR) {
+		inode->i_op = &pkernfs_dir_inode_operations;
+		inode->i_mode = S_IFDIR;
+	} else {
+		inode->i_op = &pkernfs_file_inode_operations;
+		inode->i_mode = S_IFREG;
+		inode->i_fop = &pkernfs_file_fops;
+	}
+
+	inode->i_atime = inode->i_mtime = current_time(inode);
+	inode_set_ctime_current(inode);
+	set_nlink(inode, 1);
+
+	/* Switch based on file type */
 	unlock_new_inode(inode);
 	return inode;
 }
@@ -79,6 +93,8 @@ static int pkernfs_create(struct mnt_idmap *id, struct inode *dir,
 	pkernfs_get_persisted_inode(dir->i_sb, dir->i_ino)->child_ino = free_inode;
 	strscpy(pkernfs_inode->filename, dentry->d_name.name, PKERNFS_FILENAME_LEN);
 	pkernfs_inode->flags = PKERNFS_INODE_FLAG_FILE;
+	pkernfs_inode->mappings_block = pkernfs_alloc_block(dir->i_sb);
+	memset(pkernfs_addr_for_block(dir->i_sb, pkernfs_inode->mappings_block), 0, (2 << 20));
 
 	vfs_inode = pkernfs_inode_get(dir->i_sb, free_inode);
 	d_instantiate(dentry, vfs_inode);
@@ -90,6 +106,7 @@ static struct dentry *pkernfs_lookup(struct inode *dir,
 		unsigned int flags)
 {
 	struct pkernfs_inode *pkernfs_inode;
+	struct inode *vfs_inode;
 	unsigned long ino;
 
 	pkernfs_inode = pkernfs_get_persisted_inode(dir->i_sb, dir->i_ino);
@@ -97,7 +114,10 @@ static struct dentry *pkernfs_lookup(struct inode *dir,
 	while (ino) {
 		pkernfs_inode = pkernfs_get_persisted_inode(dir->i_sb, ino);
 		if (!strncmp(pkernfs_inode->filename, dentry->d_name.name, PKERNFS_FILENAME_LEN)) {
-			d_add(dentry, pkernfs_inode_get(dir->i_sb, ino));
+			vfs_inode = pkernfs_inode_get(dir->i_sb, ino);
+			mark_inode_dirty(dir);
+			dir->i_atime = current_time(dir);
+			d_add(dentry, vfs_inode);
 			break;
 		}
 		ino = pkernfs_inode->sibling_ino;
@@ -146,3 +166,4 @@ const struct inode_operations pkernfs_dir_inode_operations = {
 	.lookup		= pkernfs_lookup,
 	.unlink		= pkernfs_unlink,
 };
+
diff --git a/fs/pkernfs/pkernfs.h b/fs/pkernfs/pkernfs.h
index 4655780f31f2..8b4fee8c5b2e 100644
--- a/fs/pkernfs/pkernfs.h
+++ b/fs/pkernfs/pkernfs.h
@@ -34,8 +34,15 @@ struct pkernfs_inode {
 };
 
 void pkernfs_initialise_inode_store(struct super_block *sb);
+
 void pkernfs_zero_allocations(struct super_block *sb);
+unsigned long pkernfs_alloc_block(struct super_block *sb);
 struct inode *pkernfs_inode_get(struct super_block *sb, unsigned long ino);
+void *pkernfs_addr_for_block(struct super_block *sb, int block_idx);
+
 struct pkernfs_inode *pkernfs_get_persisted_inode(struct super_block *sb, int ino);
 
+
 extern const struct file_operations pkernfs_dir_fops;
+extern const struct file_operations pkernfs_file_fops;
+extern const struct inode_operations pkernfs_file_inode_operations;
-- 
2.40.1



  parent reply	other threads:[~2024-02-05 12:03 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-05 12:01 [RFC 00/18] Pkernfs: Support persistence for live update James Gowans
2024-02-05 12:01 ` [RFC 01/18] pkernfs: Introduce filesystem skeleton James Gowans
2024-02-05 12:01 ` [RFC 02/18] pkernfs: Add persistent inodes hooked into directies James Gowans
2024-02-05 12:01 ` [RFC 03/18] pkernfs: Define an allocator for persistent pages James Gowans
2024-02-05 12:01 ` James Gowans [this message]
2024-02-05 12:01 ` [RFC 05/18] pkernfs: add file mmap callback James Gowans
2024-02-05 23:34   ` Dave Chinner
2024-02-05 12:01 ` [RFC 06/18] init: Add liveupdate cmdline param James Gowans
2024-02-05 12:01 ` [RFC 07/18] pkernfs: Add file type for IOMMU root pgtables James Gowans
2024-02-05 12:01 ` [RFC 08/18] iommu: Add allocator for pgtables from persistent region James Gowans
2024-02-05 12:01 ` [RFC 09/18] intel-iommu: Use pkernfs for root/context pgtable pages James Gowans
2024-02-05 12:01 ` [RFC 10/18] iommu/intel: zap context table entries on kexec James Gowans
2024-02-05 12:01 ` [RFC 11/18] dma-iommu: Always enable deferred attaches for liveupdate James Gowans
2024-02-05 17:45   ` Jason Gunthorpe
2024-02-05 12:01 ` [RFC 12/18] pkernfs: Add IOMMU domain pgtables file James Gowans
2024-02-05 12:01 ` [RFC 13/18] vfio: add ioctl to define persistent pgtables on container James Gowans
2024-02-05 17:08   ` Jason Gunthorpe
2024-02-05 12:01 ` [RFC 14/18] intel-iommu: Allocate domain pgtable pages from pkernfs James Gowans
2024-02-05 17:12   ` Jason Gunthorpe
2024-02-05 12:02 ` [RFC 15/18] pkernfs: register device memory for IOMMU domain pgtables James Gowans
2024-02-05 12:02 ` [RFC 16/18] vfio: support not mapping IOMMU pgtables on live-update James Gowans
2024-02-05 12:02 ` [RFC 17/18] pci: Don't clear bus master is persistence enabled James Gowans
2024-02-05 12:02 ` [RFC 18/18] vfio-pci: Assume device working after liveupdate James Gowans
2024-02-05 17:10 ` [RFC 00/18] Pkernfs: Support persistence for live update Alex Williamson
2024-02-07 14:56   ` Gowans, James
2024-02-07 15:28     ` Jason Gunthorpe
2024-02-05 17:42 ` Jason Gunthorpe
2024-02-07 14:45   ` Gowans, James
2024-02-07 15:22     ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240205120203.60312-5-jgowans@amazon.com \
    --to=jgowans@amazon.com \
    --cc=akpm@linux-foundation.org \
    --cc=anthony.yznaga@oracle.com \
    --cc=brauner@kernel.org \
    --cc=dwmw@amazon.co.uk \
    --cc=ebiederm@xmission.com \
    --cc=graf@amazon.com \
    --cc=iommu@lists.linux.dev \
    --cc=joro@8bytes.org \
    --cc=jschoenh@amazon.de \
    --cc=kexec@lists.infradead.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=madvenka@linux.microsoft.com \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    --cc=skinsburskii@linux.microsoft.com \
    --cc=steven.sistare@oracle.com \
    --cc=usama.arif@bytedance.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=will@kernel.org \
    --cc=yuleixzhang@tencent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox