linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Khalid Aziz <khalid.aziz@oracle.com>
To: akpm@linux-foundation.org, willy@infradead.org
Cc: Khalid Aziz <khalid.aziz@oracle.com>,
	aneesh.kumar@linux.ibm.com, arnd@arndb.de, 21cnbao@gmail.com,
	corbet@lwn.net, dave.hansen@linux.intel.com, david@redhat.com,
	ebiederm@xmission.com, hagen@jauu.net, jack@suse.cz,
	keescook@chromium.org, kirill@shutemov.name, kucharsk@gmail.com,
	linkinjeon@kernel.org, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	longpeng2@huawei.com, luto@kernel.org, markhemm@googlemail.com,
	pcc@google.com, rppt@kernel.org, sieberf@amazon.com,
	sjpark@amazon.de, surenb@google.com, tst@schoebel-theuer.de,
	yzaikin@google.com
Subject: [PATCH v2 1/9] mm: Add msharefs filesystem
Date: Wed, 29 Jun 2022 16:53:52 -0600	[thread overview]
Message-ID: <de5566e71e038d95342d00364c6760c7078cb091.1656531090.git.khalid.aziz@oracle.com> (raw)
In-Reply-To: <cover.1656531090.git.khalid.aziz@oracle.com>

Add a ram-based filesystem that contains page table sharing
information and files that enable processes to share page tables.
This patch adds the basic filesystem that can be mounted.

Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
---
 Documentation/filesystems/msharefs.rst |  19 +++++
 include/uapi/linux/magic.h             |   1 +
 mm/Makefile                            |   2 +-
 mm/mshare.c                            | 103 +++++++++++++++++++++++++
 4 files changed, 124 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/filesystems/msharefs.rst
 create mode 100644 mm/mshare.c

diff --git a/Documentation/filesystems/msharefs.rst b/Documentation/filesystems/msharefs.rst
new file mode 100644
index 000000000000..fd161f67045d
--- /dev/null
+++ b/Documentation/filesystems/msharefs.rst
@@ -0,0 +1,19 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================================
+msharefs - a filesystem to support shared page tables
+=====================================================
+
+msharefs is a ram-based filesystem that allows multiple processes to
+share page table entries for shared pages.
+
+msharefs is typically mounted like this::
+
+	mount -t msharefs none /sys/fs/mshare
+
+When a process calls the mshare syscall with a name for the shared
+address range, a file with that name is created under msharefs. This
+file can be opened by another process, if permissions allow, to query
+the addresses shared under this range. These files are removed by the
+mshare_unlink syscall and cannot be deleted directly; for that reason
+they are created as immutable files.
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index f724129c0425..2a57a6ec6f3e 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -105,5 +105,6 @@
 #define Z3FOLD_MAGIC		0x33
 #define PPC_CMM_MAGIC		0xc7571590
 #define SECRETMEM_MAGIC		0x5345434d	/* "SECM" */
+#define MSHARE_MAGIC		0x4d534852	/* "MSHR" */
 
 #endif /* __LINUX_MAGIC_H__ */
diff --git a/mm/Makefile b/mm/Makefile
index 6f9ffa968a1a..51a2ab9080d9 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -37,7 +37,7 @@ CFLAGS_init-mm.o += $(call cc-disable-warning, override-init)
 CFLAGS_init-mm.o += $(call cc-disable-warning, initializer-overrides)
 
 mmu-y			:= nommu.o
-mmu-$(CONFIG_MMU)	:= highmem.o memory.o mincore.o \
+mmu-$(CONFIG_MMU)	:= highmem.o memory.o mincore.o mshare.o \
 			   mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \
 			   msync.o page_vma_mapped.o pagewalk.o \
 			   pgtable-generic.o rmap.o vmalloc.o
diff --git a/mm/mshare.c b/mm/mshare.c
new file mode 100644
index 000000000000..c8fab3869bab
--- /dev/null
+++ b/mm/mshare.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Enable cooperating processes to share page tables between
+ * them to reduce the extra memory consumed by multiple copies
+ * of page tables.
+ *
+ * This code adds an in-memory filesystem - msharefs.
+ * msharefs is used to manage page table sharing.
+ *
+ *
+ * Copyright (C) 2022 Oracle Corp. All rights reserved.
+ * Author:	Khalid Aziz <khalid.aziz@oracle.com>
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <linux/pseudo_fs.h>
+#include <linux/fileattr.h>
+#include <uapi/linux/magic.h>
+#include <uapi/linux/limits.h>
+
+static struct super_block *msharefs_sb;
+
+static const struct file_operations msharefs_file_operations = {
+	.open	= simple_open,
+	.llseek	= no_llseek,
+};
+
+static int
+msharefs_d_hash(const struct dentry *dentry, struct qstr *qstr)
+{
+	unsigned long hash = init_name_hash(dentry);
+	const unsigned char *s = qstr->name;
+	unsigned int len = qstr->len;
+
+	while (len--)
+		hash = partial_name_hash(*s++, hash);
+	qstr->hash = end_name_hash(hash);
+	return 0;
+}
+
+static const struct dentry_operations msharefs_d_ops = {
+	.d_hash = msharefs_d_hash,
+};
+
+static int
+msharefs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+	static const struct tree_descr empty_descr = {""};
+	int err;
+
+	sb->s_d_op = &msharefs_d_ops;
+	err = simple_fill_super(sb, MSHARE_MAGIC, &empty_descr);
+	if (err)
+		return err;
+
+	msharefs_sb = sb;
+	return 0;
+}
+
+static int
+msharefs_get_tree(struct fs_context *fc)
+{
+	return get_tree_single(fc, msharefs_fill_super);
+}
+
+static const struct fs_context_operations msharefs_context_ops = {
+	.get_tree	= msharefs_get_tree,
+};
+
+static int
+mshare_init_fs_context(struct fs_context *fc)
+{
+	fc->ops = &msharefs_context_ops;
+	return 0;
+}
+
+static struct file_system_type mshare_fs = {
+	.name			= "msharefs",
+	.init_fs_context	= mshare_init_fs_context,
+	.kill_sb		= kill_litter_super,
+};
+
+static int
+mshare_init(void)
+{
+	int ret = 0;
+
+	ret = sysfs_create_mount_point(fs_kobj, "mshare");
+	if (ret)
+		return ret;
+
+	ret = register_filesystem(&mshare_fs);
+	if (ret)
+		sysfs_remove_mount_point(fs_kobj, "mshare");
+
+	return ret;
+}
+
+fs_initcall(mshare_init);
-- 
2.32.0



  reply	other threads:[~2022-06-29 22:55 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-29 22:53 [PATCH v2 0/9] Add support for shared PTEs across processes Khalid Aziz
2022-06-29 22:53 ` Khalid Aziz [this message]
2022-06-30 21:53   ` [PATCH v2 1/9] mm: Add msharefs filesystem Darrick J. Wong
2022-07-01 16:05     ` Khalid Aziz
2022-06-30 22:57   ` Al Viro
2022-07-01 16:08     ` Khalid Aziz
2022-06-29 22:53 ` [PATCH v2 2/9] mm/mshare: pre-populate msharefs with information file Khalid Aziz
2022-06-30 21:37   ` Darrick J. Wong
2022-06-30 22:54     ` Khalid Aziz
2022-06-30 23:01   ` Al Viro
2022-07-01 16:11     ` Khalid Aziz
2022-06-29 22:53 ` [PATCH v2 3/9] mm/mshare: make msharefs writable and support directories Khalid Aziz
2022-06-30 21:34   ` Darrick J. Wong
2022-06-30 22:49     ` Khalid Aziz
2022-06-30 23:09   ` Al Viro
2022-07-02  0:22     ` Khalid Aziz
2022-06-29 22:53 ` [PATCH v2 4/9] mm/mshare: Add a read operation for msharefs files Khalid Aziz
2022-06-30 21:27   ` Darrick J. Wong
2022-06-30 22:27     ` Khalid Aziz
2022-06-29 22:53 ` [PATCH v2 5/9] mm/mshare: Add vm flag for shared PTE Khalid Aziz
2022-06-30 14:59   ` Mark Hemment
2022-06-30 15:46     ` Khalid Aziz
2022-06-29 22:53 ` [PATCH v2 6/9] mm/mshare: Add mmap operation Khalid Aziz
2022-06-30 21:44   ` Darrick J. Wong
2022-06-30 23:30     ` Khalid Aziz
2022-06-29 22:53 ` [PATCH v2 7/9] mm/mshare: Add unlink and munmap support Khalid Aziz
2022-06-30 21:50   ` Darrick J. Wong
2022-07-01 15:58     ` Khalid Aziz
2022-06-29 22:53 ` [PATCH v2 8/9] mm/mshare: Add basic page table sharing support Khalid Aziz
2022-07-07  9:13   ` Xin Hao
2022-07-07 15:33     ` Khalid Aziz
2022-06-29 22:54 ` [PATCH v2 9/9] mm/mshare: Enable mshare region mapping across processes Khalid Aziz
2022-06-30 11:57 ` [PATCH v2 0/9] Add support for shared PTEs " Mark Hemment
2022-06-30 15:39   ` Khalid Aziz
2022-07-02  4:24 ` Andrew Morton
2022-07-06 19:26   ` Khalid Aziz
2022-07-08 11:47   ` David Hildenbrand
2022-07-08 19:36     ` Khalid Aziz
2022-07-13 14:00       ` David Hildenbrand
2022-07-13 17:58         ` Mike Kravetz
2022-07-13 18:03           ` David Hildenbrand
2022-07-14 22:02         ` Khalid Aziz
2022-07-18 12:59           ` David Hildenbrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=de5566e71e038d95342d00364c6760c7078cb091.1656531090.git.khalid.aziz@oracle.com \
    --to=khalid.aziz@oracle.com \
    --cc=21cnbao@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=aneesh.kumar@linux.ibm.com \
    --cc=arnd@arndb.de \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@redhat.com \
    --cc=ebiederm@xmission.com \
    --cc=hagen@jauu.net \
    --cc=jack@suse.cz \
    --cc=keescook@chromium.org \
    --cc=kirill@shutemov.name \
    --cc=kucharsk@gmail.com \
    --cc=linkinjeon@kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=longpeng2@huawei.com \
    --cc=luto@kernel.org \
    --cc=markhemm@googlemail.com \
    --cc=pcc@google.com \
    --cc=rppt@kernel.org \
    --cc=sieberf@amazon.com \
    --cc=sjpark@amazon.de \
    --cc=surenb@google.com \
    --cc=tst@schoebel-theuer.de \
    --cc=willy@infradead.org \
    --cc=yzaikin@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox