From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: "Liam R . Howlett" <Liam.Howlett@oracle.com>,
Vlastimil Babka <vbabka@suse.cz>, Jann Horn <jannh@google.com>,
Pedro Falcato <pfalcato@suse.de>,
David Hildenbrand <david@redhat.com>, Kees Cook <kees@kernel.org>,
Alexander Viro <viro@zeniv.linux.org.uk>,
Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>,
Suren Baghdasaryan <surenb@google.com>,
linux-mm@kvack.org, linux-fsdevel@vger.kernel.org,
linux-kernel@vger.kernel.org
Subject: [PATCH 2/4] mm: perform VMA allocation, freeing, duplication in mm
Date: Thu, 24 Apr 2025 22:15:27 +0100 [thread overview]
Message-ID: <0f848d59f3eea3dd0c0cdc3920644222c40cffe6.1745528282.git.lorenzo.stoakes@oracle.com> (raw)
In-Reply-To: <cover.1745528282.git.lorenzo.stoakes@oracle.com>
Right now these are performed in kernel/fork.c which is odd and a violation
of separation of concerns, as well as preventing us from integrating this
and related logic into userland VMA testing going forward, and perhaps more
importantly - enabling us to, in a subsequent commit, make VMA
allocation/freeing a purely internal mm operation.
There is a fly in the ointment - nommu - mmap.c is not compiled if
CONFIG_MMU is not set, and there is no sensible place to put these outside
of that, so we are put in the position of having to duplication some logic
here.
This isn't ideal, but since nommu is a niche use-case, already duplicates a
great deal of mmu logic by its nature and we can eliminate code that is not
applicable to nommu, it seems a worthwhile trade-off.
The intent is to move all this logic to vma.c in a subsequent commit,
rendering VMA allocation, freeing and duplication mm-internal-only and
userland testable.
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
kernel/fork.c | 88 ---------------------------------------------------
mm/mmap.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++
mm/nommu.c | 67 +++++++++++++++++++++++++++++++++++++++
3 files changed, 151 insertions(+), 88 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 168681fc4b25..ebddc51624ec 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -428,88 +428,9 @@ struct kmem_cache *files_cachep;
/* SLAB cache for fs_struct structures (tsk->fs) */
struct kmem_cache *fs_cachep;
-/* SLAB cache for vm_area_struct structures */
-static struct kmem_cache *vm_area_cachep;
-
/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;
-struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
-{
- struct vm_area_struct *vma;
-
- vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
- if (!vma)
- return NULL;
-
- vma_init(vma, mm);
-
- return vma;
-}
-
-static void vm_area_init_from(const struct vm_area_struct *src,
- struct vm_area_struct *dest)
-{
- dest->vm_mm = src->vm_mm;
- dest->vm_ops = src->vm_ops;
- dest->vm_start = src->vm_start;
- dest->vm_end = src->vm_end;
- dest->anon_vma = src->anon_vma;
- dest->vm_pgoff = src->vm_pgoff;
- dest->vm_file = src->vm_file;
- dest->vm_private_data = src->vm_private_data;
- vm_flags_init(dest, src->vm_flags);
- memcpy(&dest->vm_page_prot, &src->vm_page_prot,
- sizeof(dest->vm_page_prot));
- /*
- * src->shared.rb may be modified concurrently when called from
- * dup_mmap(), but the clone will reinitialize it.
- */
- data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
- memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx,
- sizeof(dest->vm_userfaultfd_ctx));
-#ifdef CONFIG_ANON_VMA_NAME
- dest->anon_name = src->anon_name;
-#endif
-#ifdef CONFIG_SWAP
- memcpy(&dest->swap_readahead_info, &src->swap_readahead_info,
- sizeof(dest->swap_readahead_info));
-#endif
-#ifndef CONFIG_MMU
- dest->vm_region = src->vm_region;
-#endif
-#ifdef CONFIG_NUMA
- dest->vm_policy = src->vm_policy;
-#endif
-}
-
-struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
-{
- struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
-
- if (!new)
- return NULL;
-
- ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
- ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
- vm_area_init_from(orig, new);
- vma_lock_init(new, true);
- INIT_LIST_HEAD(&new->anon_vma_chain);
- vma_numab_state_init(new);
- dup_anon_vma_name(orig, new);
-
- return new;
-}
-
-void vm_area_free(struct vm_area_struct *vma)
-{
- /* The vma should be detached while being destroyed. */
- vma_assert_detached(vma);
- vma_numab_state_free(vma);
- free_anon_vma_name(vma);
- kmem_cache_free(vm_area_cachep, vma);
-}
-
static void account_kernel_stack(struct task_struct *tsk, int account)
{
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
@@ -3214,11 +3135,6 @@ void __init mm_cache_init(void)
void __init proc_caches_init(void)
{
- struct kmem_cache_args args = {
- .use_freeptr_offset = true,
- .freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
- };
-
sighand_cachep = kmem_cache_create("sighand_cache",
sizeof(struct sighand_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
@@ -3235,10 +3151,6 @@ void __init proc_caches_init(void)
sizeof(struct fs_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
NULL);
- vm_area_cachep = kmem_cache_create("vm_area_struct",
- sizeof(struct vm_area_struct), &args,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
- SLAB_ACCOUNT);
mmap_init();
nsproxy_cache_init();
}
diff --git a/mm/mmap.c b/mm/mmap.c
index 1289c6381419..fbddcd082a93 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -77,6 +77,82 @@ int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
static bool ignore_rlimit_data;
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
+/* SLAB cache for vm_area_struct structures */
+static struct kmem_cache *vm_area_cachep;
+
+struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+
+ vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+ if (!vma)
+ return NULL;
+
+ vma_init(vma, mm);
+
+ return vma;
+}
+
+static void vm_area_init_from(const struct vm_area_struct *src,
+ struct vm_area_struct *dest)
+{
+ dest->vm_mm = src->vm_mm;
+ dest->vm_ops = src->vm_ops;
+ dest->vm_start = src->vm_start;
+ dest->vm_end = src->vm_end;
+ dest->anon_vma = src->anon_vma;
+ dest->vm_pgoff = src->vm_pgoff;
+ dest->vm_file = src->vm_file;
+ dest->vm_private_data = src->vm_private_data;
+ vm_flags_init(dest, src->vm_flags);
+ memcpy(&dest->vm_page_prot, &src->vm_page_prot,
+ sizeof(dest->vm_page_prot));
+ /*
+ * src->shared.rb may be modified concurrently when called from
+ * dup_mmap(), but the clone will reinitialize it.
+ */
+ data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
+ memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx,
+ sizeof(dest->vm_userfaultfd_ctx));
+#ifdef CONFIG_ANON_VMA_NAME
+ dest->anon_name = src->anon_name;
+#endif
+#ifdef CONFIG_SWAP
+ memcpy(&dest->swap_readahead_info, &src->swap_readahead_info,
+ sizeof(dest->swap_readahead_info));
+#endif
+#ifdef CONFIG_NUMA
+ dest->vm_policy = src->vm_policy;
+#endif
+}
+
+struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
+{
+ struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+
+ if (!new)
+ return NULL;
+
+ ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
+ ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
+ vm_area_init_from(orig, new);
+ vma_lock_init(new, true);
+ INIT_LIST_HEAD(&new->anon_vma_chain);
+ vma_numab_state_init(new);
+ dup_anon_vma_name(orig, new);
+
+ return new;
+}
+
+void vm_area_free(struct vm_area_struct *vma)
+{
+ /* The vma should be detached while being destroyed. */
+ vma_assert_detached(vma);
+ vma_numab_state_free(vma);
+ free_anon_vma_name(vma);
+ kmem_cache_free(vm_area_cachep, vma);
+}
+
/* Update vma->vm_page_prot to reflect vma->vm_flags. */
void vma_set_page_prot(struct vm_area_struct *vma)
{
@@ -1601,9 +1677,17 @@ static const struct ctl_table mmap_table[] = {
void __init mmap_init(void)
{
int ret;
+ struct kmem_cache_args args = {
+ .use_freeptr_offset = true,
+ .freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
+ };
ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
VM_BUG_ON(ret);
+ vm_area_cachep = kmem_cache_create("vm_area_struct",
+ sizeof(struct vm_area_struct), &args,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
+ SLAB_ACCOUNT);
#ifdef CONFIG_SYSCTL
register_sysctl_init("vm", mmap_table);
#endif
diff --git a/mm/nommu.c b/mm/nommu.c
index 2b4d304c6445..2b3722266222 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -53,6 +53,65 @@ static struct kmem_cache *vm_region_jar;
struct rb_root nommu_region_tree = RB_ROOT;
DECLARE_RWSEM(nommu_region_sem);
+/* SLAB cache for vm_area_struct structures */
+static struct kmem_cache *vm_area_cachep;
+
+vm_area_struct *vm_area_alloc(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+
+ vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+ if (!vma)
+ return NULL;
+
+ vma_init(vma, mm);
+
+ return vma;
+}
+
+void vm_area_free(struct vm_area_struct *vma)
+{
+ kmem_cache_free(vm_area_cachep, vma);
+}
+
+static void vm_area_init_from(const struct vm_area_struct *src,
+ struct vm_area_struct *dest)
+{
+ dest->vm_mm = src->vm_mm;
+ dest->vm_ops = src->vm_ops;
+ dest->vm_start = src->vm_start;
+ dest->vm_end = src->vm_end;
+ dest->anon_vma = src->anon_vma;
+ dest->vm_pgoff = src->vm_pgoff;
+ dest->vm_file = src->vm_file;
+ dest->vm_private_data = src->vm_private_data;
+ vm_flags_init(dest, src->vm_flags);
+ memcpy(&dest->vm_page_prot, &src->vm_page_prot,
+ sizeof(dest->vm_page_prot));
+ /*
+ * src->shared.rb may be modified concurrently when called from
+ * dup_mmap(), but the clone will reinitialize it.
+ */
+ data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
+
+ dest->vm_region = src->vm_region;
+}
+
+struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
+{
+ struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+
+ if (!new)
+ return NULL;
+
+ ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
+ ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
+ vm_area_init_from(orig, new);
+ INIT_LIST_HEAD(&new->anon_vma_chain);
+
+ return new;
+}
+
const struct vm_operations_struct generic_file_vm_ops = {
};
@@ -404,10 +463,18 @@ static const struct ctl_table nommu_table[] = {
void __init mmap_init(void)
{
int ret;
+ struct kmem_cache_args args = {
+ .use_freeptr_offset = true,
+ .freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
+ };
ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
VM_BUG_ON(ret);
vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT);
+ vm_area_cachep = kmem_cache_create("vm_area_struct",
+ sizeof(struct vm_area_struct), &args,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
+ SLAB_ACCOUNT);
register_sysctl_init("vm", nommu_table);
}
--
2.49.0
next prev parent reply other threads:[~2025-04-24 21:15 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-24 21:15 [PATCH 0/4] move all VMA allocation, freeing and duplication logic to mm Lorenzo Stoakes
2025-04-24 21:15 ` [PATCH 1/4] mm: abstract initial stack setup to mm subsystem Lorenzo Stoakes
2025-04-24 21:30 ` David Hildenbrand
2025-04-25 0:55 ` Suren Baghdasaryan
2025-04-25 10:10 ` Lorenzo Stoakes
2025-04-25 10:11 ` Lorenzo Stoakes
2025-04-24 21:15 ` Lorenzo Stoakes [this message]
2025-04-24 21:22 ` [PATCH 2/4] mm: perform VMA allocation, freeing, duplication in mm David Hildenbrand
2025-04-25 1:22 ` Suren Baghdasaryan
2025-04-25 1:37 ` Suren Baghdasaryan
2025-04-25 10:10 ` Lorenzo Stoakes
2025-04-25 11:04 ` Lorenzo Stoakes
2025-04-25 10:09 ` Lorenzo Stoakes
2025-04-25 10:26 ` Liam R. Howlett
2025-04-25 10:31 ` Lorenzo Stoakes
2025-04-25 10:45 ` Lorenzo Stoakes
2025-04-25 11:00 ` Liam R. Howlett
2025-04-25 11:03 ` Lorenzo Stoakes
2025-04-25 10:17 ` Lorenzo Stoakes
2025-04-25 3:15 ` Kees Cook
2025-04-25 10:40 ` Lorenzo Stoakes
2025-04-25 10:53 ` Pedro Falcato
2025-04-25 13:54 ` Liam R. Howlett
2025-04-25 15:32 ` Suren Baghdasaryan
2025-04-25 15:34 ` Suren Baghdasaryan
2025-04-25 17:12 ` Kees Cook
2025-04-25 17:26 ` Suren Baghdasaryan
2025-04-24 21:15 ` [PATCH 3/4] mm: move dup_mmap() to mm Lorenzo Stoakes
2025-04-25 9:13 ` Pedro Falcato
2025-04-25 10:18 ` Lorenzo Stoakes
2025-04-24 21:15 ` [PATCH 4/4] mm: move vm_area_alloc,dup,free() functions to vma.c Lorenzo Stoakes
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=0f848d59f3eea3dd0c0cdc3920644222c40cffe6.1745528282.git.lorenzo.stoakes@oracle.com \
--to=lorenzo.stoakes@oracle.com \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=brauner@kernel.org \
--cc=david@redhat.com \
--cc=jack@suse.cz \
--cc=jannh@google.com \
--cc=kees@kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=pfalcato@suse.de \
--cc=surenb@google.com \
--cc=vbabka@suse.cz \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox