From: Peng Zhang <zhangpeng.00@bytedance.com>
To: Liam.Howlett@oracle.com, corbet@lwn.net,
akpm@linux-foundation.org, willy@infradead.org,
brauner@kernel.org, surenb@google.com,
michael.christie@oracle.com, peterz@infradead.org,
mathieu.desnoyers@efficios.com, npiggin@gmail.com,
avagin@gmail.com
Cc: linux-mm@kvack.org, linux-doc@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
Peng Zhang <zhangpeng.00@bytedance.com>
Subject: [PATCH 04/11] maple_tree: Introduce interfaces __mt_dup() and mt_dup()
Date: Wed, 26 Jul 2023 16:09:09 +0800 [thread overview]
Message-ID: <20230726080916.17454-5-zhangpeng.00@bytedance.com> (raw)
In-Reply-To: <20230726080916.17454-1-zhangpeng.00@bytedance.com>
Introduce interfaces __mt_dup() and mt_dup(), which are used to
duplicate a maple tree. Compared with traversing the source tree and
reinserting entry by entry in the new tree, it has better performance.
The difference between __mt_dup() and mt_dup() is that mt_dup() holds
an internal lock.
Signed-off-by: Peng Zhang <zhangpeng.00@bytedance.com>
---
include/linux/maple_tree.h | 3 +
lib/maple_tree.c | 211 +++++++++++++++++++++++++++++++++++++
2 files changed, 214 insertions(+)
diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index c962af188681..229fe78e4c89 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -327,6 +327,9 @@ int mtree_store(struct maple_tree *mt, unsigned long index,
void *entry, gfp_t gfp);
void *mtree_erase(struct maple_tree *mt, unsigned long index);
+int mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp);
+int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp);
+
void mtree_destroy(struct maple_tree *mt);
void __mt_destroy(struct maple_tree *mt);
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index da3a2fb405c0..efac6761ae37 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -6595,6 +6595,217 @@ void *mtree_erase(struct maple_tree *mt, unsigned long index)
}
EXPORT_SYMBOL(mtree_erase);
+/*
+ * mt_dup_free() - Free the nodes of a incomplete maple tree.
+ * @mt: The incomplete maple tree
+ * @node: Free nodes from @node
+ *
+ * This function frees all nodes starting from @node in the reverse order of
+ * mt_dup_build(). At this point we don't need to hold the source tree lock.
+ */
+static void mt_dup_free(struct maple_tree *mt, struct maple_node *node)
+{
+ void **slots;
+ unsigned char offset;
+ struct maple_enode *enode;
+ enum maple_type type;
+ unsigned char count = 0, i;
+
+try_ascend:
+ if (ma_is_root(node)) {
+ mt_free_one(node);
+ return;
+ }
+
+ offset = ma_parent_slot(node);
+ type = ma_parent_type(mt, node);
+ node = ma_parent(node);
+ if (!offset)
+ goto free;
+
+ offset--;
+
+descend:
+ slots = (void **)ma_slots(node, type);
+ enode = slots[offset];
+ if (mte_is_leaf(enode))
+ goto free;
+
+ type = mte_node_type(enode);
+ node = mte_to_node(enode);
+ offset = ma_nonleaf_data_end_nocheck(node, type);
+ goto descend;
+
+free:
+ slots = (void **)ma_slots(node, type);
+ count = ma_nonleaf_data_end_nocheck(node, type) + 1;
+ for (i = 0; i < count; i++)
+ ((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK;
+
+ /* Cast to __rcu to avoid sparse checker complaining. */
+ mt_free_bulk(count, (void __rcu **)slots);
+ goto try_ascend;
+}
+
+/*
+ * mt_dup_build() - Build a new maple tree from a source tree
+ * @mt: The source maple tree to copy from
+ * @new: The new maple tree
+ * @gfp: The GFP_FLAGS to use for allocations
+ * @to_free: Free nodes starting from @to_free if the build fails
+ *
+ * This function builds a new tree in DFS preorder. If it fails due to memory
+ * allocation, @to_free will store the last failed node to free the incomplete
+ * tree. Use mt_dup_free() to free nodes.
+ *
+ * Return: 0 on success, -ENOMEM if memory could not be allocated.
+ */
+static inline int mt_dup_build(struct maple_tree *mt, struct maple_tree *new,
+ gfp_t gfp, struct maple_node **to_free)
+{
+ struct maple_enode *enode;
+ struct maple_node *new_node, *new_parent = NULL, *node;
+ enum maple_type type;
+ void __rcu **slots;
+ void **new_slots;
+ unsigned char count, request, i, offset;
+ unsigned long *set_parent;
+ unsigned long new_root;
+
+ mt_init_flags(new, mt->ma_flags);
+ enode = mt_root_locked(mt);
+ if (unlikely(!xa_is_node(enode))) {
+ rcu_assign_pointer(new->ma_root, enode);
+ return 0;
+ }
+
+ new_node = mt_alloc_one(gfp);
+ if (!new_node)
+ return -ENOMEM;
+
+ new_root = (unsigned long)new_node;
+ new_root |= (unsigned long)enode & MAPLE_NODE_MASK;
+
+copy_node:
+ node = mte_to_node(enode);
+ type = mte_node_type(enode);
+ memcpy(new_node, node, sizeof(struct maple_node));
+
+ set_parent = (unsigned long *)&(new_node->parent);
+ *set_parent &= MAPLE_NODE_MASK;
+ *set_parent |= (unsigned long)new_parent;
+ if (ma_is_leaf(type))
+ goto ascend;
+
+ new_slots = (void **)ma_slots(new_node, type);
+ slots = ma_slots(node, type);
+ request = ma_nonleaf_data_end(mt, node, type) + 1;
+ count = mt_alloc_bulk(gfp, request, new_slots);
+ if (!count) {
+ *to_free = new_node;
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < count; i++)
+ ((unsigned long *)new_slots)[i] |=
+ ((unsigned long)mt_slot_locked(mt, slots, i) &
+ MAPLE_NODE_MASK);
+ offset = 0;
+
+descend:
+ new_parent = new_node;
+ enode = mt_slot_locked(mt, slots, offset);
+ new_node = mte_to_node(new_slots[offset]);
+ goto copy_node;
+
+ascend:
+ if (ma_is_root(node)) {
+ new_node = mte_to_node((void *)new_root);
+ new_node->parent = ma_parent_ptr((unsigned long)new |
+ MA_ROOT_PARENT);
+ rcu_assign_pointer(new->ma_root, (void *)new_root);
+ return 0;
+ }
+
+ offset = ma_parent_slot(node);
+ type = ma_parent_type(mt, node);
+ node = ma_parent(node);
+ new_node = ma_parent(new_node);
+ if (offset < ma_nonleaf_data_end(mt, node, type)) {
+ offset++;
+ new_slots = (void **)ma_slots(new_node, type);
+ slots = ma_slots(node, type);
+ goto descend;
+ }
+
+ goto ascend;
+}
+
+/**
+ * __mt_dup(): Duplicate a maple tree
+ * @mt: The source maple tree
+ * @new: The new maple tree
+ * @gfp: The GFP_FLAGS to use for allocations
+ *
+ * This function duplicates a maple tree using a faster method than traversing
+ * the source tree and inserting entries into the new tree one by one. The user
+ * needs to lock the source tree manually. Before calling this function, @new
+ * must be an empty tree or an uninitialized tree. If @mt uses an external lock,
+ * we may also need to manually set @new's external lock using
+ * mt_set_external_lock().
+ *
+ * Return: 0 on success, -ENOMEM if memory could not be allocated.
+ */
+int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
+{
+ int ret;
+ struct maple_node *to_free = NULL;
+
+ ret = mt_dup_build(mt, new, gfp, &to_free);
+
+ if (unlikely(ret == -ENOMEM)) {
+ if (to_free)
+ mt_dup_free(new, to_free);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(__mt_dup);
+
+/**
+ * mt_dup(): Duplicate a maple tree
+ * @mt: The source maple tree
+ * @new: The new maple tree
+ * @gfp: The GFP_FLAGS to use for allocations
+ *
+ * This function duplicates a maple tree using a faster method than traversing
+ * the source tree and inserting entries into the new tree one by one. The
+ * function will lock the source tree with an internal lock, and the user does
+ * not need to manually handle the lock. Before calling this function, @new must
+ * be an empty tree or an uninitialized tree. If @mt uses an external lock, we
+ * may also need to manually set @new's external lock using
+ * mt_set_external_lock().
+ *
+ * Return: 0 on success, -ENOMEM if memory could not be allocated.
+ */
+int mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
+{
+ int ret;
+ struct maple_node *to_free = NULL;
+
+ mtree_lock(mt);
+ ret = mt_dup_build(mt, new, gfp, &to_free);
+ mtree_unlock(mt);
+
+ if (unlikely(ret == -ENOMEM)) {
+ if (to_free)
+ mt_dup_free(new, to_free);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(mt_dup);
+
/**
* __mt_destroy() - Walk and free all nodes of a locked maple tree.
* @mt: The maple tree
--
2.20.1
next prev parent reply other threads:[~2023-07-26 8:10 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-07-26 8:09 [PATCH 00/11] Introduce mt_dup() to improve the performance of fork() Peng Zhang
2023-07-26 8:09 ` [PATCH 01/11] maple_tree: Introduce ma_nonleaf_data_end{_nocheck}() Peng Zhang
2023-07-26 14:58 ` Liam R. Howlett
2023-07-31 9:52 ` Peng Zhang
2023-07-31 16:08 ` Liam R. Howlett
2023-07-26 8:09 ` [PATCH 02/11] maple_tree: Validate MAPLE_ENODE and ma_nonleaf_data_end() Peng Zhang
2023-07-26 8:09 ` [PATCH 03/11] maple_tree: Add some helper functions Peng Zhang
2023-07-26 15:02 ` Liam R. Howlett
2023-07-26 15:08 ` Matthew Wilcox
2023-07-31 11:45 ` Peng Zhang
2023-08-11 17:28 ` Liam R. Howlett
2023-07-31 11:40 ` Peng Zhang
2023-07-26 8:09 ` Peng Zhang [this message]
2023-07-26 16:03 ` [PATCH 04/11] maple_tree: Introduce interfaces __mt_dup() and mt_dup() Liam R. Howlett
2023-07-31 12:24 ` Peng Zhang
2023-07-31 16:27 ` Liam R. Howlett
2023-08-16 13:41 ` Peng Zhang
2023-08-16 18:30 ` Liam R. Howlett
2023-08-18 11:53 ` Peng Zhang
2023-08-18 16:13 ` Liam R. Howlett
2023-07-26 8:09 ` [PATCH 05/11] maple_tree: Add test for mt_dup() Peng Zhang
2023-07-26 16:06 ` Liam R. Howlett
2023-07-31 12:32 ` Peng Zhang
2023-07-31 16:41 ` Liam R. Howlett
2023-07-26 8:09 ` [PATCH 06/11] maple_tree: Introduce mas_replace_entry() to directly replace an entry Peng Zhang
2023-07-26 16:08 ` Liam R. Howlett
2023-07-31 12:39 ` Peng Zhang
2023-07-31 16:48 ` Liam R. Howlett
2023-08-16 13:11 ` Peng Zhang
2023-08-16 17:40 ` Liam R. Howlett
2023-08-18 9:39 ` Peng Zhang
2023-08-18 16:15 ` Liam R. Howlett
2023-07-26 8:09 ` [PATCH 07/11] maple_tree: Update the documentation of maple tree Peng Zhang
2023-07-26 8:09 ` [PATCH 08/11] maple_tree: Skip other tests when BENCH is enabled Peng Zhang
2023-07-26 8:09 ` [PATCH 09/11] maple_tree: Update check_forking() and bench_forking() Peng Zhang
2023-07-26 8:09 ` [PATCH 10/11] MAINTAINERS: Add co-maintainer for maple tree Peng Zhang
2023-07-26 16:39 ` Liam R. Howlett
2023-07-31 12:55 ` Peng Zhang
2023-07-31 20:55 ` Liam R. Howlett
2023-07-26 8:09 ` [PATCH 11/11] fork: Use __mt_dup() to duplicate maple tree in dup_mmap() Peng Zhang
2023-07-26 17:06 ` Liam R. Howlett
2023-07-31 12:59 ` Peng Zhang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230726080916.17454-5-zhangpeng.00@bytedance.com \
--to=zhangpeng.00@bytedance.com \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=avagin@gmail.com \
--cc=brauner@kernel.org \
--cc=corbet@lwn.net \
--cc=linux-doc@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=michael.christie@oracle.com \
--cc=npiggin@gmail.com \
--cc=peterz@infradead.org \
--cc=surenb@google.com \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox