* [PATCH v2 1/2] maple_tree: reset mas->index and mas->last on write retries
@ 2024-08-12 19:05 Sidhartha Kumar
2024-08-12 19:05 ` [PATCH v2 2/2] maple_tree: add test to replicate low memory race conditions Sidhartha Kumar
2024-08-13 14:12 ` [PATCH v2 1/2] maple_tree: reset mas->index and mas->last on write retries Liam R. Howlett
0 siblings, 2 replies; 4+ messages in thread
From: Sidhartha Kumar @ 2024-08-12 19:05 UTC (permalink / raw)
To: linux-kernel, maple-tree
Cc: linux-mm, akpm, liam.howlett, willy, Sidhartha Kumar
The following scenario can result in a race condition:
Consider a node with the following indices and values
a<------->b<----------->c<--------->d
0xA NULL 0xB
CPU 1 CPU 2
--------- ---------
mas_set_range(a,b)
mas_erase()
-> range is expanded (a,c) because of null expansion
mas_nomem()
mas_unlock()
mas_store_range(b,c,0xC)
The node now looks like:
a<------->b<----------->c<--------->d
0xA 0xC 0xB
mas_lock()
mas_erase() <------ range of erase is still (a,c)
The node is now NULL from (a,c) but the write from CPU 2 should have been
retained and range (b,c) should still have 0xC as its value. We can fix
this by re-initializing to the original index and last. This does not need
a cc: Stable as there are no users of the maple tree which use internal
locking and this condition is only possible with internal locking.
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
---
v1 -> v2:
- re-initialize index and last in the mas_nomem() if statement so
fast path is not affected in mas_erase().
- use __mas_set_range() rather than set mas->index and mas->last
directly.
lib/maple_tree.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index aa3a5df15b8e..b547ff211ac7 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -5451,14 +5451,19 @@ EXPORT_SYMBOL_GPL(mas_store);
*/
int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
{
+ unsigned long index = mas->index;
+ unsigned long last = mas->last;
MA_WR_STATE(wr_mas, mas, entry);
mas_wr_store_setup(&wr_mas);
trace_ma_write(__func__, mas, 0, entry);
retry:
mas_wr_store_entry(&wr_mas);
- if (unlikely(mas_nomem(mas, gfp)))
+ if (unlikely(mas_nomem(mas, gfp))) {
+ if (!entry)
+ __mas_set_range(mas, index, last);
goto retry;
+ }
if (unlikely(mas_is_err(mas)))
return xa_err(mas->node);
@@ -6245,23 +6250,26 @@ EXPORT_SYMBOL_GPL(mas_find_range_rev);
void *mas_erase(struct ma_state *mas)
{
void *entry;
+ unsigned long index = mas->index;
MA_WR_STATE(wr_mas, mas, NULL);
if (!mas_is_active(mas) || !mas_is_start(mas))
mas->status = ma_start;
- /* Retry unnecessary when holding the write lock. */
+write_retry:
entry = mas_state_walk(mas);
if (!entry)
return NULL;
-write_retry:
/* Must reset to ensure spanning writes of last slot are detected */
mas_reset(mas);
mas_wr_store_setup(&wr_mas);
mas_wr_store_entry(&wr_mas);
- if (mas_nomem(mas, GFP_KERNEL))
+ if (mas_nomem(mas, GFP_KERNEL)) {
+ /* in case the range of entry changed when unlocked */
+ mas->index = mas->last = index;
goto write_retry;
+ }
return entry;
}
--
2.46.0
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v2 2/2] maple_tree: add test to replicate low memory race conditions
2024-08-12 19:05 [PATCH v2 1/2] maple_tree: reset mas->index and mas->last on write retries Sidhartha Kumar
@ 2024-08-12 19:05 ` Sidhartha Kumar
2024-08-13 14:12 ` Liam R. Howlett
2024-08-13 14:12 ` [PATCH v2 1/2] maple_tree: reset mas->index and mas->last on write retries Liam R. Howlett
1 sibling, 1 reply; 4+ messages in thread
From: Sidhartha Kumar @ 2024-08-12 19:05 UTC (permalink / raw)
To: linux-kernel, maple-tree
Cc: linux-mm, akpm, liam.howlett, willy, Sidhartha Kumar
Add new callback fields to the userspace implementation of struct
kmem_cache. This allows for executing callback functions in order to
further test low memory scenarios where node allocation is retried.
This callback can help test race conditions by calling a function when a
low memory event is tested.
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
---
v1 -> v2:
- change test name to check_nomem_writer_race()
- move test down in farmer_tests()
- remove mas_destroy() from check_nomem_writer_race() as it's not
needed
- remove using mas.index and mas.last directly through
mas_set_range() and MA_STATE macros.
- remove unneeded mas_reset() in check_nomem_writer_race().
lib/maple_tree.c | 13 +++++++
tools/testing/radix-tree/maple.c | 63 ++++++++++++++++++++++++++++++++
tools/testing/shared/linux.c | 26 ++++++++++++-
3 files changed, 101 insertions(+), 1 deletion(-)
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index b547ff211ac7..14d7864b8d53 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -7005,6 +7005,19 @@ void mt_set_non_kernel(unsigned int val)
kmem_cache_set_non_kernel(maple_node_cache, val);
}
+extern void kmem_cache_set_callback(struct kmem_cache *cachep,
+ void (*callback)(void *));
+void mt_set_callback(void (*callback)(void *))
+{
+ kmem_cache_set_callback(maple_node_cache, callback);
+}
+
+extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private);
+void mt_set_private(void *private)
+{
+ kmem_cache_set_private(maple_node_cache, private);
+}
+
extern unsigned long kmem_cache_get_alloc(struct kmem_cache *);
unsigned long mt_get_alloc_size(void)
{
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index cd1cf05503b4..ef5b83cf94ea 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -36224,6 +36224,65 @@ static noinline void __init check_mtree_dup(struct maple_tree *mt)
extern void test_kmem_cache_bulk(void);
+/* callback function used for check_nomem_writer_race() */
+static void writer2(void *maple_tree)
+{
+ struct maple_tree *mt = (struct maple_tree *)maple_tree;
+ MA_STATE(mas, mt, 6, 10);
+
+ mtree_lock(mas.tree);
+ mas_store(&mas, xa_mk_value(0xC));
+ mas_destroy(&mas);
+ mtree_unlock(mas.tree);
+}
+
+/*
+ * check_nomem_writer_race() - test a possible race in the mas_nomem() path
+ * @mt: The tree to build.
+ *
+ * There is a possible race condition in low memory conditions when mas_nomem()
+ * gives up its lock. A second writer can change the entry that the primary
+ * writer executing the mas_nomem() path is modifying. This test recreates this
+ * scenario to ensure we are handling it correctly.
+ */
+static void check_nomem_writer_race(struct maple_tree *mt)
+{
+ MA_STATE(mas, mt, 0, 5);
+
+ mt_set_non_kernel(0);
+ /* setup root with 2 values with NULL in between */
+ mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL);
+ mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL);
+ mtree_store_range(mt, 11, 15, xa_mk_value(0xB), GFP_KERNEL);
+
+ /* setup writer 2 that will trigger the race condition */
+ mt_set_private(mt);
+ mt_set_callback(writer2);
+
+ mtree_lock(mt);
+ /* erase 0-5 */
+ mas_erase(&mas);
+
+ /* index 6-10 should retain the value from writer 2 */
+ check_load(mt, 6, xa_mk_value(0xC));
+ mtree_unlock(mt);
+
+ /* test for the same race but with mas_store_gfp() */
+ mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL);
+ mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL);
+
+ mas_set_range(&mas, 0, 5);
+ mtree_lock(mt);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+
+ /* ensure write made by writer 2 is retained */
+ check_load(mt, 6, xa_mk_value(0xC));
+
+ mt_set_private(NULL);
+ mt_set_callback(NULL);
+ mtree_unlock(mt);
+}
+
void farmer_tests(void)
{
struct maple_node *node;
@@ -36257,6 +36316,10 @@ void farmer_tests(void)
check_dfs_preorder(&tree);
mtree_destroy(&tree);
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_USE_RCU);
+ check_nomem_writer_race(&tree);
+ mtree_destroy(&tree);
+
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
check_prealloc(&tree);
mtree_destroy(&tree);
diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c
index 4eb442206d01..17263696b5d8 100644
--- a/tools/testing/shared/linux.c
+++ b/tools/testing/shared/linux.c
@@ -26,8 +26,21 @@ struct kmem_cache {
unsigned int non_kernel;
unsigned long nr_allocated;
unsigned long nr_tallocated;
+ bool exec_callback;
+ void (*callback)(void *);
+ void *private;
};
+void kmem_cache_set_callback(struct kmem_cache *cachep, void (*callback)(void *))
+{
+ cachep->callback = callback;
+}
+
+void kmem_cache_set_private(struct kmem_cache *cachep, void *private)
+{
+ cachep->private = private;
+}
+
void kmem_cache_set_non_kernel(struct kmem_cache *cachep, unsigned int val)
{
cachep->non_kernel = val;
@@ -58,9 +71,17 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
{
void *p;
+ if (cachep->exec_callback) {
+ if (cachep->callback)
+ cachep->callback(cachep->private);
+ cachep->exec_callback = false;
+ }
+
if (!(gfp & __GFP_DIRECT_RECLAIM)) {
- if (!cachep->non_kernel)
+ if (!cachep->non_kernel) {
+ cachep->exec_callback = true;
return NULL;
+ }
cachep->non_kernel--;
}
@@ -223,6 +244,9 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align,
ret->objs = NULL;
ret->ctor = ctor;
ret->non_kernel = 0;
+ ret->exec_callback = false;
+ ret->callback = NULL;
+ ret->private = NULL;
return ret;
}
--
2.46.0
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2 1/2] maple_tree: reset mas->index and mas->last on write retries
2024-08-12 19:05 [PATCH v2 1/2] maple_tree: reset mas->index and mas->last on write retries Sidhartha Kumar
2024-08-12 19:05 ` [PATCH v2 2/2] maple_tree: add test to replicate low memory race conditions Sidhartha Kumar
@ 2024-08-13 14:12 ` Liam R. Howlett
1 sibling, 0 replies; 4+ messages in thread
From: Liam R. Howlett @ 2024-08-13 14:12 UTC (permalink / raw)
To: Sidhartha Kumar; +Cc: linux-kernel, maple-tree, linux-mm, akpm, willy
* Sidhartha Kumar <sidhartha.kumar@oracle.com> [240812 15:05]:
> The following scenario can result in a race condition:
>
> Consider a node with the following indices and values
>
> a<------->b<----------->c<--------->d
> 0xA NULL 0xB
>
> CPU 1 CPU 2
> --------- ---------
> mas_set_range(a,b)
> mas_erase()
> -> range is expanded (a,c) because of null expansion
>
> mas_nomem()
> mas_unlock()
> mas_store_range(b,c,0xC)
>
> The node now looks like:
>
> a<------->b<----------->c<--------->d
> 0xA 0xC 0xB
>
> mas_lock()
> mas_erase() <------ range of erase is still (a,c)
>
> The node is now NULL from (a,c) but the write from CPU 2 should have been
> retained and range (b,c) should still have 0xC as its value. We can fix
> this by re-initializing to the original index and last. This does not need
> a cc: Stable as there are no users of the maple tree which use internal
> locking and this condition is only possible with internal locking.
>
> Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
> ---
> v1 -> v2:
> - re-initialize index and last in the mas_nomem() if statement so
> fast path is not affected in mas_erase().
>
> - use __mas_set_range() rather than set mas->index and mas->last
> directly.
>
> lib/maple_tree.c | 16 ++++++++++++----
> 1 file changed, 12 insertions(+), 4 deletions(-)
>
> diff --git a/lib/maple_tree.c b/lib/maple_tree.c
> index aa3a5df15b8e..b547ff211ac7 100644
> --- a/lib/maple_tree.c
> +++ b/lib/maple_tree.c
> @@ -5451,14 +5451,19 @@ EXPORT_SYMBOL_GPL(mas_store);
> */
> int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
> {
> + unsigned long index = mas->index;
> + unsigned long last = mas->last;
> MA_WR_STATE(wr_mas, mas, entry);
>
> mas_wr_store_setup(&wr_mas);
> trace_ma_write(__func__, mas, 0, entry);
> retry:
> mas_wr_store_entry(&wr_mas);
> - if (unlikely(mas_nomem(mas, gfp)))
> + if (unlikely(mas_nomem(mas, gfp))) {
> + if (!entry)
> + __mas_set_range(mas, index, last);
> goto retry;
> + }
>
> if (unlikely(mas_is_err(mas)))
> return xa_err(mas->node);
> @@ -6245,23 +6250,26 @@ EXPORT_SYMBOL_GPL(mas_find_range_rev);
> void *mas_erase(struct ma_state *mas)
> {
> void *entry;
> + unsigned long index = mas->index;
> MA_WR_STATE(wr_mas, mas, NULL);
>
> if (!mas_is_active(mas) || !mas_is_start(mas))
> mas->status = ma_start;
>
> - /* Retry unnecessary when holding the write lock. */
> +write_retry:
> entry = mas_state_walk(mas);
> if (!entry)
> return NULL;
>
> -write_retry:
> /* Must reset to ensure spanning writes of last slot are detected */
> mas_reset(mas);
> mas_wr_store_setup(&wr_mas);
> mas_wr_store_entry(&wr_mas);
> - if (mas_nomem(mas, GFP_KERNEL))
> + if (mas_nomem(mas, GFP_KERNEL)) {
> + /* in case the range of entry changed when unlocked */
> + mas->index = mas->last = index;
> goto write_retry;
> + }
>
> return entry;
> }
> --
> 2.46.0
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2 2/2] maple_tree: add test to replicate low memory race conditions
2024-08-12 19:05 ` [PATCH v2 2/2] maple_tree: add test to replicate low memory race conditions Sidhartha Kumar
@ 2024-08-13 14:12 ` Liam R. Howlett
0 siblings, 0 replies; 4+ messages in thread
From: Liam R. Howlett @ 2024-08-13 14:12 UTC (permalink / raw)
To: Sidhartha Kumar; +Cc: linux-kernel, maple-tree, linux-mm, akpm, willy
* Sidhartha Kumar <sidhartha.kumar@oracle.com> [240812 15:05]:
> Add new callback fields to the userspace implementation of struct
> kmem_cache. This allows for executing callback functions in order to
> further test low memory scenarios where node allocation is retried.
>
> This callback can help test race conditions by calling a function when a
> low memory event is tested.
>
> Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
> ---
> v1 -> v2:
> - change test name to check_nomem_writer_race()
> - move test down in farmer_tests()
> - remove mas_destroy() from check_nomem_writer_race() as it's not
> needed
> - remove using mas.index and mas.last directly through
> mas_set_range() and MA_STATE macros.
> - remove unneeded mas_reset() in check_nomem_writer_race().
>
> lib/maple_tree.c | 13 +++++++
> tools/testing/radix-tree/maple.c | 63 ++++++++++++++++++++++++++++++++
> tools/testing/shared/linux.c | 26 ++++++++++++-
> 3 files changed, 101 insertions(+), 1 deletion(-)
>
> diff --git a/lib/maple_tree.c b/lib/maple_tree.c
> index b547ff211ac7..14d7864b8d53 100644
> --- a/lib/maple_tree.c
> +++ b/lib/maple_tree.c
> @@ -7005,6 +7005,19 @@ void mt_set_non_kernel(unsigned int val)
> kmem_cache_set_non_kernel(maple_node_cache, val);
> }
>
> +extern void kmem_cache_set_callback(struct kmem_cache *cachep,
> + void (*callback)(void *));
> +void mt_set_callback(void (*callback)(void *))
> +{
> + kmem_cache_set_callback(maple_node_cache, callback);
> +}
> +
> +extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private);
> +void mt_set_private(void *private)
> +{
> + kmem_cache_set_private(maple_node_cache, private);
> +}
> +
> extern unsigned long kmem_cache_get_alloc(struct kmem_cache *);
> unsigned long mt_get_alloc_size(void)
> {
> diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
> index cd1cf05503b4..ef5b83cf94ea 100644
> --- a/tools/testing/radix-tree/maple.c
> +++ b/tools/testing/radix-tree/maple.c
> @@ -36224,6 +36224,65 @@ static noinline void __init check_mtree_dup(struct maple_tree *mt)
>
> extern void test_kmem_cache_bulk(void);
>
> +/* callback function used for check_nomem_writer_race() */
> +static void writer2(void *maple_tree)
> +{
> + struct maple_tree *mt = (struct maple_tree *)maple_tree;
> + MA_STATE(mas, mt, 6, 10);
> +
> + mtree_lock(mas.tree);
> + mas_store(&mas, xa_mk_value(0xC));
> + mas_destroy(&mas);
> + mtree_unlock(mas.tree);
> +}
> +
> +/*
> + * check_nomem_writer_race() - test a possible race in the mas_nomem() path
> + * @mt: The tree to build.
> + *
> + * There is a possible race condition in low memory conditions when mas_nomem()
> + * gives up its lock. A second writer can change the entry that the primary
> + * writer executing the mas_nomem() path is modifying. This test recreates this
> + * scenario to ensure we are handling it correctly.
> + */
> +static void check_nomem_writer_race(struct maple_tree *mt)
> +{
> + MA_STATE(mas, mt, 0, 5);
> +
> + mt_set_non_kernel(0);
> + /* setup root with 2 values with NULL in between */
> + mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL);
> + mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL);
> + mtree_store_range(mt, 11, 15, xa_mk_value(0xB), GFP_KERNEL);
> +
> + /* setup writer 2 that will trigger the race condition */
> + mt_set_private(mt);
> + mt_set_callback(writer2);
> +
> + mtree_lock(mt);
> + /* erase 0-5 */
> + mas_erase(&mas);
> +
> + /* index 6-10 should retain the value from writer 2 */
> + check_load(mt, 6, xa_mk_value(0xC));
> + mtree_unlock(mt);
> +
> + /* test for the same race but with mas_store_gfp() */
> + mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL);
> + mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL);
> +
> + mas_set_range(&mas, 0, 5);
> + mtree_lock(mt);
> + mas_store_gfp(&mas, NULL, GFP_KERNEL);
> +
> + /* ensure write made by writer 2 is retained */
> + check_load(mt, 6, xa_mk_value(0xC));
> +
> + mt_set_private(NULL);
> + mt_set_callback(NULL);
> + mtree_unlock(mt);
> +}
> +
> void farmer_tests(void)
> {
> struct maple_node *node;
> @@ -36257,6 +36316,10 @@ void farmer_tests(void)
> check_dfs_preorder(&tree);
> mtree_destroy(&tree);
>
> + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_USE_RCU);
> + check_nomem_writer_race(&tree);
> + mtree_destroy(&tree);
> +
> mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
> check_prealloc(&tree);
> mtree_destroy(&tree);
> diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c
> index 4eb442206d01..17263696b5d8 100644
> --- a/tools/testing/shared/linux.c
> +++ b/tools/testing/shared/linux.c
> @@ -26,8 +26,21 @@ struct kmem_cache {
> unsigned int non_kernel;
> unsigned long nr_allocated;
> unsigned long nr_tallocated;
> + bool exec_callback;
> + void (*callback)(void *);
> + void *private;
> };
>
> +void kmem_cache_set_callback(struct kmem_cache *cachep, void (*callback)(void *))
> +{
> + cachep->callback = callback;
> +}
> +
> +void kmem_cache_set_private(struct kmem_cache *cachep, void *private)
> +{
> + cachep->private = private;
> +}
> +
> void kmem_cache_set_non_kernel(struct kmem_cache *cachep, unsigned int val)
> {
> cachep->non_kernel = val;
> @@ -58,9 +71,17 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
> {
> void *p;
>
> + if (cachep->exec_callback) {
> + if (cachep->callback)
> + cachep->callback(cachep->private);
> + cachep->exec_callback = false;
> + }
> +
> if (!(gfp & __GFP_DIRECT_RECLAIM)) {
> - if (!cachep->non_kernel)
> + if (!cachep->non_kernel) {
> + cachep->exec_callback = true;
> return NULL;
> + }
>
> cachep->non_kernel--;
> }
> @@ -223,6 +244,9 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align,
> ret->objs = NULL;
> ret->ctor = ctor;
> ret->non_kernel = 0;
> + ret->exec_callback = false;
> + ret->callback = NULL;
> + ret->private = NULL;
> return ret;
> }
>
> --
> 2.46.0
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-08-13 14:12 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-08-12 19:05 [PATCH v2 1/2] maple_tree: reset mas->index and mas->last on write retries Sidhartha Kumar
2024-08-12 19:05 ` [PATCH v2 2/2] maple_tree: add test to replicate low memory race conditions Sidhartha Kumar
2024-08-13 14:12 ` Liam R. Howlett
2024-08-13 14:12 ` [PATCH v2 1/2] maple_tree: reset mas->index and mas->last on write retries Liam R. Howlett
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox