* [PATCH v2 1/2] maple_tree: reset mas->index and mas->last on write retries
@ 2024-08-12 19:05 Sidhartha Kumar
2024-08-12 19:05 ` [PATCH v2 2/2] maple_tree: add test to replicate low memory race conditions Sidhartha Kumar
2024-08-13 14:12 ` [PATCH v2 1/2] maple_tree: reset mas->index and mas->last on write retries Liam R. Howlett
0 siblings, 2 replies; 4+ messages in thread
From: Sidhartha Kumar @ 2024-08-12 19:05 UTC (permalink / raw)
To: linux-kernel, maple-tree
Cc: linux-mm, akpm, liam.howlett, willy, Sidhartha Kumar
The following scenario can result in a race condition:
Consider a node with the following indices and values
a<------->b<----------->c<--------->d
0xA NULL 0xB
CPU 1 CPU 2
--------- ---------
mas_set_range(a,b)
mas_erase()
-> range is expanded (a,c) because of null expansion
mas_nomem()
mas_unlock()
mas_store_range(b,c,0xC)
The node now looks like:
a<------->b<----------->c<--------->d
0xA 0xC 0xB
mas_lock()
mas_erase() <------ range of erase is still (a,c)
The node is now NULL from (a,c) but the write from CPU 2 should have been
retained and range (b,c) should still have 0xC as its value. We can fix
this by re-initializing to the original index and last. This does not need
a cc: Stable as there are no users of the maple tree which use internal
locking and this condition is only possible with internal locking.
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
---
v1 -> v2:
- re-initialize index and last in the mas_nomem() if statement so
fast path is not affected in mas_erase().
- use __mas_set_range() rather than set mas->index and mas->last
directly.
lib/maple_tree.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index aa3a5df15b8e..b547ff211ac7 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -5451,14 +5451,19 @@ EXPORT_SYMBOL_GPL(mas_store);
*/
int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
{
+ unsigned long index = mas->index;
+ unsigned long last = mas->last;
MA_WR_STATE(wr_mas, mas, entry);
mas_wr_store_setup(&wr_mas);
trace_ma_write(__func__, mas, 0, entry);
retry:
mas_wr_store_entry(&wr_mas);
- if (unlikely(mas_nomem(mas, gfp)))
+ if (unlikely(mas_nomem(mas, gfp))) {
+ if (!entry)
+ __mas_set_range(mas, index, last);
goto retry;
+ }
if (unlikely(mas_is_err(mas)))
return xa_err(mas->node);
@@ -6245,23 +6250,26 @@ EXPORT_SYMBOL_GPL(mas_find_range_rev);
void *mas_erase(struct ma_state *mas)
{
void *entry;
+ unsigned long index = mas->index;
MA_WR_STATE(wr_mas, mas, NULL);
if (!mas_is_active(mas) || !mas_is_start(mas))
mas->status = ma_start;
- /* Retry unnecessary when holding the write lock. */
+write_retry:
entry = mas_state_walk(mas);
if (!entry)
return NULL;
-write_retry:
/* Must reset to ensure spanning writes of last slot are detected */
mas_reset(mas);
mas_wr_store_setup(&wr_mas);
mas_wr_store_entry(&wr_mas);
- if (mas_nomem(mas, GFP_KERNEL))
+ if (mas_nomem(mas, GFP_KERNEL)) {
+ /* in case the range of entry changed when unlocked */
+ mas->index = mas->last = index;
goto write_retry;
+ }
return entry;
}
--
2.46.0
^ permalink raw reply [flat|nested] 4+ messages in thread* [PATCH v2 2/2] maple_tree: add test to replicate low memory race conditions 2024-08-12 19:05 [PATCH v2 1/2] maple_tree: reset mas->index and mas->last on write retries Sidhartha Kumar @ 2024-08-12 19:05 ` Sidhartha Kumar 2024-08-13 14:12 ` Liam R. Howlett 2024-08-13 14:12 ` [PATCH v2 1/2] maple_tree: reset mas->index and mas->last on write retries Liam R. Howlett 1 sibling, 1 reply; 4+ messages in thread From: Sidhartha Kumar @ 2024-08-12 19:05 UTC (permalink / raw) To: linux-kernel, maple-tree Cc: linux-mm, akpm, liam.howlett, willy, Sidhartha Kumar Add new callback fields to the userspace implementation of struct kmem_cache. This allows for executing callback functions in order to further test low memory scenarios where node allocation is retried. This callback can help test race conditions by calling a function when a low memory event is tested. Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com> --- v1 -> v2: - change test name to check_nomem_writer_race() - move test down in farmer_tests() - remove mas_destroy() from check_nomem_writer_race() as its not needed - remove using mas.index and mas.last directly through mas_set_range() and MA_STATE macros. - remove uneeded mas_reset() in check_nomem_writer_race(). 
lib/maple_tree.c | 13 +++++++ tools/testing/radix-tree/maple.c | 63 ++++++++++++++++++++++++++++++++ tools/testing/shared/linux.c | 26 ++++++++++++- 3 files changed, 101 insertions(+), 1 deletion(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index b547ff211ac7..14d7864b8d53 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -7005,6 +7005,19 @@ void mt_set_non_kernel(unsigned int val) kmem_cache_set_non_kernel(maple_node_cache, val); } +extern void kmem_cache_set_callback(struct kmem_cache *cachep, + void (*callback)(void *)); +void mt_set_callback(void (*callback)(void *)) +{ + kmem_cache_set_callback(maple_node_cache, callback); +} + +extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private); +void mt_set_private(void *private) +{ + kmem_cache_set_private(maple_node_cache, private); +} + extern unsigned long kmem_cache_get_alloc(struct kmem_cache *); unsigned long mt_get_alloc_size(void) { diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index cd1cf05503b4..ef5b83cf94ea 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -36224,6 +36224,65 @@ static noinline void __init check_mtree_dup(struct maple_tree *mt) extern void test_kmem_cache_bulk(void); +/* callback function used for check_nomem_writer_race() */ +static void writer2(void *maple_tree) +{ + struct maple_tree *mt = (struct maple_tree *)maple_tree; + MA_STATE(mas, mt, 6, 10); + + mtree_lock(mas.tree); + mas_store(&mas, xa_mk_value(0xC)); + mas_destroy(&mas); + mtree_unlock(mas.tree); +} + +/* + * check_nomem_writer_race() - test a possible race in the mas_nomem() path + * @mt: The tree to build. + * + * There is a possible race condition in low memory conditions when mas_nomem() + * gives up its lock. A second writer can chagne the entry that the primary + * writer executing the mas_nomem() path is modifying. This test recreates this + * scenario to ensure we are handling it correctly. 
+ */ +static void check_nomem_writer_race(struct maple_tree *mt) +{ + MA_STATE(mas, mt, 0, 5); + + mt_set_non_kernel(0); + /* setup root with 2 values with NULL in between */ + mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL); + mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL); + mtree_store_range(mt, 11, 15, xa_mk_value(0xB), GFP_KERNEL); + + /* setup writer 2 that will trigger the race condition */ + mt_set_private(mt); + mt_set_callback(writer2); + + mtree_lock(mt); + /* erase 0-5 */ + mas_erase(&mas); + + /* index 6-10 should retain the value from writer 2 */ + check_load(mt, 6, xa_mk_value(0xC)); + mtree_unlock(mt); + + /* test for the same race but with mas_store_gfp() */ + mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL); + mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL); + + mas_set_range(&mas, 0, 5); + mtree_lock(mt); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + + /* ensure write made by writer 2 is retained */ + check_load(mt, 6, xa_mk_value(0xC)); + + mt_set_private(NULL); + mt_set_callback(NULL); + mtree_unlock(mt); +} + void farmer_tests(void) { struct maple_node *node; @@ -36257,6 +36316,10 @@ void farmer_tests(void) check_dfs_preorder(&tree); mtree_destroy(&tree); + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_USE_RCU); + check_nomem_writer_race(&tree); + mtree_destroy(&tree); + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); check_prealloc(&tree); mtree_destroy(&tree); diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c index 4eb442206d01..17263696b5d8 100644 --- a/tools/testing/shared/linux.c +++ b/tools/testing/shared/linux.c @@ -26,8 +26,21 @@ struct kmem_cache { unsigned int non_kernel; unsigned long nr_allocated; unsigned long nr_tallocated; + bool exec_callback; + void (*callback)(void *); + void *private; }; +void kmem_cache_set_callback(struct kmem_cache *cachep, void (*callback)(void *)) +{ + cachep->callback = callback; +} + +void kmem_cache_set_private(struct kmem_cache *cachep, void *private) +{ + 
cachep->private = private; +} + void kmem_cache_set_non_kernel(struct kmem_cache *cachep, unsigned int val) { cachep->non_kernel = val; @@ -58,9 +71,17 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, { void *p; + if (cachep->exec_callback) { + if (cachep->callback) + cachep->callback(cachep->private); + cachep->exec_callback = false; + } + if (!(gfp & __GFP_DIRECT_RECLAIM)) { - if (!cachep->non_kernel) + if (!cachep->non_kernel) { + cachep->exec_callback = true; return NULL; + } cachep->non_kernel--; } @@ -223,6 +244,9 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align, ret->objs = NULL; ret->ctor = ctor; ret->non_kernel = 0; + ret->exec_callback = false; + ret->callback = NULL; + ret->private = NULL; return ret; } -- 2.46.0 ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2 2/2] maple_tree: add test to replicate low memory race conditions 2024-08-12 19:05 ` [PATCH v2 2/2] maple_tree: add test to replicate low memory race conditions Sidhartha Kumar @ 2024-08-13 14:12 ` Liam R. Howlett 0 siblings, 0 replies; 4+ messages in thread From: Liam R. Howlett @ 2024-08-13 14:12 UTC (permalink / raw) To: Sidhartha Kumar; +Cc: linux-kernel, maple-tree, linux-mm, akpm, willy * Sidhartha Kumar <sidhartha.kumar@oracle.com> [240812 15:05]: > Add new callback fields to the userspace implementation of struct > kmem_cache. This allows for executing callback functions in order to > further test low memory scenarios where node allocation is retried. > > This callback can help test race conditions by calling a function when a > low memory event is tested. > > Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com> Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com> > --- > v1 -> v2: > - change test name to check_nomem_writer_race() > - move test down in farmer_tests() > - remove mas_destroy() from check_nomem_writer_race() as its not > needed > - remove using mas.index and mas.last directly through > mas_set_range() and MA_STATE macros. > - remove uneeded mas_reset() in check_nomem_writer_race(). 
> > lib/maple_tree.c | 13 +++++++ > tools/testing/radix-tree/maple.c | 63 ++++++++++++++++++++++++++++++++ > tools/testing/shared/linux.c | 26 ++++++++++++- > 3 files changed, 101 insertions(+), 1 deletion(-) > > diff --git a/lib/maple_tree.c b/lib/maple_tree.c > index b547ff211ac7..14d7864b8d53 100644 > --- a/lib/maple_tree.c > +++ b/lib/maple_tree.c > @@ -7005,6 +7005,19 @@ void mt_set_non_kernel(unsigned int val) > kmem_cache_set_non_kernel(maple_node_cache, val); > } > > +extern void kmem_cache_set_callback(struct kmem_cache *cachep, > + void (*callback)(void *)); > +void mt_set_callback(void (*callback)(void *)) > +{ > + kmem_cache_set_callback(maple_node_cache, callback); > +} > + > +extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private); > +void mt_set_private(void *private) > +{ > + kmem_cache_set_private(maple_node_cache, private); > +} > + > extern unsigned long kmem_cache_get_alloc(struct kmem_cache *); > unsigned long mt_get_alloc_size(void) > { > diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c > index cd1cf05503b4..ef5b83cf94ea 100644 > --- a/tools/testing/radix-tree/maple.c > +++ b/tools/testing/radix-tree/maple.c > @@ -36224,6 +36224,65 @@ static noinline void __init check_mtree_dup(struct maple_tree *mt) > > extern void test_kmem_cache_bulk(void); > > +/* callback function used for check_nomem_writer_race() */ > +static void writer2(void *maple_tree) > +{ > + struct maple_tree *mt = (struct maple_tree *)maple_tree; > + MA_STATE(mas, mt, 6, 10); > + > + mtree_lock(mas.tree); > + mas_store(&mas, xa_mk_value(0xC)); > + mas_destroy(&mas); > + mtree_unlock(mas.tree); > +} > + > +/* > + * check_nomem_writer_race() - test a possible race in the mas_nomem() path > + * @mt: The tree to build. > + * > + * There is a possible race condition in low memory conditions when mas_nomem() > + * gives up its lock. 
A second writer can chagne the entry that the primary > + * writer executing the mas_nomem() path is modifying. This test recreates this > + * scenario to ensure we are handling it correctly. > + */ > +static void check_nomem_writer_race(struct maple_tree *mt) > +{ > + MA_STATE(mas, mt, 0, 5); > + > + mt_set_non_kernel(0); > + /* setup root with 2 values with NULL in between */ > + mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL); > + mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL); > + mtree_store_range(mt, 11, 15, xa_mk_value(0xB), GFP_KERNEL); > + > + /* setup writer 2 that will trigger the race condition */ > + mt_set_private(mt); > + mt_set_callback(writer2); > + > + mtree_lock(mt); > + /* erase 0-5 */ > + mas_erase(&mas); > + > + /* index 6-10 should retain the value from writer 2 */ > + check_load(mt, 6, xa_mk_value(0xC)); > + mtree_unlock(mt); > + > + /* test for the same race but with mas_store_gfp() */ > + mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL); > + mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL); > + > + mas_set_range(&mas, 0, 5); > + mtree_lock(mt); > + mas_store_gfp(&mas, NULL, GFP_KERNEL); > + > + /* ensure write made by writer 2 is retained */ > + check_load(mt, 6, xa_mk_value(0xC)); > + > + mt_set_private(NULL); > + mt_set_callback(NULL); > + mtree_unlock(mt); > +} > + > void farmer_tests(void) > { > struct maple_node *node; > @@ -36257,6 +36316,10 @@ void farmer_tests(void) > check_dfs_preorder(&tree); > mtree_destroy(&tree); > > + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_USE_RCU); > + check_nomem_writer_race(&tree); > + mtree_destroy(&tree); > + > mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); > check_prealloc(&tree); > mtree_destroy(&tree); > diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c > index 4eb442206d01..17263696b5d8 100644 > --- a/tools/testing/shared/linux.c > +++ b/tools/testing/shared/linux.c > @@ -26,8 +26,21 @@ struct kmem_cache { > unsigned int non_kernel; > unsigned long 
nr_allocated; > unsigned long nr_tallocated; > + bool exec_callback; > + void (*callback)(void *); > + void *private; > }; > > +void kmem_cache_set_callback(struct kmem_cache *cachep, void (*callback)(void *)) > +{ > + cachep->callback = callback; > +} > + > +void kmem_cache_set_private(struct kmem_cache *cachep, void *private) > +{ > + cachep->private = private; > +} > + > void kmem_cache_set_non_kernel(struct kmem_cache *cachep, unsigned int val) > { > cachep->non_kernel = val; > @@ -58,9 +71,17 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, > { > void *p; > > + if (cachep->exec_callback) { > + if (cachep->callback) > + cachep->callback(cachep->private); > + cachep->exec_callback = false; > + } > + > if (!(gfp & __GFP_DIRECT_RECLAIM)) { > - if (!cachep->non_kernel) > + if (!cachep->non_kernel) { > + cachep->exec_callback = true; > return NULL; > + } > > cachep->non_kernel--; > } > @@ -223,6 +244,9 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align, > ret->objs = NULL; > ret->ctor = ctor; > ret->non_kernel = 0; > + ret->exec_callback = false; > + ret->callback = NULL; > + ret->private = NULL; > return ret; > } > > -- > 2.46.0 > ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2 1/2] maple_tree: reset mas->index and mas->last on write retries 2024-08-12 19:05 [PATCH v2 1/2] maple_tree: reset mas->index and mas->last on write retries Sidhartha Kumar 2024-08-12 19:05 ` [PATCH v2 2/2] maple_tree: add test to replicate low memory race conditions Sidhartha Kumar @ 2024-08-13 14:12 ` Liam R. Howlett 1 sibling, 0 replies; 4+ messages in thread From: Liam R. Howlett @ 2024-08-13 14:12 UTC (permalink / raw) To: Sidhartha Kumar; +Cc: linux-kernel, maple-tree, linux-mm, akpm, willy * Sidhartha Kumar <sidhartha.kumar@oracle.com> [240812 15:05]: > The following scenario can result in a race condition: > > Consider a node with the following indices and values > > a<------->b<----------->c<--------->d > 0xA NULL 0xB > > CPU 1 CPU 2 > --------- --------- > mas_set_range(a,b) > mas_erase() > -> range is expanded (a,c) because of null expansion > > mas_nomem() > mas_unlock() > mas_store_range(b,c,0xC) > > The node now looks like: > > a<------->b<----------->c<--------->d > 0xA 0xC 0xB > > mas_lock() > mas_erase() <------ range of erase is still (a,c) > > The node is now NULL from (a,c) but the write from CPU 2 should have been > retained and range (b,c) should still have 0xC as its value. We can fix > this by re-intializing to the original index and last. This does not need > a cc: Stable as there are no users of the maple tree which use internal > locking and this condition is only possible with internal locking. > > Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com> Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com> > --- > v1 -> v2: > - re-initialize index and last in the mas_nomem() if statement so > fast path is not effected in mas_erase(). > > - use __mas_set_range() rather than set mas->index and mas->last > directly. 
> > lib/maple_tree.c | 16 ++++++++++++---- > 1 file changed, 12 insertions(+), 4 deletions(-) > > diff --git a/lib/maple_tree.c b/lib/maple_tree.c > index aa3a5df15b8e..b547ff211ac7 100644 > --- a/lib/maple_tree.c > +++ b/lib/maple_tree.c > @@ -5451,14 +5451,19 @@ EXPORT_SYMBOL_GPL(mas_store); > */ > int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp) > { > + unsigned long index = mas->index; > + unsigned long last = mas->last; > MA_WR_STATE(wr_mas, mas, entry); > > mas_wr_store_setup(&wr_mas); > trace_ma_write(__func__, mas, 0, entry); > retry: > mas_wr_store_entry(&wr_mas); > - if (unlikely(mas_nomem(mas, gfp))) > + if (unlikely(mas_nomem(mas, gfp))) { > + if (!entry) > + __mas_set_range(mas, index, last); > goto retry; > + } > > if (unlikely(mas_is_err(mas))) > return xa_err(mas->node); > @@ -6245,23 +6250,26 @@ EXPORT_SYMBOL_GPL(mas_find_range_rev); > void *mas_erase(struct ma_state *mas) > { > void *entry; > + unsigned long index = mas->index; > MA_WR_STATE(wr_mas, mas, NULL); > > if (!mas_is_active(mas) || !mas_is_start(mas)) > mas->status = ma_start; > > - /* Retry unnecessary when holding the write lock. */ > +write_retry: > entry = mas_state_walk(mas); > if (!entry) > return NULL; > > -write_retry: > /* Must reset to ensure spanning writes of last slot are detected */ > mas_reset(mas); > mas_wr_store_setup(&wr_mas); > mas_wr_store_entry(&wr_mas); > - if (mas_nomem(mas, GFP_KERNEL)) > + if (mas_nomem(mas, GFP_KERNEL)) { > + /* in case the range of entry changed when unlocked */ > + mas->index = mas->last = index; > goto write_retry; > + } > > return entry; > } > -- > 2.46.0 > ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-08-13 14:12 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2024-08-12 19:05 [PATCH v2 1/2] maple_tree: reset mas->index and mas->last on write retries Sidhartha Kumar 2024-08-12 19:05 ` [PATCH v2 2/2] maple_tree: add test to replicate low memory race conditions Sidhartha Kumar 2024-08-13 14:12 ` Liam R. Howlett 2024-08-13 14:12 ` [PATCH v2 1/2] maple_tree: reset mas->index and mas->last on write retries Liam R. Howlett
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox