linux-mm.kvack.org archive mirror
* [-next conflict imminent] Re: [PATCH v2 0/7] mm, slub: handle pending kfree_rcu() in kmem_cache_destroy()
       [not found] <20240807-b4-slab-kfree_rcu-destroy-v2-0-ea79102f428c@suse.cz>
@ 2024-08-09 15:02 ` Vlastimil Babka
  2024-08-09 15:12   ` Jann Horn
       [not found] ` <20240807-b4-slab-kfree_rcu-destroy-v2-5-ea79102f428c@suse.cz>
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 44+ messages in thread
From: Vlastimil Babka @ 2024-08-09 15:02 UTC (permalink / raw)
  To: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Stephen Rothwell
  Cc: Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Julia Lawall, Jakub Kicinski, Jason A. Donenfeld,
	Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, Hyeonggon Yoo, linux-mm,
	linux-kernel, rcu, Alexander Potapenko, Marco Elver,
	Dmitry Vyukov, kasan-dev, Jann Horn, Mateusz Guzik

On 8/7/24 12:31, Vlastimil Babka wrote:
> Also in git:
> https://git.kernel.org/vbabka/l/slab-kfree_rcu-destroy-v2r2

I've added this to slab/for-next. There will be some conflicts, and here's the
resulting git show of the merge commit I tried over today's next.

It might look a bit different with tomorrow's next as mm will have v7 of the
conflicting series from Jann:

https://lore.kernel.org/all/1ca6275f-a2fc-4bad-81dc-6257d4f8d750@suse.cz/

(also I did resolve it in the way I suggested, by moving Jann's block before
taking slab_mutex(), but unless that change happens in mm-unstable it would
probably be more correct to keep it where he had it)

---8<---
commit 444486f2b7b0325ba026e0ad129eba3f54c18301
Merge: 61c01d2e181a 63eac6bdcf9f
Author: Vlastimil Babka <vbabka@suse.cz>
Date:   Fri Aug 9 16:49:03 2024 +0200

    Merge branch 'slab/for-6.12/rcu_barriers' into HEAD

diff --cc include/linux/rcutree.h
index 7dbde2b6f714,58e7db80f3a8..90a684f94776
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@@ -35,9 -35,10 +35,10 @@@ static inline void rcu_virt_note_contex
  
  void synchronize_rcu_expedited(void);
  void kvfree_call_rcu(struct rcu_head *head, void *ptr);
+ void kvfree_rcu_barrier(void);
  
  void rcu_barrier(void);
 -void rcu_momentary_dyntick_idle(void);
 +void rcu_momentary_eqs(void);
  void kfree_rcu_scheduler_running(void);
  bool rcu_gp_might_be_stalled(void);
  
diff --cc kernel/rcu/tree.c
index 930846f06bee,ebcfed9b570e..4606fa361b06
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@@ -3614,7 -3631,7 +3611,7 @@@ kvfree_rcu_queue_batch(struct kfree_rcu
  			// be that the work is in the pending state when
  			// channels have been detached following by each
  			// other.
- 			queue_rcu_work(system_unbound_wq, &krwp->rcu_work);
 -			queued = queue_rcu_work(system_wq, &krwp->rcu_work);
++			queued = queue_rcu_work(system_unbound_wq, &krwp->rcu_work);
  		}
  	}
  
diff --cc mm/slab_common.c
index fc7b1250d929,1a2873293f5d..82f287c21954
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@@ -511,67 -487,40 +505,52 @@@ EXPORT_SYMBOL(kmem_buckets_create)
   */
  static void kmem_cache_release(struct kmem_cache *s)
  {
- 	if (slab_state >= FULL) {
- 		sysfs_slab_unlink(s);
+ 	kfence_shutdown_cache(s);
+ 	if (__is_defined(SLAB_SUPPORTS_SYSFS) && slab_state >= FULL)
  		sysfs_slab_release(s);
- 	} else {
+ 	else
  		slab_kmem_cache_release(s);
- 	}
  }
- #else
- static void kmem_cache_release(struct kmem_cache *s)
+ 
+ void slab_kmem_cache_release(struct kmem_cache *s)
  {
- 	slab_kmem_cache_release(s);
+ 	__kmem_cache_release(s);
+ 	kfree_const(s->name);
+ 	kmem_cache_free(kmem_cache, s);
  }
- #endif
  
- static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
+ void kmem_cache_destroy(struct kmem_cache *s)
  {
- 	LIST_HEAD(to_destroy);
- 	struct kmem_cache *s, *s2;
- 
- 	/*
- 	 * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
- 	 * @slab_caches_to_rcu_destroy list.  The slab pages are freed
- 	 * through RCU and the associated kmem_cache are dereferenced
- 	 * while freeing the pages, so the kmem_caches should be freed only
- 	 * after the pending RCU operations are finished.  As rcu_barrier()
- 	 * is a pretty slow operation, we batch all pending destructions
- 	 * asynchronously.
- 	 */
- 	mutex_lock(&slab_mutex);
- 	list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy);
- 	mutex_unlock(&slab_mutex);
+ 	int err;
  
- 	if (list_empty(&to_destroy))
+ 	if (unlikely(!s) || !kasan_check_byte(s))
  		return;
  
- 	rcu_barrier();
+ 	/* in-flight kfree_rcu()'s may include objects from our cache */
+ 	kvfree_rcu_barrier();
  
- 	list_for_each_entry_safe(s, s2, &to_destroy, list) {
- 		debugfs_slab_release(s);
- 		kfence_shutdown_cache(s);
- 		kmem_cache_release(s);
- 	}
- }
- 
- static int shutdown_cache(struct kmem_cache *s)
- {
 +	if (IS_ENABLED(CONFIG_SLUB_RCU_DEBUG) &&
 +	    (s->flags & SLAB_TYPESAFE_BY_RCU)) {
 +		/*
 +		 * Under CONFIG_SLUB_RCU_DEBUG, when objects in a
 +		 * SLAB_TYPESAFE_BY_RCU slab are freed, SLUB will internally
 +		 * defer their freeing with call_rcu().
 +		 * Wait for such call_rcu() invocations here before actually
 +		 * destroying the cache.
 +		 */
 +		rcu_barrier();
 +	}
 +
+ 	cpus_read_lock();
+ 	mutex_lock(&slab_mutex);
+ 
+ 	s->refcount--;
+ 	if (s->refcount) {
+ 		mutex_unlock(&slab_mutex);
+ 		cpus_read_unlock();
+ 		return;
+ 	}
+ 
  	/* free asan quarantined objects */
  	kasan_cache_shutdown(s);
  
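
As an illustration of the caller pattern the merged code is meant to make
safe, here is a minimal sketch; the "foo" names are invented for this example
and are not from the series. It shows a module that frees its objects via
kfree_rcu() and then destroys the cache on exit:

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>

struct foo {
	struct rcu_head rcu;
	int payload;
};

static struct kmem_cache *foo_cache;

static int __init foo_init(void)
{
	foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0, 0, NULL);
	return foo_cache ? 0 : -ENOMEM;
}

static void __exit foo_exit(void)
{
	struct foo *f = kmem_cache_alloc(foo_cache, GFP_KERNEL);

	if (f)
		kfree_rcu(f, rcu);	/* freed only after a grace period */

	/*
	 * kmem_cache_destroy() now calls kvfree_rcu_barrier(), so the
	 * pending kfree_rcu() above is flushed before the cache is
	 * checked for leaked objects and released.
	 */
	kmem_cache_destroy(foo_cache);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");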




* Re: [-next conflict imminent] Re: [PATCH v2 0/7] mm, slub: handle pending kfree_rcu() in kmem_cache_destroy()
  2024-08-09 15:02 ` [-next conflict imminent] Re: [PATCH v2 0/7] mm, slub: handle pending kfree_rcu() in kmem_cache_destroy() Vlastimil Babka
@ 2024-08-09 15:12   ` Jann Horn
  2024-08-09 15:14     ` Vlastimil Babka
  0 siblings, 1 reply; 44+ messages in thread
From: Jann Horn @ 2024-08-09 15:12 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Stephen Rothwell,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Julia Lawall, Jakub Kicinski, Jason A. Donenfeld,
	Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, Hyeonggon Yoo, linux-mm,
	linux-kernel, rcu, Alexander Potapenko, Marco Elver,
	Dmitry Vyukov, kasan-dev, Mateusz Guzik

On Fri, Aug 9, 2024 at 5:02 PM Vlastimil Babka <vbabka@suse.cz> wrote:
> On 8/7/24 12:31, Vlastimil Babka wrote:
> > Also in git:
> > https://git.kernel.org/vbabka/l/slab-kfree_rcu-destroy-v2r2
>
> I've added this to slab/for-next, there will be some conflicts and here's my
> resulting git show or the merge commit I tried over today's next.
>
> It might look a bit different with tomorrow's next as mm will have v7 of the
> conflicting series from Jann:
>
> https://lore.kernel.org/all/1ca6275f-a2fc-4bad-81dc-6257d4f8d750@suse.cz/
>
> (also I did resolve it in the way I suggested to move Jann's block before
> taking slab_mutex() but unless that happens in mm-unstable it would probably be more
> correct to keep where he did)

Regarding my conflicting patch: Do you want me to send a v8 of that
one now to move things around in my patch as you suggested? Or should
we do that in the slab tree after the conflict has been resolved in
Linus' tree, or something like that?
I'm not sure which way of doing this would minimize work for maintainers...



* Re: [-next conflict imminent] Re: [PATCH v2 0/7] mm, slub: handle pending kfree_rcu() in kmem_cache_destroy()
  2024-08-09 15:12   ` Jann Horn
@ 2024-08-09 15:14     ` Vlastimil Babka
  2024-08-10  0:11       ` Andrew Morton
  0 siblings, 1 reply; 44+ messages in thread
From: Vlastimil Babka @ 2024-08-09 15:14 UTC (permalink / raw)
  To: Jann Horn
  Cc: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Stephen Rothwell,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Julia Lawall, Jakub Kicinski, Jason A. Donenfeld,
	Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, Hyeonggon Yoo, linux-mm,
	linux-kernel, rcu, Alexander Potapenko, Marco Elver,
	Dmitry Vyukov, kasan-dev, Mateusz Guzik

On 8/9/24 17:12, Jann Horn wrote:
> On Fri, Aug 9, 2024 at 5:02 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>> On 8/7/24 12:31, Vlastimil Babka wrote:
>> > Also in git:
>> > https://git.kernel.org/vbabka/l/slab-kfree_rcu-destroy-v2r2
>>
>> I've added this to slab/for-next, there will be some conflicts and here's my
>> resulting git show or the merge commit I tried over today's next.
>>
>> It might look a bit different with tomorrow's next as mm will have v7 of the
>> conflicting series from Jann:
>>
>> https://lore.kernel.org/all/1ca6275f-a2fc-4bad-81dc-6257d4f8d750@suse.cz/
>>
>> (also I did resolve it in the way I suggested to move Jann's block before
>> taking slab_mutex() but unless that happens in mm-unstable it would probably be more
>> correct to keep where he did)
> 
> Regarding my conflicting patch: Do you want me to send a v8 of that
> one now to move things around in my patch as you suggested? Or should
> we do that in the slab tree after the conflict has been resolved in
> Linus' tree, or something like that?
> I'm not sure which way of doing this would minimize work for maintainers...

I guess it would be easiest to send a -fix to Andrew as it's a rather minor
change. Thanks!



* Re: [PATCH v2 7/7] kunit, slub: add test_kfree_rcu() and test_leak_destroy()
       [not found] ` <20240807-b4-slab-kfree_rcu-destroy-v2-7-ea79102f428c@suse.cz>
@ 2024-08-09 16:23   ` Uladzislau Rezki
  2024-09-14 13:22   ` Hyeonggon Yoo
  2024-09-20 13:35   ` Guenter Roeck
  2 siblings, 0 replies; 44+ messages in thread
From: Uladzislau Rezki @ 2024-08-09 16:23 UTC (permalink / raw)
  To: Vlastimil Babka, Paul E. McKenney
  Cc: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, Hyeonggon Yoo, linux-mm,
	linux-kernel, rcu, Alexander Potapenko, Marco Elver,
	Dmitry Vyukov, kasan-dev, Jann Horn, Mateusz Guzik

On Wed, Aug 07, 2024 at 12:31:20PM +0200, Vlastimil Babka wrote:
> Add a test that will create cache, allocate one object, kfree_rcu() it
> and attempt to destroy it. As long as the usage of kvfree_rcu_barrier()
> in kmem_cache_destroy() works correctly, there should be no warnings in
> dmesg and the test should pass.
> 
> Additionally add a test_leak_destroy() test that leaks an object on
> purpose and verifies that kmem_cache_destroy() catches it.
> 
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  lib/slub_kunit.c | 31 +++++++++++++++++++++++++++++++
>  1 file changed, 31 insertions(+)
> 
> diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
> index e6667a28c014..6e3a1e5a7142 100644
> --- a/lib/slub_kunit.c
> +++ b/lib/slub_kunit.c
> @@ -5,6 +5,7 @@
>  #include <linux/slab.h>
>  #include <linux/module.h>
>  #include <linux/kernel.h>
> +#include <linux/rcupdate.h>
>  #include "../mm/slab.h"
>  
>  static struct kunit_resource resource;
> @@ -157,6 +158,34 @@ static void test_kmalloc_redzone_access(struct kunit *test)
>  	kmem_cache_destroy(s);
>  }
>  
> +struct test_kfree_rcu_struct {
> +	struct rcu_head rcu;
> +};
> +
> +static void test_kfree_rcu(struct kunit *test)
> +{
> +	struct kmem_cache *s = test_kmem_cache_create("TestSlub_kfree_rcu",
> +				sizeof(struct test_kfree_rcu_struct),
> +				SLAB_NO_MERGE);
> +	struct test_kfree_rcu_struct *p = kmem_cache_alloc(s, GFP_KERNEL);
> +
> +	kfree_rcu(p, rcu);
> +	kmem_cache_destroy(s);
> +
> +	KUNIT_EXPECT_EQ(test, 0, slab_errors);
> +}
> +
>
Thank you for this test case!

I used this series to test the barrier _more_ and came to the conclusion that
it is not enough, i.e. I had to extend it to something like below:

<snip>
+       snprintf(name, sizeof(name), "test-slub-%d", current->pid);
+
+       for (i = 0; i < test_loop_count; i++) {
+               s = test_kmem_cache_create(name, sizeof(struct test_kfree_rcu_struct),
+                       SLAB_NO_MERGE);
+
+               if (!s)
+                       BUG();
+
+               get_random_bytes(&nr_to_alloc, sizeof(nr_to_alloc));
+               nr_to_alloc = nr_to_alloc % 1000000;
+               INIT_LIST_HEAD(&local_head);
+
+               for (j = 0; j < nr_to_alloc; j++) {
+                       p = kmem_cache_alloc(s, GFP_KERNEL);
+
+                       if (p)
+                               list_add(&p->list, &local_head);
+               }
+
+               list_for_each_entry_safe(p, n, &local_head, list)
+                       kfree_rcu(p, rcu);
+
+               kmem_cache_destroy(s);
+       }
<snip>

By using this (~11 parallel jobs) I could trigger a warning that a freed
cache still has some objects, and I have already figured out why. I will
send a v2 of the barrier implementation with a fix.
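
For reference, a self-contained approximation of that extended test, written
as it could sit in lib/slub_kunit.c next to the existing cases (it would also
need linux/list.h, linux/random.h and linux/sched.h); the struct layout, the
loop count and the random range are assumptions based on the snippet above:

struct test_kfree_rcu_stress_struct {
	struct list_head list;
	struct rcu_head rcu;
};

static void test_kfree_rcu_stress(struct kunit *test)
{
	struct test_kfree_rcu_stress_struct *p, *n;
	struct kmem_cache *s;
	LIST_HEAD(local_head);
	u32 nr_to_alloc, j;
	char name[32];
	int i;

	snprintf(name, sizeof(name), "test-slub-%d", current->pid);

	/* 100 iterations is an arbitrary choice for this sketch. */
	for (i = 0; i < 100; i++) {
		s = test_kmem_cache_create(name,
				sizeof(struct test_kfree_rcu_stress_struct),
				SLAB_NO_MERGE);
		if (!s)
			BUG();

		get_random_bytes(&nr_to_alloc, sizeof(nr_to_alloc));
		nr_to_alloc %= 1000000;
		INIT_LIST_HEAD(&local_head);

		for (j = 0; j < nr_to_alloc; j++) {
			p = kmem_cache_alloc(s, GFP_KERNEL);
			if (p)
				list_add(&p->list, &local_head);
		}

		/* Queue everything for RCU freeing, then destroy right away. */
		list_for_each_entry_safe(p, n, &local_head, list)
			kfree_rcu(p, rcu);

		kmem_cache_destroy(s);
	}

	KUNIT_EXPECT_EQ(test, 0, slab_errors);
}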

Thanks!

--
Uladzislau Rezki



* Re: [PATCH v2 5/7] rcu/kvfree: Add kvfree_rcu_barrier() API
       [not found] ` <20240807-b4-slab-kfree_rcu-destroy-v2-5-ea79102f428c@suse.cz>
@ 2024-08-09 16:26   ` Uladzislau Rezki
  2024-08-09 17:00     ` Vlastimil Babka
  0 siblings, 1 reply; 44+ messages in thread
From: Uladzislau Rezki @ 2024-08-09 16:26 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, Hyeonggon Yoo, linux-mm,
	linux-kernel, rcu, Alexander Potapenko, Marco Elver,
	Dmitry Vyukov, kasan-dev, Jann Horn, Mateusz Guzik

Hello, Vlastimil!

> From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
> 
> Add a kvfree_rcu_barrier() function. It waits until all
> in-flight pointers are freed over the RCU machinery. It does
> not wait for any GP completion and it is within its right to
> return immediately if there are no outstanding pointers.
> 
> This function is useful when there is a need to guarantee
> that memory is fully freed before destroying memory caches.
> For example, when unloading a kernel module.
> 
> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  include/linux/rcutiny.h |   5 +++
>  include/linux/rcutree.h |   1 +
>  kernel/rcu/tree.c       | 103 ++++++++++++++++++++++++++++++++++++++++++++----
>  3 files changed, 101 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
> index d9ac7b136aea..522123050ff8 100644
> --- a/include/linux/rcutiny.h
> +++ b/include/linux/rcutiny.h
> @@ -111,6 +111,11 @@ static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr)
>  	kvfree(ptr);
>  }
>  
> +static inline void kvfree_rcu_barrier(void)
> +{
> +	rcu_barrier();
> +}
> +
>  #ifdef CONFIG_KASAN_GENERIC
>  void kvfree_call_rcu(struct rcu_head *head, void *ptr);
>  #else
> diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
> index 254244202ea9..58e7db80f3a8 100644
> --- a/include/linux/rcutree.h
> +++ b/include/linux/rcutree.h
> @@ -35,6 +35,7 @@ static inline void rcu_virt_note_context_switch(void)
>  
>  void synchronize_rcu_expedited(void);
>  void kvfree_call_rcu(struct rcu_head *head, void *ptr);
> +void kvfree_rcu_barrier(void);
>  
>  void rcu_barrier(void);
>  void rcu_momentary_dyntick_idle(void);
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index e641cc681901..ebcfed9b570e 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -3584,18 +3584,15 @@ kvfree_rcu_drain_ready(struct kfree_rcu_cpu *krcp)
>  }
>  
>  /*
> - * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
> + * Return: %true if a work is queued, %false otherwise.
>   */
> -static void kfree_rcu_monitor(struct work_struct *work)
> +static bool
> +kvfree_rcu_queue_batch(struct kfree_rcu_cpu *krcp)
>  {
> -	struct kfree_rcu_cpu *krcp = container_of(work,
> -		struct kfree_rcu_cpu, monitor_work.work);
>  	unsigned long flags;
> +	bool queued = false;
>  	int i, j;
>  
> -	// Drain ready for reclaim.
> -	kvfree_rcu_drain_ready(krcp);
> -
>  	raw_spin_lock_irqsave(&krcp->lock, flags);
>  
>  	// Attempt to start a new batch.
> @@ -3634,11 +3631,27 @@ static void kfree_rcu_monitor(struct work_struct *work)
>  			// be that the work is in the pending state when
>  			// channels have been detached following by each
>  			// other.
> -			queue_rcu_work(system_wq, &krwp->rcu_work);
> +			queued = queue_rcu_work(system_wq, &krwp->rcu_work);
>  		}
>  	}
>  
>  	raw_spin_unlock_irqrestore(&krcp->lock, flags);
> +	return queued;
> +}
> +
> +/*
> + * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
> + */
> +static void kfree_rcu_monitor(struct work_struct *work)
> +{
> +	struct kfree_rcu_cpu *krcp = container_of(work,
> +		struct kfree_rcu_cpu, monitor_work.work);
> +
> +	// Drain ready for reclaim.
> +	kvfree_rcu_drain_ready(krcp);
> +
> +	// Queue a batch for a rest.
> +	kvfree_rcu_queue_batch(krcp);
>  
>  	// If there is nothing to detach, it means that our job is
>  	// successfully done here. In case of having at least one
> @@ -3859,6 +3872,80 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
>  }
>  EXPORT_SYMBOL_GPL(kvfree_call_rcu);
>  
> +/**
> + * kvfree_rcu_barrier - Wait until all in-flight kvfree_rcu() complete.
> + *
> + * Note that a single argument of kvfree_rcu() call has a slow path that
> + * triggers synchronize_rcu() following by freeing a pointer. It is done
> + * before the return from the function. Therefore for any single-argument
> + * call that will result in a kfree() to a cache that is to be destroyed
> + * during module exit, it is developer's responsibility to ensure that all
> + * such calls have returned before the call to kmem_cache_destroy().
> + */
> +void kvfree_rcu_barrier(void)
> +{
> +	struct kfree_rcu_cpu_work *krwp;
> +	struct kfree_rcu_cpu *krcp;
> +	bool queued;
> +	int i, cpu;
> +
> +	/*
> +	 * Firstly we detach objects and queue them over an RCU-batch
> +	 * for all CPUs. Finally queued works are flushed for each CPU.
> +	 *
> +	 * Please note. If there are outstanding batches for a particular
> +	 * CPU, those have to be finished first following by queuing a new.
> +	 */
> +	for_each_possible_cpu(cpu) {
> +		krcp = per_cpu_ptr(&krc, cpu);
> +
> +		/*
> +		 * Check if this CPU has any objects which have been queued for a
> +		 * new GP completion. If not(means nothing to detach), we are done
> +		 * with it. If any batch is pending/running for this "krcp", below
> +		 * per-cpu flush_rcu_work() waits its completion(see last step).
> +		 */
> +		if (!need_offload_krc(krcp))
> +			continue;
> +
> +		while (1) {
> +			/*
> +			 * If we are not able to queue a new RCU work it means:
> +			 * - batches for this CPU are still in flight which should
> +			 *   be flushed first and then repeat;
> +			 * - no objects to detach, because of concurrency.
> +			 */
> +			queued = kvfree_rcu_queue_batch(krcp);
> +
> +			/*
> +			 * Bail out, if there is no need to offload this "krcp"
> +			 * anymore. As noted earlier it can run concurrently.
> +			 */
> +			if (queued || !need_offload_krc(krcp))
> +				break;
> +
> +			/* There are ongoing batches. */
> +			for (i = 0; i < KFREE_N_BATCHES; i++) {
> +				krwp = &(krcp->krw_arr[i]);
> +				flush_rcu_work(&krwp->rcu_work);
> +			}
> +		}
> +	}
> +
> +	/*
> +	 * Now we guarantee that all objects are flushed.
> +	 */
> +	for_each_possible_cpu(cpu) {
> +		krcp = per_cpu_ptr(&krc, cpu);
> +
> +		for (i = 0; i < KFREE_N_BATCHES; i++) {
> +			krwp = &(krcp->krw_arr[i]);
> +			flush_rcu_work(&krwp->rcu_work);
> +		}
> +	}
> +}
> +EXPORT_SYMBOL_GPL(kvfree_rcu_barrier);
> +
>  static unsigned long
>  kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
>  {
> 
> -- 
> 2.46.0
> 
I need to send out a v2. What is the best way? Please let me know. I have not
checked where this series has already landed.

Thank you!

--
Uladzislau Rezki
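
To make the caveat in the kvfree_rcu_barrier() kerneldoc above concrete, here
is a hedged sketch (assuming <linux/slab.h> and <linux/rcupdate.h>; the "bar"
names are invented, and kvfree_rcu_mightsleep() is the current name of the
single-argument form the comment refers to):

struct bar {
	struct rcu_head rcu;
};

static struct kmem_cache *bar_cache;

static void bar_teardown(struct bar *b1, struct bar *b2)
{
	/*
	 * Two-argument form: the pointer is queued on the kvfree_rcu
	 * machinery, so kvfree_rcu_barrier() -- including the call that
	 * kmem_cache_destroy() now makes -- waits for it.
	 */
	kvfree_rcu(b1, rcu);

	/*
	 * Single-argument form: under memory pressure it can fall back to
	 * synchronize_rcu() plus an immediate free inside the call itself.
	 * That slow path is not something the barrier can wait for, so the
	 * caller must ensure all such calls have returned before the cache
	 * is destroyed (trivially true here, as the call is synchronous).
	 */
	kvfree_rcu_mightsleep(b2);

	kmem_cache_destroy(bar_cache);
}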



* Re: [PATCH v2 5/7] rcu/kvfree: Add kvfree_rcu_barrier() API
  2024-08-09 16:26   ` [PATCH v2 5/7] rcu/kvfree: Add kvfree_rcu_barrier() API Uladzislau Rezki
@ 2024-08-09 17:00     ` Vlastimil Babka
  2024-08-20 16:02       ` Uladzislau Rezki
  0 siblings, 1 reply; 44+ messages in thread
From: Vlastimil Babka @ 2024-08-09 17:00 UTC (permalink / raw)
  To: Uladzislau Rezki
  Cc: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik

On 8/9/24 18:26, Uladzislau Rezki wrote:
> Hello, Vlastimil!
> I need to send out a v2. What is a best way? Please let me know. I have not
> checked where this series already landed.

Hi,

you can just send it separately based on v6.11-rc2, as you did with v1, and I
will replace it in slab/for-next. Thanks!

Vlastimil

> Thank you!
> 
> --
> Uladzislau Rezki




* Re: [-next conflict imminent] Re: [PATCH v2 0/7] mm, slub: handle pending kfree_rcu() in kmem_cache_destroy()
  2024-08-09 15:14     ` Vlastimil Babka
@ 2024-08-10  0:11       ` Andrew Morton
  2024-08-10 20:25         ` Vlastimil Babka
  0 siblings, 1 reply; 44+ messages in thread
From: Andrew Morton @ 2024-08-10  0:11 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Jann Horn, Paul E. McKenney, Joel Fernandes, Josh Triplett,
	Boqun Feng, Christoph Lameter, David Rientjes, Stephen Rothwell,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Julia Lawall, Jakub Kicinski, Jason A. Donenfeld,
	Uladzislau Rezki (Sony),
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Mateusz Guzik

On Fri, 9 Aug 2024 17:14:40 +0200 Vlastimil Babka <vbabka@suse.cz> wrote:

> On 8/9/24 17:12, Jann Horn wrote:
> > On Fri, Aug 9, 2024 at 5:02 PM Vlastimil Babka <vbabka@suse.cz> wrote:
> >> On 8/7/24 12:31, Vlastimil Babka wrote:
> >> > Also in git:
> >> > https://git.kernel.org/vbabka/l/slab-kfree_rcu-destroy-v2r2
> >>
> >> I've added this to slab/for-next, there will be some conflicts and here's my
> >> resulting git show or the merge commit I tried over today's next.
> >>
> >> It might look a bit different with tomorrow's next as mm will have v7 of the
> >> conflicting series from Jann:
> >>
> >> https://lore.kernel.org/all/1ca6275f-a2fc-4bad-81dc-6257d4f8d750@suse.cz/
> >>
> >> (also I did resolve it in the way I suggested to move Jann's block before
> >> taking slab_mutex() but unless that happens in mm-unstable it would probably be more
> >> correct to keep where he did)
> > 
> > Regarding my conflicting patch: Do you want me to send a v8 of that
> > one now to move things around in my patch as you suggested? Or should
> > we do that in the slab tree after the conflict has been resolved in
> > Linus' tree, or something like that?
> > I'm not sure which way of doing this would minimize work for maintainers...
> 
> I guess it would be easiest to send a -fix to Andrew as it's rather minor
> change. Thanks!

That's quite a large conflict.  How about we carry Jann's patchset in
the slab tree?



* Re: [-next conflict imminent] Re: [PATCH v2 0/7] mm, slub: handle pending kfree_rcu() in kmem_cache_destroy()
  2024-08-10  0:11       ` Andrew Morton
@ 2024-08-10 20:25         ` Vlastimil Babka
  2024-08-10 20:30           ` Andrew Morton
  0 siblings, 1 reply; 44+ messages in thread
From: Vlastimil Babka @ 2024-08-10 20:25 UTC (permalink / raw)
  To: Andrew Morton, Stephen Rothwell
  Cc: Jann Horn, Paul E. McKenney, Joel Fernandes, Josh Triplett,
	Boqun Feng, Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Uladzislau Rezki (Sony),
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Mateusz Guzik

On 8/10/24 2:11 AM, Andrew Morton wrote:
> On Fri, 9 Aug 2024 17:14:40 +0200 Vlastimil Babka <vbabka@suse.cz> wrote:
> 
>> On 8/9/24 17:12, Jann Horn wrote:
>>> On Fri, Aug 9, 2024 at 5:02 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>>>> On 8/7/24 12:31, Vlastimil Babka wrote:
>>>>> Also in git:
>>>>> https://git.kernel.org/vbabka/l/slab-kfree_rcu-destroy-v2r2
>>>>
>>>> I've added this to slab/for-next, there will be some conflicts and here's my
>>>> resulting git show or the merge commit I tried over today's next.
>>>>
>>>> It might look a bit different with tomorrow's next as mm will have v7 of the
>>>> conflicting series from Jann:
>>>>
>>>> https://lore.kernel.org/all/1ca6275f-a2fc-4bad-81dc-6257d4f8d750@suse.cz/
>>>>
>>>> (also I did resolve it in the way I suggested to move Jann's block before
>>>> taking slab_mutex() but unless that happens in mm-unstable it would probably be more
>>>> correct to keep where he did)
>>>
>>> Regarding my conflicting patch: Do you want me to send a v8 of that
>>> one now to move things around in my patch as you suggested? Or should
>>> we do that in the slab tree after the conflict has been resolved in
>>> Linus' tree, or something like that?
>>> I'm not sure which way of doing this would minimize work for maintainers...
>>
>> I guess it would be easiest to send a -fix to Andrew as it's rather minor
>> change. Thanks!
> 
> That's quite a large conflict.  How about we carry Jann's patchset in
> the slab tree?

OK, I've done that and pushed to slab/for-next. I had no issues applying
the kasan parts, and a merge with mm-unstable (locally rebased with Jann's
commits dropped) had no conflicts either, so it should work fine. Thanks!



* Re: [-next conflict imminent] Re: [PATCH v2 0/7] mm, slub: handle pending kfree_rcu() in kmem_cache_destroy()
  2024-08-10 20:25         ` Vlastimil Babka
@ 2024-08-10 20:30           ` Andrew Morton
  0 siblings, 0 replies; 44+ messages in thread
From: Andrew Morton @ 2024-08-10 20:30 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Stephen Rothwell, Jann Horn, Paul E. McKenney, Joel Fernandes,
	Josh Triplett, Boqun Feng, Christoph Lameter, David Rientjes,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Julia Lawall, Jakub Kicinski, Jason A. Donenfeld,
	Uladzislau Rezki (Sony),
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Mateusz Guzik

On Sat, 10 Aug 2024 22:25:05 +0200 Vlastimil Babka <vbabka@suse.cz> wrote:

> >> I guess it would be easiest to send a -fix to Andrew as it's rather minor
> >> change. Thanks!
> > 
> > That's quite a large conflict.  How about we carry Jann's patchset in
> > the slab tree?
> 
> OK I've done that and pushed to slab/for-next. Had no issues applying
> the kasan parts and merge with mm-unstable (locally rebased with Jann's
> commits dropped) had no conflicts either so it should work fine. Thanks!

Cool.  I have dropped the copy of v8 from mm.git.



* Re: [PATCH v2 5/7] rcu/kvfree: Add kvfree_rcu_barrier() API
  2024-08-09 17:00     ` Vlastimil Babka
@ 2024-08-20 16:02       ` Uladzislau Rezki
  0 siblings, 0 replies; 44+ messages in thread
From: Uladzislau Rezki @ 2024-08-20 16:02 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Uladzislau Rezki, Paul E. McKenney, Joel Fernandes,
	Josh Triplett, Boqun Feng, Christoph Lameter, David Rientjes,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Julia Lawall, Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik

Hello, Vlastimil!

> On 8/9/24 18:26, Uladzislau Rezki wrote:
> > Hello, Vlastimil!
> > I need to send out a v2. What is a best way? Please let me know. I have not
> > checked where this series already landed.
> 
> Hi,
> 
> you can just send it separately based on v6.11-rc2, as you did v1 and I will
> replace it in the slab/for-next. Thanks!
> 
Sorry for the delay, I was on vacation last week. I have just posted the v2
with a fix. Now my tests pass!

Thanks!

--
Uladzislau Rezki



* Re: [PATCH v2 7/7] kunit, slub: add test_kfree_rcu() and test_leak_destroy()
       [not found] ` <20240807-b4-slab-kfree_rcu-destroy-v2-7-ea79102f428c@suse.cz>
  2024-08-09 16:23   ` [PATCH v2 7/7] kunit, slub: add test_kfree_rcu() and test_leak_destroy() Uladzislau Rezki
@ 2024-09-14 13:22   ` Hyeonggon Yoo
  2024-09-14 18:39     ` Vlastimil Babka
  2024-09-20 13:35   ` Guenter Roeck
  2 siblings, 1 reply; 44+ messages in thread
From: Hyeonggon Yoo @ 2024-09-14 13:22 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik

On Wed, Aug 7, 2024 at 7:31 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> Add a test that will create cache, allocate one object, kfree_rcu() it
> and attempt to destroy it. As long as the usage of kvfree_rcu_barrier()
> in kmem_cache_destroy() works correctly, there should be no warnings in
> dmesg and the test should pass.
>
> Additionally add a test_leak_destroy() test that leaks an object on
> purpose and verifies that kmem_cache_destroy() catches it.
>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  lib/slub_kunit.c | 31 +++++++++++++++++++++++++++++++
>  1 file changed, 31 insertions(+)
>

Hi Vlastimil,

I think we might need to suppress the WARN() due to the active objects in
kmem_cache_destroy() when it's called from slub_kunit. With this change, the
warning below will be printed every time slub_kunit is loaded, which made me
wonder for a while whether there's a bug.

Actually, pr_err() is called by __kmem_cache_shutdown() if there are any
active objects during destruction, and that error message is suppressed by
slub_kunit. However, kmem_cache_destroy() still calls WARN() regardless of
whether there was any error during shutdown.

[  147.546531] Object 0x00000000c09342ca @offset=640
[  147.546542] ------------[ cut here ]------------
[  147.546544] kmem_cache_destroy TestSlub_kfree_rcu: Slab cache still
has objects when called from test_leak_destroy+0x74/0x108 [slub_kunit]
[  147.546579] WARNING: CPU: 5 PID: 39703 at mm/slab_common.c:507
kmem_cache_destroy+0x174/0x188
[  147.546587] Modules linked in: slub_kunit uinput snd_seq_dummy
snd_hrtimer rfkill nf_conntrack_netbios_ns nf_conntrack_broadcast
nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet
nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct sunrpc nft_chain_nat
nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables
nfnetlink qrtr binfmt_misc vfat fat snd_hda_codec_generic
snd_hda_intel snd_intel_dspcfg snd_hda_codec uvcvideo snd_hda_core uvc
snd_hwdep videobuf2_vmalloc snd_seq videobuf2_memops snd_seq_device
videobuf2_v4l2 snd_pcm videobuf2_common snd_timer snd videodev mc
soundcore virtio_balloon acpi_tad joydev loop zram virtio_gpu
ahci_platform libahci_platform virtio_dma_buf crct10dif_ce polyval_ce
polyval_generic ghash_ce sha3_ce virtio_net sha512_ce net_failover
sha512_arm64 failover virtio_mmio ip6_tables ip_tables fuse
[  147.546646] CPU: 5 UID: 0 PID: 39703 Comm: kunit_try_catch Tainted:
G                 N 6.11.0-rc7-next-20240912 #2
[  147.546649] Tainted: [N]=TEST
[  147.546650] Hardware name: Parallels International GmbH. Parallels
ARM Virtual Machine/Parallels ARM Virtual Platform, BIOS 20.0.0
(55653) Thu, 05 Sep 202
[  147.546652] pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
[  147.546655] pc : kmem_cache_destroy+0x174/0x188
[  147.546657] lr : kmem_cache_destroy+0x174/0x188
[  147.546659] sp : ffff80008aba3d60
[  147.546660] x29: ffff80008aba3d60 x28: 0000000000000000 x27: 0000000000000000
[  147.546662] x26: 0000000000000000 x25: 0000000000000000 x24: ffff800094a2b438
[  147.546665] x23: ffff80008089b750 x22: 0000000000000001 x21: f9cc80007c1782f4
[  147.546666] x20: ffff800082f9d088 x19: ffff0000c2308b00 x18: 00000000fffffffd
[  147.546668] x17: 0000000046d4ed9c x16: 00000000ae1ad4db x15: ffff80008aba3430
[  147.546670] x14: 0000000000000001 x13: ffff80008aba3657 x12: ffff800082f0f060
[  147.546679] x11: 0000000000000001 x10: 0000000000000001 x9 : ffff8000801652c8
[  147.546682] x8 : c0000000ffffdfff x7 : ffff800082e5ee68 x6 : 00000000000affa8
[  147.546684] x5 : ffff00031fc70448 x4 : 0000000000000000 x3 : ffff80029d6b7000
[  147.546686] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff00011f1c8000
[  147.546688] Call trace:
[  147.546689]  kmem_cache_destroy+0x174/0x188
[  147.546692]  test_leak_destroy+0x74/0x108 [slub_kunit]
[  147.546693]  kunit_try_run_case+0x74/0x170
[  147.546697]  kunit_generic_run_threadfn_adapter+0x30/0x60
[  147.546699]  kthread+0xf4/0x108
[  147.546705]  ret_from_fork+0x10/0x20
[  147.546710] ---[ end trace 0000000000000000 ]---



* Re: [PATCH v2 7/7] kunit, slub: add test_kfree_rcu() and test_leak_destroy()
  2024-09-14 13:22   ` Hyeonggon Yoo
@ 2024-09-14 18:39     ` Vlastimil Babka
  0 siblings, 0 replies; 44+ messages in thread
From: Vlastimil Babka @ 2024-09-14 18:39 UTC (permalink / raw)
  To: Hyeonggon Yoo
  Cc: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik

On 9/14/24 15:22, Hyeonggon Yoo wrote:
> On Wed, Aug 7, 2024 at 7:31 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>>
>> Add a test that will create cache, allocate one object, kfree_rcu() it
>> and attempt to destroy it. As long as the usage of kvfree_rcu_barrier()
>> in kmem_cache_destroy() works correctly, there should be no warnings in
>> dmesg and the test should pass.
>>
>> Additionally add a test_leak_destroy() test that leaks an object on
>> purpose and verifies that kmem_cache_destroy() catches it.
>>
>> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
>> ---
>>  lib/slub_kunit.c | 31 +++++++++++++++++++++++++++++++
>>  1 file changed, 31 insertions(+)
>>
> 
> Hi Vlastimil,
> 
> I think we might need to suppress the WARN() due to the active objects
> in kmem_cache_destroy()
> when it's called from slub_kunit. With this change, the warning below
> will be printed every time
> slub_kunit is loaded, which made me wonder if there's a bug (for a while).
> 
> Actually, SLUB calls pr_err() is called by __kmem_cache_shutdown() if
> there are any active objects
> during destruction, and the error message is suppressed by slub_kunit.
> However, kmem_cache_destroy()
> still calls WARN() regardless if there is any error during shutdown.

Yeah, there was an LKP report about it already and I wanted to handle this
but forgot. It's not wrong to produce warnings during the tests; for example,
the KASAN kunit tests generate tons of them. But it's true that we suppress
them for slub, and we should continue doing so for consistency and to avoid
having to teach lkp what can be ignored.

But I think it's fine if we add the suppression during the rc stabilization
phase, so I will send the PR for the merge window as it is; it's too late now.

Want to take a stab at the patch? :)
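
One possible shape, purely as a sketch: reuse the mechanism the slub_kunit
tests already rely on, i.e. the named "slab_errors" kunit resource that
slab_add_kunit_errors() in mm/slub.c looks up, and gate the WARN() on it.
The helper name below is made up; this is not a posted patch:

/* Hypothetical helper, mirroring slab_add_kunit_errors() in mm/slub.c. */
static bool slab_in_kunit_test(void)
{
#if IS_ENABLED(CONFIG_SLUB_KUNIT_TEST)
	struct kunit_resource *resource;

	if (!kunit_get_current_test())
		return false;

	resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
	if (!resource)
		return false;

	/* Count the suppressed warning so the test can still assert on it. */
	(*(int *)resource->data)++;
	kunit_put_resource(resource);
	return true;
#else
	return false;
#endif
}

The WARN() in kmem_cache_destroy() would then only fire when this returns
false.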

Vlastimil

> [  147.546531] Object 0x00000000c09342ca @offset=640
> [  147.546542] ------------[ cut here ]------------
> [  147.546544] kmem_cache_destroy TestSlub_kfree_rcu: Slab cache still
> has objects when called from test_leak_destroy+0x74/0x108 [slub_kunit]
> [  147.546579] WARNING: CPU: 5 PID: 39703 at mm/slab_common.c:507
> kmem_cache_destroy+0x174/0x188
> [  147.546587] Modules linked in: slub_kunit uinput snd_seq_dummy
> snd_hrtimer rfkill nf_conntrack_netbios_ns nf_conntrack_broadcast
> nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet
> nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct sunrpc nft_chain_nat
> nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables
> nfnetlink qrtr binfmt_misc vfat fat snd_hda_codec_generic
> snd_hda_intel snd_intel_dspcfg snd_hda_codec uvcvideo snd_hda_core uvc
> snd_hwdep videobuf2_vmalloc snd_seq videobuf2_memops snd_seq_device
> videobuf2_v4l2 snd_pcm videobuf2_common snd_timer snd videodev mc
> soundcore virtio_balloon acpi_tad joydev loop zram virtio_gpu
> ahci_platform libahci_platform virtio_dma_buf crct10dif_ce polyval_ce
> polyval_generic ghash_ce sha3_ce virtio_net sha512_ce net_failover
> sha512_arm64 failover virtio_mmio ip6_tables ip_tables fuse
> [  147.546646] CPU: 5 UID: 0 PID: 39703 Comm: kunit_try_catch Tainted:
> G                 N 6.11.0-rc7-next-20240912 #2
> [  147.546649] Tainted: [N]=TEST
> [  147.546650] Hardware name: Parallels International GmbH. Parallels
> ARM Virtual Machine/Parallels ARM Virtual Platform, BIOS 20.0.0
> (55653) Thu, 05 Sep 202
> [  147.546652] pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
> [  147.546655] pc : kmem_cache_destroy+0x174/0x188
> [  147.546657] lr : kmem_cache_destroy+0x174/0x188
> [  147.546659] sp : ffff80008aba3d60
> [  147.546660] x29: ffff80008aba3d60 x28: 0000000000000000 x27: 0000000000000000
> [  147.546662] x26: 0000000000000000 x25: 0000000000000000 x24: ffff800094a2b438
> [  147.546665] x23: ffff80008089b750 x22: 0000000000000001 x21: f9cc80007c1782f4
> [  147.546666] x20: ffff800082f9d088 x19: ffff0000c2308b00 x18: 00000000fffffffd
> [  147.546668] x17: 0000000046d4ed9c x16: 00000000ae1ad4db x15: ffff80008aba3430
> [  147.546670] x14: 0000000000000001 x13: ffff80008aba3657 x12: ffff800082f0f060
> [  147.546679] x11: 0000000000000001 x10: 0000000000000001 x9 : ffff8000801652c8
> [  147.546682] x8 : c0000000ffffdfff x7 : ffff800082e5ee68 x6 : 00000000000affa8
> [  147.546684] x5 : ffff00031fc70448 x4 : 0000000000000000 x3 : ffff80029d6b7000
> [  147.546686] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff00011f1c8000
> [  147.546688] Call trace:
> [  147.546689]  kmem_cache_destroy+0x174/0x188
> [  147.546692]  test_leak_destroy+0x74/0x108 [slub_kunit]
> [  147.546693]  kunit_try_run_case+0x74/0x170
> [  147.546697]  kunit_generic_run_threadfn_adapter+0x30/0x60
> [  147.546699]  kthread+0xf4/0x108
> [  147.546705]  ret_from_fork+0x10/0x20
> [  147.546710] ---[ end trace 0000000000000000 ]---




* Re: [PATCH v2 7/7] kunit, slub: add test_kfree_rcu() and test_leak_destroy()
       [not found] ` <20240807-b4-slab-kfree_rcu-destroy-v2-7-ea79102f428c@suse.cz>
  2024-08-09 16:23   ` [PATCH v2 7/7] kunit, slub: add test_kfree_rcu() and test_leak_destroy() Uladzislau Rezki
  2024-09-14 13:22   ` Hyeonggon Yoo
@ 2024-09-20 13:35   ` Guenter Roeck
  2024-09-21 20:40     ` Vlastimil Babka
  2 siblings, 1 reply; 44+ messages in thread
From: Guenter Roeck @ 2024-09-20 13:35 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, Hyeonggon Yoo, linux-mm,
	linux-kernel, rcu, Alexander Potapenko, Marco Elver,
	Dmitry Vyukov, kasan-dev, Jann Horn, Mateusz Guzik

Hi,

On Wed, Aug 07, 2024 at 12:31:20PM +0200, Vlastimil Babka wrote:
> Add a test that will create cache, allocate one object, kfree_rcu() it
> and attempt to destroy it. As long as the usage of kvfree_rcu_barrier()
> in kmem_cache_destroy() works correctly, there should be no warnings in
> dmesg and the test should pass.
> 
> Additionally add a test_leak_destroy() test that leaks an object on
> purpose and verifies that kmem_cache_destroy() catches it.
> 
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

This test case, when run, triggers a warning traceback.

kmem_cache_destroy TestSlub_kfree_rcu: Slab cache still has objects when called from test_leak_destroy+0x70/0x11c
WARNING: CPU: 0 PID: 715 at mm/slab_common.c:511 kmem_cache_destroy+0x1dc/0x1e4

That is, however, not the worst of it. It also causes boot stalls on
several platforms and architectures (various arm platforms, arm64,
loongarch, various ppc, and various x86_64). Reverting it fixes the
problem. Bisect results are attached for reference.

Guenter

---
# bad: [baeb9a7d8b60b021d907127509c44507539c15e5] Merge tag 'sched-rt-2024-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
# good: [2f27fce67173bbb05d5a0ee03dae5c021202c912] Merge tag 'sound-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
git bisect start 'HEAD' '2f27fce67173'
# good: [ae2c6d8b3b88c176dff92028941a4023f1b4cb91] Merge tag 'drm-xe-next-fixes-2024-09-12' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
git bisect good ae2c6d8b3b88c176dff92028941a4023f1b4cb91
# bad: [c8d8a35d094626808cd07ed0758e14c7e4cf61ac] Merge tag 'livepatching-for-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/livepatching/livepatching
git bisect bad c8d8a35d094626808cd07ed0758e14c7e4cf61ac
# bad: [cc52dc2fe39ff5dee9916ac2d9381ec3cbf650c0] Merge tag 'pwm/for-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ukleinek/linux
git bisect bad cc52dc2fe39ff5dee9916ac2d9381ec3cbf650c0
# bad: [bdf56c7580d267a123cc71ca0f2459c797b76fde] Merge tag 'slab-for-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
git bisect bad bdf56c7580d267a123cc71ca0f2459c797b76fde
# good: [355debb83bf79853cde43579f88eed16adb1da29] Merge branches 'context_tracking.15.08.24a', 'csd.lock.15.08.24a', 'nocb.09.09.24a', 'rcutorture.14.08.24a', 'rcustall.09.09.24a', 'srcu.12.08.24a', 'rcu.tasks.14.08.24a', 'rcu_scaling_tests.15.08.24a', 'fixes.12.08.24a' and 'misc.11.08.24a' into next.09.09.24a
git bisect good 355debb83bf79853cde43579f88eed16adb1da29
# good: [067610ebaaec53809794807842a2fcf5f1f5b9eb] Merge tag 'rcu.release.v6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux
git bisect good 067610ebaaec53809794807842a2fcf5f1f5b9eb
# good: [4b7ff9ab98af11a477d50f08382bcc4c2f899926] mm, slab: restore kerneldoc for kmem_cache_create()
git bisect good 4b7ff9ab98af11a477d50f08382bcc4c2f899926
# bad: [a715e94dbda4ece41aac49b7b7ff8ddb55a7fe08] Merge branch 'slab/for-6.12/rcu_barriers' into slab/for-next
git bisect bad a715e94dbda4ece41aac49b7b7ff8ddb55a7fe08
# bad: [b3c34245756adada8a50bdaedbb3965b071c7b0a] kasan: catch invalid free before SLUB reinitializes the object
git bisect bad b3c34245756adada8a50bdaedbb3965b071c7b0a
# good: [2eb14c1c2717396f2fb1e4a4c5a1ec87cdd174f6] mm, slab: reintroduce rcu_barrier() into kmem_cache_destroy()
git bisect good 2eb14c1c2717396f2fb1e4a4c5a1ec87cdd174f6
# good: [6c6c47b063b593785202be158e61fe5c827d6677] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
git bisect good 6c6c47b063b593785202be158e61fe5c827d6677
# bad: [4e1c44b3db79ba910adec32e2e1b920a0e34890a] kunit, slub: add test_kfree_rcu() and test_leak_destroy()
git bisect bad 4e1c44b3db79ba910adec32e2e1b920a0e34890a
# first bad commit: [4e1c44b3db79ba910adec32e2e1b920a0e34890a] kunit, slub: add test_kfree_rcu() and test_leak_destroy()



* Re: [PATCH v2 7/7] kunit, slub: add test_kfree_rcu() and test_leak_destroy()
  2024-09-20 13:35   ` Guenter Roeck
@ 2024-09-21 20:40     ` Vlastimil Babka
  2024-09-21 21:08       ` Guenter Roeck
  0 siblings, 1 reply; 44+ messages in thread
From: Vlastimil Babka @ 2024-09-21 20:40 UTC (permalink / raw)
  To: Guenter Roeck, KUnit Development, Brendan Higgins, David Gow
  Cc: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, Hyeonggon Yoo, linux-mm,
	linux-kernel, rcu, Alexander Potapenko, Marco Elver,
	Dmitry Vyukov, kasan-dev, Jann Horn, Mateusz Guzik

+CC kunit folks

On 9/20/24 15:35, Guenter Roeck wrote:
> Hi,

Hi,

> On Wed, Aug 07, 2024 at 12:31:20PM +0200, Vlastimil Babka wrote:
>> Add a test that will create cache, allocate one object, kfree_rcu() it
>> and attempt to destroy it. As long as the usage of kvfree_rcu_barrier()
>> in kmem_cache_destroy() works correctly, there should be no warnings in
>> dmesg and the test should pass.
>> 
>> Additionally add a test_leak_destroy() test that leaks an object on
>> purpose and verifies that kmem_cache_destroy() catches it.
>> 
>> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> 
> This test case, when run, triggers a warning traceback.
> 
> kmem_cache_destroy TestSlub_kfree_rcu: Slab cache still has objects when called from test_leak_destroy+0x70/0x11c
> WARNING: CPU: 0 PID: 715 at mm/slab_common.c:511 kmem_cache_destroy+0x1dc/0x1e4

Yes, that should be suppressed like the other slub_kunit tests do. I assumed
it's not that urgent because, for example, the KASAN kunit tests all produce
tons of warnings, so I assumed it's in some way acceptable for kunit tests to
do that.

> That is, however, not the worst of it. It also causes boot stalls on
> several platforms and architectures (various arm platforms, arm64,
> loongarch, various ppc, and various x86_64). Reverting it fixes the
> problem. Bisect results are attached for reference.

OK, this part is unexpected. I assume you have the test built-in and not a
module, otherwise it can't affect boot? And by stall you mean a delay or a
complete lockup? I've tried to reproduce that with virtme, but it seemed
fine, maybe it's .config specific?

I do wonder about the placement of the kunit_run_all_tests() call in
kernel_init_freeable(), as that happens before a bunch of initialization
finishes.

For example, system_state = SYSTEM_RUNNING; and rcu_end_inkernel_boot() only
happen later in kernel_init(). I wouldn't be surprised if that means calling
kfree_rcu(), rcu_barrier() or kvfree_rcu_barrier() as part of the slub tests
is too early.

Does the diff below fix the problem? Any advice from the kunit folks? I could
perhaps make the slab test module-only instead of tristate, or add some ifdefs
for the built-in case around the problematic tests, but maybe that wouldn't be
necessary with kunit_run_all_tests() happening a bit later.

----8<----
diff --git a/init/main.c b/init/main.c
index c4778edae797..7890ebb00e84 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1489,6 +1489,8 @@ static int __ref kernel_init(void *unused)
 
 	rcu_end_inkernel_boot();
 
+	kunit_run_all_tests();
+
 	do_sysctl_args();
 
 	if (ramdisk_execute_command) {
@@ -1579,8 +1581,6 @@ static noinline void __init kernel_init_freeable(void)
 
 	do_basic_setup();
 
-	kunit_run_all_tests();
-
 	wait_for_initramfs();
 	console_on_rootfs();
 
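
Alternatively, one rough sketch of guarding the problematic tests against the
built-in case mentioned above (hypothetical, not a posted patch), reusing the
struct and helpers from the test patch: skip them when the test suite is
built-in and therefore runs this early in boot:

static void test_kfree_rcu(struct kunit *test)
{
	struct kmem_cache *s;
	struct test_kfree_rcu_struct *p;

	if (IS_BUILTIN(CONFIG_SLUB_KUNIT_TEST))
		kunit_skip(test, "can't do kfree_rcu() this early in boot");

	s = test_kmem_cache_create("TestSlub_kfree_rcu",
				   sizeof(struct test_kfree_rcu_struct),
				   SLAB_NO_MERGE);
	p = kmem_cache_alloc(s, GFP_KERNEL);

	kfree_rcu(p, rcu);
	kmem_cache_destroy(s);

	KUNIT_EXPECT_EQ(test, 0, slab_errors);
}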




* Re: [PATCH v2 7/7] kunit, slub: add test_kfree_rcu() and test_leak_destroy()
  2024-09-21 20:40     ` Vlastimil Babka
@ 2024-09-21 21:08       ` Guenter Roeck
  2024-09-21 21:25         ` Vlastimil Babka
  0 siblings, 1 reply; 44+ messages in thread
From: Guenter Roeck @ 2024-09-21 21:08 UTC (permalink / raw)
  To: Vlastimil Babka, KUnit Development, Brendan Higgins, David Gow
  Cc: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, Hyeonggon Yoo, linux-mm,
	linux-kernel, rcu, Alexander Potapenko, Marco Elver,
	Dmitry Vyukov, kasan-dev, Jann Horn, Mateusz Guzik

On 9/21/24 13:40, Vlastimil Babka wrote:
> +CC kunit folks
> 
> On 9/20/24 15:35, Guenter Roeck wrote:
>> Hi,
> 
> Hi,
> 
>> On Wed, Aug 07, 2024 at 12:31:20PM +0200, Vlastimil Babka wrote:
>>> Add a test that will create cache, allocate one object, kfree_rcu() it
>>> and attempt to destroy it. As long as the usage of kvfree_rcu_barrier()
>>> in kmem_cache_destroy() works correctly, there should be no warnings in
>>> dmesg and the test should pass.
>>>
>>> Additionally add a test_leak_destroy() test that leaks an object on
>>> purpose and verifies that kmem_cache_destroy() catches it.
>>>
>>> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
>>
>> This test case, when run, triggers a warning traceback.
>>
>> kmem_cache_destroy TestSlub_kfree_rcu: Slab cache still has objects when called from test_leak_destroy+0x70/0x11c
>> WARNING: CPU: 0 PID: 715 at mm/slab_common.c:511 kmem_cache_destroy+0x1dc/0x1e4
> 
> Yes that should be suppressed like the other slub_kunit tests do. I have
> assumed it's not that urgent because for example the KASAN kunit tests all
> produce tons of warnings and thus assumed it's in some way acceptable for
> kunit tests to do.
> 

I have all tests which generate warning backtraces disabled. Trying to identify
which warnings are noise and which warnings are on purpose doesn't scale,
so it is all or nothing for me. I tried earlier to introduce a patch series
which would enable selective backtrace suppression, but that died the death
of architecture maintainers not caring and people demanding it to be perfect
(meaning it only addressed WARNING: backtraces and not BUG: backtraces,
and apparently that wasn't good enough).

If the backtrace is intentional (and I think you are saying that it is),
I'll simply disable the test. That may be a bit counter-productive, but
there is really no alternative for me.

>> That is, however, not the worst of it. It also causes boot stalls on
>> several platforms and architectures (various arm platforms, arm64,
>> loongarch, various ppc, and various x86_64). Reverting it fixes the
>> problem. Bisect results are attached for reference.
> 
> OK, this part is unexpected. I assume you have the test built-in and not a
> module, otherwise it can't affect boot? And by stall you mean a delay or a

Yes.

> complete lockup? I've tried to reproduce that with virtme, but it seemed
> fine, maybe it's .config specific?

It is a complete lockup.

> 
> I do wonder about the placement of the call of kunit_run_all_tests() from
> kernel_init_freeable() as that's before a bunch of initialization finishes.
> 
> For example, system_state = SYSTEM_RUNNING; and rcu_end_inkernel_boot() only
> happens later in kernel_init(). I wouldn't be surprised if that means
> calling kfree_rcu() or rcu_barrier() or kvfree_rcu_barrier() as part of the
> slub tests is too early.
> 
> Does the diff below fix the problem? Any advice from kunit folks? I could
> perhaps possibly make the slab test module-only instead of tristate or do
> some ifdef builtin on the problematic tests, but maybe it wouldn't be
> necessary with kunit_run_all_tests() happening a bit later.
> 

It does, at least based on my limited testing. However, given that the
backtrace is apparently intentional, it doesn't really matter - I'll disable
the test instead.

Thanks,
Guenter




* Re: [PATCH v2 7/7] kunit, slub: add test_kfree_rcu() and test_leak_destroy()
  2024-09-21 21:08       ` Guenter Roeck
@ 2024-09-21 21:25         ` Vlastimil Babka
  2024-09-22  6:16           ` Hyeonggon Yoo
  0 siblings, 1 reply; 44+ messages in thread
From: Vlastimil Babka @ 2024-09-21 21:25 UTC (permalink / raw)
  To: Guenter Roeck, KUnit Development, Brendan Higgins, David Gow
  Cc: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, Hyeonggon Yoo, linux-mm,
	linux-kernel, rcu, Alexander Potapenko, Marco Elver,
	Dmitry Vyukov, kasan-dev, Jann Horn, Mateusz Guzik

On 9/21/24 23:08, Guenter Roeck wrote:
> On 9/21/24 13:40, Vlastimil Babka wrote:
>> +CC kunit folks
>> 
>> On 9/20/24 15:35, Guenter Roeck wrote:
>>> Hi,
>> 
>> Hi,
>> 
>>> On Wed, Aug 07, 2024 at 12:31:20PM +0200, Vlastimil Babka wrote:
>>>> Add a test that will create cache, allocate one object, kfree_rcu() it
>>>> and attempt to destroy it. As long as the usage of kvfree_rcu_barrier()
>>>> in kmem_cache_destroy() works correctly, there should be no warnings in
>>>> dmesg and the test should pass.
>>>>
>>>> Additionally add a test_leak_destroy() test that leaks an object on
>>>> purpose and verifies that kmem_cache_destroy() catches it.
>>>>
>>>> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
>>>
>>> This test case, when run, triggers a warning traceback.
>>>
>>> kmem_cache_destroy TestSlub_kfree_rcu: Slab cache still has objects when called from test_leak_destroy+0x70/0x11c
>>> WARNING: CPU: 0 PID: 715 at mm/slab_common.c:511 kmem_cache_destroy+0x1dc/0x1e4
>> 
>> Yes that should be suppressed like the other slub_kunit tests do. I have
>> assumed it's not that urgent because for example the KASAN kunit tests all
>> produce tons of warnings and thus assumed it's in some way acceptable for
>> kunit tests to do.
>> 
> 
> I have all tests which generate warning backtraces disabled. Trying to identify
> which warnings are noise and which warnings are on purpose doesn't scale,
> so it is all or nothing for me. I tried earlier to introduce a patch series
> which would enable selective backtrace suppression, but that died the death
> of architecture maintainers not caring and people demanding it to be perfect
> (meaning it only addressed WARNING: backtraces and not BUG: backtraces,
> and apparently that wasn't good enough).

Ah, didn't know, too bad.

> If the backtrace is intentional (and I think you are saying that it is),
> I'll simply disable the test. That may be a bit counter-productive, but
> there is really no alternative for me.

It's intentional in the sense that the test intentionally triggers a
condition that normally produces a warning. Many of the slub kunit tests do
that, but they are able to suppress printing the warning when it happens in
the kunit context. I forgot to do that for the new test initially, as the
warning there happens from a different path than those that already have the
kunit suppression, but we'll implement that suppression there too ASAP.

>>> That is, however, not the worst of it. It also causes boot stalls on
>>> several platforms and architectures (various arm platforms, arm64,
>>> loongarch, various ppc, and various x86_64). Reverting it fixes the
>>> problem. Bisect results are attached for reference.
>> 
>> OK, this part is unexpected. I assume you have the test built-in and not a
>> module, otherwise it can't affect boot? And by stall you mean a delay or a
> 
> Yes.
> 
>> complete lockup? I've tried to reproduce that with virtme, but it seemed
>> fine, maybe it's .config specific?
> 
> It is a complete lockup.
> 
>> 
>> I do wonder about the placement of the call of kunit_run_all_tests() from
>> kernel_init_freeable() as that's before a bunch of initialization finishes.
>> 
>> For example, system_state = SYSTEM_RUNNING; and rcu_end_inkernel_boot() only
>> happens later in kernel_init(). I wouldn't be surprised if that means
>> calling kfree_rcu() or rcu_barrier() or kvfree_rcu_barrier() as part of the
>> slub tests is too early.
>> 
>> Does the diff below fix the problem? Any advice from kunit folks? I could
>> perhaps possibly make the slab test module-only instead of tristate or do
>> some ifdef builtin on the problematic tests, but maybe it wouldn't be
>> necessary with kunit_run_all_tests() happening a bit later.
>> 
> 
> It does, at least based on my limited testing. However, given that the
> backtrace is apparently intentional, it doesn't really matter - I'll disable
> the test instead.

Right, thanks. I will notify you when the suppression is done so the test
can be re-enabled.

But as the lockup should not be caused by the warning itself, we will still
have to address it: others who configure the kunit tests as built-in might
see the lockup, and you would see it again as well once the warning is
suppressed and the test re-enabled.

Your testing suggests moving the kunit_run_all_tests() call might be the
right fix, so let's see what the kunit folks think. I would hope that no other
kunit test would break by being executed later in boot, because when the tests
are compiled as modules, they already run much later anyway.
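
For reference, the boot ordering in question looks roughly like this (heavily
simplified sketch of init/main.c, just to show where built-in kunit tests run
relative to the RCU/system-state transitions):

static noinline void __init kernel_init_freeable(void)
{
	/* ... driver and subsystem initialization ... */
	do_basic_setup();
	kunit_run_all_tests();		/* built-in kunit tests run here */
	/* ... */
}

static int __ref kernel_init(void *unused)
{
	kernel_init_freeable();
	/* ... */
	system_state = SYSTEM_RUNNING;	/* only set after the tests already ran */
	rcu_end_inkernel_boot();	/* RCU leaves its boot-time mode here */
	/* ... exec of init ... */
}

The idea would be to run the built-in tests only after those two steps, so
that the slab tests don't call kfree_rcu()/kvfree_rcu_barrier() before RCU
has finished its boot-time setup.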

Thanks!
Vlastimil

> Thanks,
> Guenter
> 



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2 7/7] kunit, slub: add test_kfree_rcu() and test_leak_destroy()
  2024-09-21 21:25         ` Vlastimil Babka
@ 2024-09-22  6:16           ` Hyeonggon Yoo
  2024-09-22 14:13             ` Guenter Roeck
  0 siblings, 1 reply; 44+ messages in thread
From: Hyeonggon Yoo @ 2024-09-22  6:16 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Guenter Roeck, KUnit Development, Brendan Higgins, David Gow,
	Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik

On Sun, Sep 22, 2024 at 6:25 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 9/21/24 23:08, Guenter Roeck wrote:
> > On 9/21/24 13:40, Vlastimil Babka wrote:
> >> +CC kunit folks
> >>
> >> On 9/20/24 15:35, Guenter Roeck wrote:
> >>> Hi,
> >>
> >> Hi,
> >>
> >>> On Wed, Aug 07, 2024 at 12:31:20PM +0200, Vlastimil Babka wrote:
> >>>> Add a test that will create cache, allocate one object, kfree_rcu() it
> >>>> and attempt to destroy it. As long as the usage of kvfree_rcu_barrier()
> >>>> in kmem_cache_destroy() works correctly, there should be no warnings in
> >>>> dmesg and the test should pass.
> >>>>
> >>>> Additionally add a test_leak_destroy() test that leaks an object on
> >>>> purpose and verifies that kmem_cache_destroy() catches it.
> >>>>
> >>>> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> >>>
> >>> This test case, when run, triggers a warning traceback.
> >>>
> >>> kmem_cache_destroy TestSlub_kfree_rcu: Slab cache still has objects when called from test_leak_destroy+0x70/0x11c
> >>> WARNING: CPU: 0 PID: 715 at mm/slab_common.c:511 kmem_cache_destroy+0x1dc/0x1e4
> >>
> >> Yes that should be suppressed like the other slub_kunit tests do. I have
> >> assumed it's not that urgent because for example the KASAN kunit tests all
> >> produce tons of warnings and thus assumed it's in some way acceptable for
> >> kunit tests to do.
> >>
> >
> > I have all tests which generate warning backtraces disabled. Trying to identify
> > which warnings are noise and which warnings are on purpose doesn't scale,
> > so it is all or nothing for me. I tried earlier to introduce a patch series
> > which would enable selective backtrace suppression, but that died the death
> > of architecture maintainers not caring and people demanding it to be perfect
> > (meaning it only addressed WARNING: backtraces and not BUG: backtraces,
> > and apparently that wasn't good enough).
>
> Ah, didn't know, too bad.
>
> > If the backtrace is intentional (and I think you are saying that it is),
> > I'll simply disable the test. That may be a bit counter-productive, but
> > there is really no alternative for me.
>
> It's intentional in the sense that the test intentionally triggers a
> condition that normally produces a warning. Many if the slub kunit test do
> that, but are able to suppress printing the warning when it happens in the
> kunit context. I forgot to do that for the new test initially as the warning
> there happens from a different path that those that already have the kunit
> suppression, but we'll implement that suppression there too ASAP.

We might also need to address the concern behind commit 7302e91f39a
("mm/slab_common: use WARN() if cache still has objects on destroy"):
some users prefer WARN() over pr_err() to catch errors on testing systems
that rely on the WARN() format, and to respect panic_on_warn.

So we might need to call WARN() instead of pr_err() for errors in the
slub error handling code in general, except when running kunit tests?
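
As a rough illustration only (slab_warn() is a made-up name, not an existing
helper), the idea would be something like:

/*
 * Hypothetical helper: report slab errors via WARN() so testing systems and
 * panic_on_warn catch them, but stay quiet when a kunit test is deliberately
 * provoking the error.
 */
#define slab_warn(fmt, ...)					\
do {								\
	if (kunit_get_current_test())				\
		pr_err(fmt, ##__VA_ARGS__);			\
	else							\
		WARN(1, fmt, ##__VA_ARGS__);			\
} while (0)

(In practice the kunit branch would probably feed the existing slab_errors
counter rather than pr_err().)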

Thanks,
Hyeonggon


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2 7/7] kunit, slub: add test_kfree_rcu() and test_leak_destroy()
  2024-09-22  6:16           ` Hyeonggon Yoo
@ 2024-09-22 14:13             ` Guenter Roeck
  2024-09-25 12:56               ` Hyeonggon Yoo
  0 siblings, 1 reply; 44+ messages in thread
From: Guenter Roeck @ 2024-09-22 14:13 UTC (permalink / raw)
  To: Hyeonggon Yoo, Vlastimil Babka
  Cc: KUnit Development, Brendan Higgins, David Gow, Paul E. McKenney,
	Joel Fernandes, Josh Triplett, Boqun Feng, Christoph Lameter,
	David Rientjes, Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan,
	Zqiang, Julia Lawall, Jakub Kicinski, Jason A. Donenfeld,
	Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik

On 9/21/24 23:16, Hyeonggon Yoo wrote:
> On Sun, Sep 22, 2024 at 6:25 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>>
>> On 9/21/24 23:08, Guenter Roeck wrote:
>>> On 9/21/24 13:40, Vlastimil Babka wrote:
>>>> +CC kunit folks
>>>>
>>>> On 9/20/24 15:35, Guenter Roeck wrote:
>>>>> Hi,
>>>>
>>>> Hi,
>>>>
>>>>> On Wed, Aug 07, 2024 at 12:31:20PM +0200, Vlastimil Babka wrote:
>>>>>> Add a test that will create cache, allocate one object, kfree_rcu() it
>>>>>> and attempt to destroy it. As long as the usage of kvfree_rcu_barrier()
>>>>>> in kmem_cache_destroy() works correctly, there should be no warnings in
>>>>>> dmesg and the test should pass.
>>>>>>
>>>>>> Additionally add a test_leak_destroy() test that leaks an object on
>>>>>> purpose and verifies that kmem_cache_destroy() catches it.
>>>>>>
>>>>>> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
>>>>>
>>>>> This test case, when run, triggers a warning traceback.
>>>>>
>>>>> kmem_cache_destroy TestSlub_kfree_rcu: Slab cache still has objects when called from test_leak_destroy+0x70/0x11c
>>>>> WARNING: CPU: 0 PID: 715 at mm/slab_common.c:511 kmem_cache_destroy+0x1dc/0x1e4
>>>>
>>>> Yes that should be suppressed like the other slub_kunit tests do. I have
>>>> assumed it's not that urgent because for example the KASAN kunit tests all
>>>> produce tons of warnings and thus assumed it's in some way acceptable for
>>>> kunit tests to do.
>>>>
>>>
>>> I have all tests which generate warning backtraces disabled. Trying to identify
>>> which warnings are noise and which warnings are on purpose doesn't scale,
>>> so it is all or nothing for me. I tried earlier to introduce a patch series
>>> which would enable selective backtrace suppression, but that died the death
>>> of architecture maintainers not caring and people demanding it to be perfect
>>> (meaning it only addressed WARNING: backtraces and not BUG: backtraces,
>>> and apparently that wasn't good enough).
>>
>> Ah, didn't know, too bad.
>>
>>> If the backtrace is intentional (and I think you are saying that it is),
>>> I'll simply disable the test. That may be a bit counter-productive, but
>>> there is really no alternative for me.
>>
>> It's intentional in the sense that the test intentionally triggers a
>> condition that normally produces a warning. Many if the slub kunit test do
>> that, but are able to suppress printing the warning when it happens in the
>> kunit context. I forgot to do that for the new test initially as the warning
>> there happens from a different path that those that already have the kunit
>> suppression, but we'll implement that suppression there too ASAP.
> 
> We might also need to address the concern of the commit
> 7302e91f39a ("mm/slab_common: use WARN() if cache still has objects on
> destroy"),
> the concern that some users prefer WARN() over pr_err() to catch
> errors on testing systems
> which relies on WARN() format, and to respect panic_on_warn.
> 
> So we might need to call WARN() instead of pr_err() if there are errors in
> slub error handling code in general, except when running kunit tests?
> 

If people _want_ to see WARNING backtraces generated on purpose, so be it.
For me it means that _real_ WARNING backtraces disappear in the noise.
Manually maintaining a list of expected warning backtraces is too much
maintenance effort for me, so I simply disable all kunit tests which generate
backtraces on purpose. That is just me, though. Other testbeds may have
more resources available and may be perfectly happy with the associated
maintenance cost.

In this specific case, I have now disabled the slub kunit tests, and, as
mentioned before, from my perspective there is no need to change the
code just to accommodate my needs. I'll do the same with all other new
unit tests which generate backtraces in the future, without bothering
anyone.

Sorry for the noise.

Thanks,
Guenter



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2 7/7] kunit, slub: add test_kfree_rcu() and test_leak_destroy()
  2024-09-22 14:13             ` Guenter Roeck
@ 2024-09-25 12:56               ` Hyeonggon Yoo
  2024-09-26 12:54                 ` Vlastimil Babka
  0 siblings, 1 reply; 44+ messages in thread
From: Hyeonggon Yoo @ 2024-09-25 12:56 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Vlastimil Babka, KUnit Development, Brendan Higgins, David Gow,
	Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik

On Sun, Sep 22, 2024 at 11:13 PM Guenter Roeck <linux@roeck-us.net> wrote:
>
> On 9/21/24 23:16, Hyeonggon Yoo wrote:
> > On Sun, Sep 22, 2024 at 6:25 AM Vlastimil Babka <vbabka@suse.cz> wrote:
> >>
> >> On 9/21/24 23:08, Guenter Roeck wrote:
> >>> On 9/21/24 13:40, Vlastimil Babka wrote:
> >>>> +CC kunit folks
> >>>>
> >>>> On 9/20/24 15:35, Guenter Roeck wrote:
> >>>>> Hi,
> >>>>
> >>>> Hi,
> >>>>
> >>>>> On Wed, Aug 07, 2024 at 12:31:20PM +0200, Vlastimil Babka wrote:
> >>>>>> Add a test that will create cache, allocate one object, kfree_rcu() it
> >>>>>> and attempt to destroy it. As long as the usage of kvfree_rcu_barrier()
> >>>>>> in kmem_cache_destroy() works correctly, there should be no warnings in
> >>>>>> dmesg and the test should pass.
> >>>>>>
> >>>>>> Additionally add a test_leak_destroy() test that leaks an object on
> >>>>>> purpose and verifies that kmem_cache_destroy() catches it.
> >>>>>>
> >>>>>> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> >>>>>
> >>>>> This test case, when run, triggers a warning traceback.
> >>>>>
> >>>>> kmem_cache_destroy TestSlub_kfree_rcu: Slab cache still has objects when called from test_leak_destroy+0x70/0x11c
> >>>>> WARNING: CPU: 0 PID: 715 at mm/slab_common.c:511 kmem_cache_destroy+0x1dc/0x1e4
> >>>>
> >>>> Yes that should be suppressed like the other slub_kunit tests do. I have
> >>>> assumed it's not that urgent because for example the KASAN kunit tests all
> >>>> produce tons of warnings and thus assumed it's in some way acceptable for
> >>>> kunit tests to do.
> >>>>
> >>>
> >>> I have all tests which generate warning backtraces disabled. Trying to identify
> >>> which warnings are noise and which warnings are on purpose doesn't scale,
> >>> so it is all or nothing for me. I tried earlier to introduce a patch series
> >>> which would enable selective backtrace suppression, but that died the death
> >>> of architecture maintainers not caring and people demanding it to be perfect
> >>> (meaning it only addressed WARNING: backtraces and not BUG: backtraces,
> >>> and apparently that wasn't good enough).
> >>
> >> Ah, didn't know, too bad.
> >>
> >>> If the backtrace is intentional (and I think you are saying that it is),
> >>> I'll simply disable the test. That may be a bit counter-productive, but
> >>> there is really no alternative for me.
> >>
> >> It's intentional in the sense that the test intentionally triggers a
> >> condition that normally produces a warning. Many if the slub kunit test do
> >> that, but are able to suppress printing the warning when it happens in the
> >> kunit context. I forgot to do that for the new test initially as the warning
> >> there happens from a different path that those that already have the kunit
> >> suppression, but we'll implement that suppression there too ASAP.
> >
> > We might also need to address the concern of the commit
> > 7302e91f39a ("mm/slab_common: use WARN() if cache still has objects on
> > destroy"),
> > the concern that some users prefer WARN() over pr_err() to catch
> > errors on testing systems
> > which relies on WARN() format, and to respect panic_on_warn.
> >
> > So we might need to call WARN() instead of pr_err() if there are errors in
> > slub error handling code in general, except when running kunit tests?
> >
>
> If people _want_ to see WARNING backtraces generated on purpose, so be it.
> For me it means that _real_ WARNING backtraces disappear in the noise.
> Manually maintaining a list of expected warning backtraces is too maintenance
> expensive for me, so I simply disable all kunit tests which generate
> backtraces on purpose. That is just me, though. Other testbeds may have
> more resources available and may be perfectly happy with the associated
> maintenance cost.
>
> In this specific case, I now have disabled slub kunit tests, and, as
> mentioned before, from my perspective there is no need to change the
> code just to accommodate my needs. I'll do the same with all other new
> unit tests which generate backtraces in the future, without bothering
> anyone.
>
> Sorry for the noise.

I don't think this was noise :) IMO some people want to see WARNING output
during testing to catch errors, just not from the slub_kunit test cases.
I think a proper approach here would be to suppress the warnings while the
slub_kunit test cases are running, but still print a WARNING otherwise.

That would require some work changing the slub error reporting logic
to print a WARNING on certain errors.
Any opinions, Vlastimil?

Thanks,
Hyeonggon


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2 7/7] kunit, slub: add test_kfree_rcu() and test_leak_destroy()
  2024-09-25 12:56               ` Hyeonggon Yoo
@ 2024-09-26 12:54                 ` Vlastimil Babka
  2024-09-30  8:47                   ` Vlastimil Babka
  0 siblings, 1 reply; 44+ messages in thread
From: Vlastimil Babka @ 2024-09-26 12:54 UTC (permalink / raw)
  To: Hyeonggon Yoo, Guenter Roeck
  Cc: KUnit Development, Brendan Higgins, David Gow, Paul E. McKenney,
	Joel Fernandes, Josh Triplett, Boqun Feng, Christoph Lameter,
	David Rientjes, Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan,
	Zqiang, Julia Lawall, Jakub Kicinski, Jason A. Donenfeld,
	Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik

On 9/25/24 14:56, Hyeonggon Yoo wrote:
> On Sun, Sep 22, 2024 at 11:13 PM Guenter Roeck <linux@roeck-us.net> wrote:
>>
>> On 9/21/24 23:16, Hyeonggon Yoo wrote:
>> > On Sun, Sep 22, 2024 at 6:25 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>> >>
>> >> On 9/21/24 23:08, Guenter Roeck wrote:
>> >>> On 9/21/24 13:40, Vlastimil Babka wrote:
>> >>>> +CC kunit folks
>> >>>>
>> >>>> On 9/20/24 15:35, Guenter Roeck wrote:
>> >>>>> Hi,
>> >>>>
>> >>>> Hi,
>> >>>>
>> >>>>> On Wed, Aug 07, 2024 at 12:31:20PM +0200, Vlastimil Babka wrote:
>> >>>>>> Add a test that will create cache, allocate one object, kfree_rcu() it
>> >>>>>> and attempt to destroy it. As long as the usage of kvfree_rcu_barrier()
>> >>>>>> in kmem_cache_destroy() works correctly, there should be no warnings in
>> >>>>>> dmesg and the test should pass.
>> >>>>>>
>> >>>>>> Additionally add a test_leak_destroy() test that leaks an object on
>> >>>>>> purpose and verifies that kmem_cache_destroy() catches it.
>> >>>>>>
>> >>>>>> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
>> >>>>>
>> >>>>> This test case, when run, triggers a warning traceback.
>> >>>>>
>> >>>>> kmem_cache_destroy TestSlub_kfree_rcu: Slab cache still has objects when called from test_leak_destroy+0x70/0x11c
>> >>>>> WARNING: CPU: 0 PID: 715 at mm/slab_common.c:511 kmem_cache_destroy+0x1dc/0x1e4
>> >>>>
>> >>>> Yes that should be suppressed like the other slub_kunit tests do. I have
>> >>>> assumed it's not that urgent because for example the KASAN kunit tests all
>> >>>> produce tons of warnings and thus assumed it's in some way acceptable for
>> >>>> kunit tests to do.
>> >>>>
>> >>>
>> >>> I have all tests which generate warning backtraces disabled. Trying to identify
>> >>> which warnings are noise and which warnings are on purpose doesn't scale,
>> >>> so it is all or nothing for me. I tried earlier to introduce a patch series
>> >>> which would enable selective backtrace suppression, but that died the death
>> >>> of architecture maintainers not caring and people demanding it to be perfect
>> >>> (meaning it only addressed WARNING: backtraces and not BUG: backtraces,
>> >>> and apparently that wasn't good enough).
>> >>
>> >> Ah, didn't know, too bad.
>> >>
>> >>> If the backtrace is intentional (and I think you are saying that it is),
>> >>> I'll simply disable the test. That may be a bit counter-productive, but
>> >>> there is really no alternative for me.
>> >>
>> >> It's intentional in the sense that the test intentionally triggers a
>> >> condition that normally produces a warning. Many if the slub kunit test do
>> >> that, but are able to suppress printing the warning when it happens in the
>> >> kunit context. I forgot to do that for the new test initially as the warning
>> >> there happens from a different path that those that already have the kunit
>> >> suppression, but we'll implement that suppression there too ASAP.
>> >
>> > We might also need to address the concern of the commit
>> > 7302e91f39a ("mm/slab_common: use WARN() if cache still has objects on
>> > destroy"),
>> > the concern that some users prefer WARN() over pr_err() to catch
>> > errors on testing systems
>> > which relies on WARN() format, and to respect panic_on_warn.
>> >
>> > So we might need to call WARN() instead of pr_err() if there are errors in
>> > slub error handling code in general, except when running kunit tests?
>> >
>>
>> If people _want_ to see WARNING backtraces generated on purpose, so be it.
>> For me it means that _real_ WARNING backtraces disappear in the noise.
>> Manually maintaining a list of expected warning backtraces is too maintenance
>> expensive for me, so I simply disable all kunit tests which generate
>> backtraces on purpose. That is just me, though. Other testbeds may have
>> more resources available and may be perfectly happy with the associated
>> maintenance cost.
>>
>> In this specific case, I now have disabled slub kunit tests, and, as
>> mentioned before, from my perspective there is no need to change the
>> code just to accommodate my needs. I'll do the same with all other new
>> unit tests which generate backtraces in the future, without bothering
>> anyone.
>>
>> Sorry for the noise.
> 
> I don't think this was a noise :) IMO some people want to see WARNING
> during testing to catch errors,
> but not for the slub_kunit test case. I think a proper approach here
> would be suppressing
> warnings while running slub_kunit test cases, but print WARNING when
> it is not running slub_kunit test cases.
> 
> That would require some work changing the slub error reporting logic
> to print WARNING on certain errors.
> Any opinions, Vlastimil?

Yes, we should suppress the existing warning on kmem_cache_destroy() in
kunit test context, and separately we can change pr_err() to WARN() as long
as they are still suppressed in kunit test context.

> Thanks,
> Hyeonggon



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2 7/7] kunit, slub: add test_kfree_rcu() and test_leak_destroy()
  2024-09-26 12:54                 ` Vlastimil Babka
@ 2024-09-30  8:47                   ` Vlastimil Babka
  0 siblings, 0 replies; 44+ messages in thread
From: Vlastimil Babka @ 2024-09-30  8:47 UTC (permalink / raw)
  To: Hyeonggon Yoo, Guenter Roeck
  Cc: KUnit Development, Brendan Higgins, David Gow, Paul E. McKenney,
	Joel Fernandes, Josh Triplett, Boqun Feng, Christoph Lameter,
	David Rientjes, Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan,
	Zqiang, Julia Lawall, Jakub Kicinski, Jason A. Donenfeld,
	Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik

On 9/26/24 14:54, Vlastimil Babka wrote:
> On 9/25/24 14:56, Hyeonggon Yoo wrote:
>> 
>> I don't think this was a noise :) IMO some people want to see WARNING
>> during testing to catch errors,
>> but not for the slub_kunit test case. I think a proper approach here
>> would be suppressing
>> warnings while running slub_kunit test cases, but print WARNING when
>> it is not running slub_kunit test cases.
>> 
>> That would require some work changing the slub error reporting logic
>> to print WARNING on certain errors.
>> Any opinions, Vlastimil?
> 
> Yes, we should suppress the existing warning on kmem_cache_destroy() in

Done here:
https://lore.kernel.org/all/20240930-b4-slub-kunit-fix-v1-0-32ca9dbbbc11@suse.cz/

> kunit test context, and separately we can change pr_err() to WARN() as long
> as they are still suppressed in kunit test context.

Can be done later separately as it would be out of scope for an rcX fix.

>> Thanks,
>> Hyeonggon
> 



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
       [not found] ` <20240807-b4-slab-kfree_rcu-destroy-v2-6-ea79102f428c@suse.cz>
@ 2025-02-21 16:30   ` Keith Busch
  2025-02-21 16:51     ` Mateusz Guzik
  2025-02-21 17:28     ` Vlastimil Babka
  0 siblings, 2 replies; 44+ messages in thread
From: Keith Busch @ 2025-02-21 16:30 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, Hyeonggon Yoo, linux-mm,
	linux-kernel, rcu, Alexander Potapenko, Marco Elver,
	Dmitry Vyukov, kasan-dev, Jann Horn, Mateusz Guzik, linux-nvme,
	leitao

On Wed, Aug 07, 2024 at 12:31:19PM +0200, Vlastimil Babka wrote:
> We would like to replace call_rcu() users with kfree_rcu() where the
> existing callback is just a kmem_cache_free(). However this causes
> issues when the cache can be destroyed (such as due to module unload).
> 
> Currently such modules should be issuing rcu_barrier() before
> kmem_cache_destroy() to have their call_rcu() callbacks processed first.
> This barrier is however not sufficient for kfree_rcu() in flight due
> to the batching introduced by a35d16905efc ("rcu: Add basic support for
> kfree_rcu() batching").
> 
> This is not a problem for kmalloc caches which are never destroyed, but
> since removing SLOB, kfree_rcu() is allowed also for any other cache,
> that might be destroyed.
> 
> In order not to complicate the API, put the responsibility for handling
> outstanding kfree_rcu() in kmem_cache_destroy() itself. Use the newly
> introduced kvfree_rcu_barrier() to wait before destroying the cache.
> This is similar to how we issue rcu_barrier() for SLAB_TYPESAFE_BY_RCU
> caches, but has to be done earlier, as the latter only needs to wait for
> the empty slab pages to finish freeing, and not objects from the slab.
> 
> Users of call_rcu() with arbitrary callbacks should still issue
> rcu_barrier() before destroying the cache and unloading the module, as
> kvfree_rcu_barrier() is not a superset of rcu_barrier() and the
> callbacks may be invoking module code or performing other actions that
> are necessary for a successful unload.
> 
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  mm/slab_common.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index c40227d5fa07..1a2873293f5d 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -508,6 +508,9 @@ void kmem_cache_destroy(struct kmem_cache *s)
>  	if (unlikely(!s) || !kasan_check_byte(s))
>  		return;
>  
> +	/* in-flight kfree_rcu()'s may include objects from our cache */
> +	kvfree_rcu_barrier();
> +
>  	cpus_read_lock();
>  	mutex_lock(&slab_mutex);

This patch appears to be triggering a new warning in certain conditions
when tearing down an nvme namespace's block device. Stack trace is at
the end.

The warning indicates that this shouldn't be called from a
WQ_MEM_RECLAIM workqueue. This workqueue is responsible for bringing up
and tearing down block devices, so this is a memory reclaim use AIUI.
I'm a bit confused why we can't tear down a disk from within a memory
reclaim workqueue. Is the recommended solution to simply remove the WQ
flag when creating the workqueue?
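
For reference, the rule being enforced is roughly the following (a paraphrase
of check_flush_dependency() in kernel/workqueue.c, not the literal source): a
WQ_MEM_RECLAIM workqueue has a rescuer thread and is guaranteed to make
forward progress under memory pressure, so it must not wait on work running
on a workqueue without that guarantee.

/* Paraphrased sketch of the check that produced the warning below: */
static void check_flush_dependency_sketch(struct workqueue_struct *flusher_wq,
					  struct workqueue_struct *target_wq)
{
	WARN_ONCE((flusher_wq->flags & WQ_MEM_RECLAIM) &&
		  !(target_wq->flags & WQ_MEM_RECLAIM),
		  "workqueue: WQ_MEM_RECLAIM %s is flushing !WQ_MEM_RECLAIM %s",
		  flusher_wq->name, target_wq->name);
}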

  ------------[ cut here ]------------
  workqueue: WQ_MEM_RECLAIM nvme-wq:nvme_scan_work is flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work
  WARNING: CPU: 21 PID: 330 at kernel/workqueue.c:3719 check_flush_dependency+0x112/0x120
  Modules linked in: intel_uncore_frequency(E) intel_uncore_frequency_common(E) skx_edac(E) skx_edac_common(E) nfit(E) libnvdimm(E) x86_pkg_temp_thermal(E) intel_powerclamp(E) coretemp(E) kvm_intel(E) iTCO_wdt(E) xhci_pci(E) mlx5_ib(E) ipmi_si(E) iTCO_vendor_support(E) i2c_i801(E) ipmi_devintf(E) evdev(E) kvm(E) xhci_hcd(E) ib_uverbs(E) acpi_cpufreq(E) wmi(E) i2c_smbus(E) ipmi_msghandler(E) button(E) efivarfs(E) autofs4(E)
  CPU: 21 UID: 0 PID: 330 Comm: kworker/u144:6 Tainted: G            E      6.13.2-0_g925d379822da #1
  Hardware name: Wiwynn Twin Lakes MP/Twin Lakes Passive MP, BIOS YMM20 02/01/2023
  Workqueue: nvme-wq nvme_scan_work
  RIP: 0010:check_flush_dependency+0x112/0x120
  Code: 05 9a 40 14 02 01 48 81 c6 c0 00 00 00 48 8b 50 18 48 81 c7 c0 00 00 00 48 89 f9 48 c7 c7 90 64 5a 82 49 89 d8 e8 7e 4f 88 ff <0f> 0b eb 8c cc cc cc cc cc cc cc cc cc cc 0f 1f 44 00 00 41 57 41
  RSP: 0018:ffffc90000df7bd8 EFLAGS: 00010082
  RAX: 000000000000006a RBX: ffffffff81622390 RCX: 0000000000000027
  RDX: 00000000fffeffff RSI: 000000000057ffa8 RDI: ffff88907f960c88
  RBP: 0000000000000000 R08: ffffffff83068e50 R09: 000000000002fffd
  R10: 0000000000000004 R11: 0000000000000000 R12: ffff8881001a4400
  R13: 0000000000000000 R14: ffff88907f420fb8 R15: 0000000000000000
  FS:  0000000000000000(0000) GS:ffff88907f940000(0000) knlGS:0000000000000000
  CR2: 00007f60c3001000 CR3: 000000107d010005 CR4: 00000000007726f0
  PKRU: 55555554
  Call Trace:
   <TASK>
   ? __warn+0xa4/0x140
   ? check_flush_dependency+0x112/0x120
   ? report_bug+0xe1/0x140
   ? check_flush_dependency+0x112/0x120
   ? handle_bug+0x5e/0x90
   ? exc_invalid_op+0x16/0x40
   ? asm_exc_invalid_op+0x16/0x20
   ? timer_recalc_next_expiry+0x190/0x190
   ? check_flush_dependency+0x112/0x120
   ? check_flush_dependency+0x112/0x120
   __flush_work.llvm.1643880146586177030+0x174/0x2c0
   flush_rcu_work+0x28/0x30
   kvfree_rcu_barrier+0x12f/0x160
   kmem_cache_destroy+0x18/0x120
   bioset_exit+0x10c/0x150
   disk_release.llvm.6740012984264378178+0x61/0xd0
   device_release+0x4f/0x90
   kobject_put+0x95/0x180
   nvme_put_ns+0x23/0xc0
   nvme_remove_invalid_namespaces+0xb3/0xd0
   nvme_scan_work+0x342/0x490
   process_scheduled_works+0x1a2/0x370
   worker_thread+0x2ff/0x390
   ? pwq_release_workfn+0x1e0/0x1e0
   kthread+0xb1/0xe0
   ? __kthread_parkme+0x70/0x70
   ret_from_fork+0x30/0x40
   ? __kthread_parkme+0x70/0x70
   ret_from_fork_asm+0x11/0x20
   </TASK>
  ---[ end trace 0000000000000000 ]---


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-21 16:30   ` [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy() Keith Busch
@ 2025-02-21 16:51     ` Mateusz Guzik
  2025-02-21 16:52       ` Mateusz Guzik
  2025-02-21 17:28     ` Vlastimil Babka
  1 sibling, 1 reply; 44+ messages in thread
From: Mateusz Guzik @ 2025-02-21 16:51 UTC (permalink / raw)
  To: Keith Busch
  Cc: Vlastimil Babka, Paul E. McKenney, Joel Fernandes, Josh Triplett,
	Boqun Feng, Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, Hyeonggon Yoo, linux-mm,
	linux-kernel, rcu, Alexander Potapenko, Marco Elver,
	Dmitry Vyukov, kasan-dev, Jann Horn, linux-nvme, leitao

On Fri, Feb 21, 2025 at 5:30 PM Keith Busch <kbusch@kernel.org> wrote:
> This patch appears to be triggering a new warning in certain conditions
> when tearing down an nvme namespace's block device. Stack trace is at
> the end.
>
> The warning indicates that this shouldn't be called from a
> WQ_MEM_RECLAIM workqueue. This workqueue is responsible for bringing up
> and tearing down block devices, so this is a memory reclaim use AIUI.
> I'm a bit confused why we can't tear down a disk from within a memory
> reclaim workqueue. Is the recommended solution to simply remove the WQ
> flag when creating the workqueue?
>

This ends up calling into bioset_exit -> bio_put_slab -> kmem_cache_destroy.

The sizes of the bio- slabs are off the beaten path, so it may well make
sense for them to exist as dedicated caches.

Assuming the caches should stay, this could instead invoke
kmem_cache_destroy() from a queue where it is safe to do so. This is not
supposed to be a frequent operation.
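
A rough sketch of that kind of deferral, purely to illustrate the idea (all
names below are hypothetical, and whether callers can tolerate the destroy
completing asynchronously is a separate question):

struct deferred_cache_destroy {
	struct work_struct work;
	struct kmem_cache *s;
};

static void deferred_cache_destroy_fn(struct work_struct *w)
{
	struct deferred_cache_destroy *dd =
		container_of(w, struct deferred_cache_destroy, work);

	kmem_cache_destroy(dd->s);	/* now runs off a !WQ_MEM_RECLAIM context */
	kfree(dd);
}

static void kmem_cache_destroy_deferred(struct kmem_cache *s)
{
	struct deferred_cache_destroy *dd = kmalloc(sizeof(*dd), GFP_KERNEL);

	if (!dd) {
		kmem_cache_destroy(s);	/* fall back to the synchronous path */
		return;
	}
	dd->s = s;
	INIT_WORK(&dd->work, deferred_cache_destroy_fn);
	queue_work(system_unbound_wq, &dd->work);
}
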
-- 
Mateusz Guzik <mjguzik gmail.com>


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-21 16:51     ` Mateusz Guzik
@ 2025-02-21 16:52       ` Mateusz Guzik
  0 siblings, 0 replies; 44+ messages in thread
From: Mateusz Guzik @ 2025-02-21 16:52 UTC (permalink / raw)
  To: Keith Busch
  Cc: Vlastimil Babka, Paul E. McKenney, Joel Fernandes, Josh Triplett,
	Boqun Feng, Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, Hyeonggon Yoo, linux-mm,
	linux-kernel, rcu, Alexander Potapenko, Marco Elver,
	Dmitry Vyukov, kasan-dev, Jann Horn, linux-nvme, leitao

On Fri, Feb 21, 2025 at 5:51 PM Mateusz Guzik <mjguzik@gmail.com> wrote:
>
> On Fri, Feb 21, 2025 at 5:30 PM Keith Busch <kbusch@kernel.org> wrote:
> > This patch appears to be triggering a new warning in certain conditions
> > when tearing down an nvme namespace's block device. Stack trace is at
> > the end.
> >
> > The warning indicates that this shouldn't be called from a
> > WQ_MEM_RECLAIM workqueue. This workqueue is responsible for bringing up
> > and tearing down block devices, so this is a memory reclaim use AIUI.
> > I'm a bit confused why we can't tear down a disk from within a memory
> > reclaim workqueue. Is the recommended solution to simply remove the WQ
> > flag when creating the workqueue?
> >
>
> This ends up calling into bioset_exit -> bio_put_slab -> kmem_cache_destroy
>
> Sizes of the bio- slabs are off the beaten path, so it may be they
> make sense to exist.
>
> With the assumption that caches should be there, this can instead
> invoke kmem_cache_destroy from a queue where it is safe to do it. This
> is not supposed to be a frequent operation.

Erm, I mean defer the operation to a queue which can safely do it.

-- 
Mateusz Guzik <mjguzik gmail.com>


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-21 16:30   ` [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy() Keith Busch
  2025-02-21 16:51     ` Mateusz Guzik
@ 2025-02-21 17:28     ` Vlastimil Babka
  2025-02-24 11:44       ` Uladzislau Rezki
  1 sibling, 1 reply; 44+ messages in thread
From: Vlastimil Babka @ 2025-02-21 17:28 UTC (permalink / raw)
  To: Keith Busch
  Cc: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, Hyeonggon Yoo, linux-mm,
	linux-kernel, rcu, Alexander Potapenko, Marco Elver,
	Dmitry Vyukov, kasan-dev, Jann Horn, Mateusz Guzik, linux-nvme,
	leitao

On 2/21/25 17:30, Keith Busch wrote:
> On Wed, Aug 07, 2024 at 12:31:19PM +0200, Vlastimil Babka wrote:
>> We would like to replace call_rcu() users with kfree_rcu() where the
>> existing callback is just a kmem_cache_free(). However this causes
>> issues when the cache can be destroyed (such as due to module unload).
>> 
>> Currently such modules should be issuing rcu_barrier() before
>> kmem_cache_destroy() to have their call_rcu() callbacks processed first.
>> This barrier is however not sufficient for kfree_rcu() in flight due
>> to the batching introduced by a35d16905efc ("rcu: Add basic support for
>> kfree_rcu() batching").
>> 
>> This is not a problem for kmalloc caches which are never destroyed, but
>> since removing SLOB, kfree_rcu() is allowed also for any other cache,
>> that might be destroyed.
>> 
>> In order not to complicate the API, put the responsibility for handling
>> outstanding kfree_rcu() in kmem_cache_destroy() itself. Use the newly
>> introduced kvfree_rcu_barrier() to wait before destroying the cache.
>> This is similar to how we issue rcu_barrier() for SLAB_TYPESAFE_BY_RCU
>> caches, but has to be done earlier, as the latter only needs to wait for
>> the empty slab pages to finish freeing, and not objects from the slab.
>> 
>> Users of call_rcu() with arbitrary callbacks should still issue
>> rcu_barrier() before destroying the cache and unloading the module, as
>> kvfree_rcu_barrier() is not a superset of rcu_barrier() and the
>> callbacks may be invoking module code or performing other actions that
>> are necessary for a successful unload.
>> 
>> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
>> ---
>>  mm/slab_common.c | 3 +++
>>  1 file changed, 3 insertions(+)
>> 
>> diff --git a/mm/slab_common.c b/mm/slab_common.c
>> index c40227d5fa07..1a2873293f5d 100644
>> --- a/mm/slab_common.c
>> +++ b/mm/slab_common.c
>> @@ -508,6 +508,9 @@ void kmem_cache_destroy(struct kmem_cache *s)
>>  	if (unlikely(!s) || !kasan_check_byte(s))
>>  		return;
>>  
>> +	/* in-flight kfree_rcu()'s may include objects from our cache */
>> +	kvfree_rcu_barrier();
>> +
>>  	cpus_read_lock();
>>  	mutex_lock(&slab_mutex);
> 
> This patch appears to be triggering a new warning in certain conditions
> when tearing down an nvme namespace's block device. Stack trace is at
> the end.
> 
> The warning indicates that this shouldn't be called from a
> WQ_MEM_RECLAIM workqueue. This workqueue is responsible for bringing up
> and tearing down block devices, so this is a memory reclaim use AIUI.
> I'm a bit confused why we can't tear down a disk from within a memory
> reclaim workqueue. Is the recommended solution to simply remove the WQ
> flag when creating the workqueue?

I think it's reasonable to expect that a memory-reclaim-related action would
destroy a kmem cache. Mateusz's suggestion would work around the issue, but
then we could get another surprising warning elsewhere. Also, making
kmem_cache destruction asynchronous can be tricky when a recreation happens
immediately under the same name (implications for sysfs/debugfs etc.). We
managed to make the destruction synchronous as part of this series and it
would be great to keep it that way.

>   ------------[ cut here ]------------
>   workqueue: WQ_MEM_RECLAIM nvme-wq:nvme_scan_work is flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work

Maybe instead kfree_rcu_work should be using a WQ_MEM_RECLAIM workqueue? It
is after all freeing memory. Ulad, what do you think?
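
For illustration only (the workqueue name and init hook below are made up,
not existing symbols), that would mean giving the kfree_rcu batching its own
reclaim-capable workqueue and queueing the batch work there instead of on the
!WQ_MEM_RECLAIM system unbound workqueue:

static struct workqueue_struct *kvfree_rcu_wq;

static int __init kvfree_rcu_wq_init(void)
{
	kvfree_rcu_wq = alloc_workqueue("kvfree_rcu",
					WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
	return kvfree_rcu_wq ? 0 : -ENOMEM;
}

/* ... the kfree_rcu batch/monitor work would then be queued via
 * queue_work()/queue_rcu_work() on kvfree_rcu_wq rather than on
 * system_unbound_wq, so flushing it from a WQ_MEM_RECLAIM context
 * no longer trips check_flush_dependency(). */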

>   WARNING: CPU: 21 PID: 330 at kernel/workqueue.c:3719 check_flush_dependency+0x112/0x120
>   Modules linked in: intel_uncore_frequency(E) intel_uncore_frequency_common(E) skx_edac(E) skx_edac_common(E) nfit(E) libnvdimm(E) x86_pkg_temp_thermal(E) intel_powerclamp(E) coretemp(E) kvm_intel(E) iTCO_wdt(E) xhci_pci(E) mlx5_ib(E) ipmi_si(E) iTCO_vendor_support(E) i2c_i801(E) ipmi_devintf(E) evdev(E) kvm(E) xhci_hcd(E) ib_uverbs(E) acpi_cpufreq(E) wmi(E) i2c_smbus(E) ipmi_msghandler(E) button(E) efivarfs(E) autofs4(E)
>   CPU: 21 UID: 0 PID: 330 Comm: kworker/u144:6 Tainted: G            E      6.13.2-0_g925d379822da #1
>   Hardware name: Wiwynn Twin Lakes MP/Twin Lakes Passive MP, BIOS YMM20 02/01/2023
>   Workqueue: nvme-wq nvme_scan_work
>   RIP: 0010:check_flush_dependency+0x112/0x120
>   Code: 05 9a 40 14 02 01 48 81 c6 c0 00 00 00 48 8b 50 18 48 81 c7 c0 00 00 00 48 89 f9 48 c7 c7 90 64 5a 82 49 89 d8 e8 7e 4f 88 ff <0f> 0b eb 8c cc cc cc cc cc cc cc cc cc cc 0f 1f 44 00 00 41 57 41
>   RSP: 0018:ffffc90000df7bd8 EFLAGS: 00010082
>   RAX: 000000000000006a RBX: ffffffff81622390 RCX: 0000000000000027
>   RDX: 00000000fffeffff RSI: 000000000057ffa8 RDI: ffff88907f960c88
>   RBP: 0000000000000000 R08: ffffffff83068e50 R09: 000000000002fffd
>   R10: 0000000000000004 R11: 0000000000000000 R12: ffff8881001a4400
>   R13: 0000000000000000 R14: ffff88907f420fb8 R15: 0000000000000000
>   FS:  0000000000000000(0000) GS:ffff88907f940000(0000) knlGS:0000000000000000
>   CR2: 00007f60c3001000 CR3: 000000107d010005 CR4: 00000000007726f0
>   PKRU: 55555554
>   Call Trace:
>    <TASK>
>    ? __warn+0xa4/0x140
>    ? check_flush_dependency+0x112/0x120
>    ? report_bug+0xe1/0x140
>    ? check_flush_dependency+0x112/0x120
>    ? handle_bug+0x5e/0x90
>    ? exc_invalid_op+0x16/0x40
>    ? asm_exc_invalid_op+0x16/0x20
>    ? timer_recalc_next_expiry+0x190/0x190
>    ? check_flush_dependency+0x112/0x120
>    ? check_flush_dependency+0x112/0x120
>    __flush_work.llvm.1643880146586177030+0x174/0x2c0
>    flush_rcu_work+0x28/0x30
>    kvfree_rcu_barrier+0x12f/0x160
>    kmem_cache_destroy+0x18/0x120
>    bioset_exit+0x10c/0x150
>    disk_release.llvm.6740012984264378178+0x61/0xd0
>    device_release+0x4f/0x90
>    kobject_put+0x95/0x180
>    nvme_put_ns+0x23/0xc0
>    nvme_remove_invalid_namespaces+0xb3/0xd0
>    nvme_scan_work+0x342/0x490
>    process_scheduled_works+0x1a2/0x370
>    worker_thread+0x2ff/0x390
>    ? pwq_release_workfn+0x1e0/0x1e0
>    kthread+0xb1/0xe0
>    ? __kthread_parkme+0x70/0x70
>    ret_from_fork+0x30/0x40
>    ? __kthread_parkme+0x70/0x70
>    ret_from_fork_asm+0x11/0x20
>    </TASK>
>   ---[ end trace 0000000000000000 ]---



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-21 17:28     ` Vlastimil Babka
@ 2025-02-24 11:44       ` Uladzislau Rezki
  2025-02-24 15:37         ` Keith Busch
  2025-02-25  9:57         ` Vlastimil Babka
  0 siblings, 2 replies; 44+ messages in thread
From: Uladzislau Rezki @ 2025-02-24 11:44 UTC (permalink / raw)
  To: Vlastimil Babka, Keith Busch
  Cc: Keith Busch, Paul E. McKenney, Joel Fernandes, Josh Triplett,
	Boqun Feng, Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, Hyeonggon Yoo, linux-mm,
	linux-kernel, rcu, Alexander Potapenko, Marco Elver,
	Dmitry Vyukov, kasan-dev, Jann Horn, Mateusz Guzik, linux-nvme,
	leitao

On Fri, Feb 21, 2025 at 06:28:49PM +0100, Vlastimil Babka wrote:
> On 2/21/25 17:30, Keith Busch wrote:
> > On Wed, Aug 07, 2024 at 12:31:19PM +0200, Vlastimil Babka wrote:
> >> We would like to replace call_rcu() users with kfree_rcu() where the
> >> existing callback is just a kmem_cache_free(). However this causes
> >> issues when the cache can be destroyed (such as due to module unload).
> >> 
> >> Currently such modules should be issuing rcu_barrier() before
> >> kmem_cache_destroy() to have their call_rcu() callbacks processed first.
> >> This barrier is however not sufficient for kfree_rcu() in flight due
> >> to the batching introduced by a35d16905efc ("rcu: Add basic support for
> >> kfree_rcu() batching").
> >> 
> >> This is not a problem for kmalloc caches which are never destroyed, but
> >> since removing SLOB, kfree_rcu() is allowed also for any other cache,
> >> that might be destroyed.
> >> 
> >> In order not to complicate the API, put the responsibility for handling
> >> outstanding kfree_rcu() in kmem_cache_destroy() itself. Use the newly
> >> introduced kvfree_rcu_barrier() to wait before destroying the cache.
> >> This is similar to how we issue rcu_barrier() for SLAB_TYPESAFE_BY_RCU
> >> caches, but has to be done earlier, as the latter only needs to wait for
> >> the empty slab pages to finish freeing, and not objects from the slab.
> >> 
> >> Users of call_rcu() with arbitrary callbacks should still issue
> >> rcu_barrier() before destroying the cache and unloading the module, as
> >> kvfree_rcu_barrier() is not a superset of rcu_barrier() and the
> >> callbacks may be invoking module code or performing other actions that
> >> are necessary for a successful unload.
> >> 
> >> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> >> ---
> >>  mm/slab_common.c | 3 +++
> >>  1 file changed, 3 insertions(+)
> >> 
> >> diff --git a/mm/slab_common.c b/mm/slab_common.c
> >> index c40227d5fa07..1a2873293f5d 100644
> >> --- a/mm/slab_common.c
> >> +++ b/mm/slab_common.c
> >> @@ -508,6 +508,9 @@ void kmem_cache_destroy(struct kmem_cache *s)
> >>  	if (unlikely(!s) || !kasan_check_byte(s))
> >>  		return;
> >>  
> >> +	/* in-flight kfree_rcu()'s may include objects from our cache */
> >> +	kvfree_rcu_barrier();
> >> +
> >>  	cpus_read_lock();
> >>  	mutex_lock(&slab_mutex);
> > 
> > This patch appears to be triggering a new warning in certain conditions
> > when tearing down an nvme namespace's block device. Stack trace is at
> > the end.
> > 
> > The warning indicates that this shouldn't be called from a
> > WQ_MEM_RECLAIM workqueue. This workqueue is responsible for bringing up
> > and tearing down block devices, so this is a memory reclaim use AIUI.
> > I'm a bit confused why we can't tear down a disk from within a memory
> > reclaim workqueue. Is the recommended solution to simply remove the WQ
> > flag when creating the workqueue?
> 
> I think it's reasonable to expect a memory reclaim related action would
> destroy a kmem cache. Mateusz's suggestion would work around the issue, but
> then we could get another surprising warning elsewhere. Also making the
> kmem_cache destroys async can be tricky when a recreation happens
> immediately under the same name (implications with sysfs/debugfs etc). We
> managed to make the destroying synchronous as part of this series and it
> would be great to keep it that way.
> 
> >   ------------[ cut here ]------------
> >   workqueue: WQ_MEM_RECLAIM nvme-wq:nvme_scan_work is flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work
> 
> Maybe instead kfree_rcu_work should be using a WQ_MEM_RECLAIM workqueue? It
> is after all freeing memory. Ulad, what do you think?
> 
We reclaim memory, therefore WQ_MEM_RECLAIM seems to be what we need.
AFAIR, there is an extra rescuer worker, which can really help under
low-memory conditions by ensuring we still make progress.

Do we have a reproducer of the mentioned splat?

--
Uladzislau Rezki


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-24 11:44       ` Uladzislau Rezki
@ 2025-02-24 15:37         ` Keith Busch
  2025-02-25  9:57         ` Vlastimil Babka
  1 sibling, 0 replies; 44+ messages in thread
From: Keith Busch @ 2025-02-24 15:37 UTC (permalink / raw)
  To: Uladzislau Rezki
  Cc: Vlastimil Babka, Paul E. McKenney, Joel Fernandes, Josh Triplett,
	Boqun Feng, Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik, linux-nvme, leitao

On Mon, Feb 24, 2025 at 12:44:46PM +0100, Uladzislau Rezki wrote:
> On Fri, Feb 21, 2025 at 06:28:49PM +0100, Vlastimil Babka wrote:
> > > 
> > > The warning indicates that this shouldn't be called from a
> > > WQ_MEM_RECLAIM workqueue. This workqueue is responsible for bringing up
> > > and tearing down block devices, so this is a memory reclaim use AIUI.
> > > I'm a bit confused why we can't tear down a disk from within a memory
> > > reclaim workqueue. Is the recommended solution to simply remove the WQ
> > > flag when creating the workqueue?
> > 
> > I think it's reasonable to expect a memory reclaim related action would
> > destroy a kmem cache. Mateusz's suggestion would work around the issue, but
> > then we could get another surprising warning elsewhere. Also making the
> > kmem_cache destroys async can be tricky when a recreation happens
> > immediately under the same name (implications with sysfs/debugfs etc). We
> > managed to make the destroying synchronous as part of this series and it
> > would be great to keep it that way.
> > 
> > >   ------------[ cut here ]------------
> > >   workqueue: WQ_MEM_RECLAIM nvme-wq:nvme_scan_work is flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work
> > 
> > Maybe instead kfree_rcu_work should be using a WQ_MEM_RECLAIM workqueue? It
> > is after all freeing memory. Ulad, what do you think?
> > 
> We reclaim memory, therefore WQ_MEM_RECLAIM seems what we need.
> AFAIR, there is an extra rescue worker, which can really help
> under a low memory condition in a way that we do a progress.
> 
> Do we have a reproducer of mentioned splat?

We're observing this happen in production, and I'm trying to get more
details on what is going on there. The stack trace says that the nvme
controller deleted a namespace, and it happens to also be the last disk
that drops the slab's final ref, which deletes the kmem_cache. I think
this must be part of some automated reimaging process, as the disk is
immediately recreated followed by a kexec.

Trying to manually recreate this hasn't been successful so far because
it's never the last disk on my test machines, so I'm always seeing a
non-zero ref when deleting namespaces from this nvme workqueue.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-24 11:44       ` Uladzislau Rezki
  2025-02-24 15:37         ` Keith Busch
@ 2025-02-25  9:57         ` Vlastimil Babka
  2025-02-25 13:39           ` Uladzislau Rezki
  2025-02-25 16:03           ` Keith Busch
  1 sibling, 2 replies; 44+ messages in thread
From: Vlastimil Babka @ 2025-02-25  9:57 UTC (permalink / raw)
  To: Uladzislau Rezki, Keith Busch
  Cc: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik, linux-nvme, leitao

On 2/24/25 12:44, Uladzislau Rezki wrote:
> On Fri, Feb 21, 2025 at 06:28:49PM +0100, Vlastimil Babka wrote:
>> On 2/21/25 17:30, Keith Busch wrote:
>> > On Wed, Aug 07, 2024 at 12:31:19PM +0200, Vlastimil Babka wrote:
>> >> We would like to replace call_rcu() users with kfree_rcu() where the
>> >> existing callback is just a kmem_cache_free(). However this causes
>> >> issues when the cache can be destroyed (such as due to module unload).
>> >> 
>> >> Currently such modules should be issuing rcu_barrier() before
>> >> kmem_cache_destroy() to have their call_rcu() callbacks processed first.
>> >> This barrier is however not sufficient for kfree_rcu() in flight due
>> >> to the batching introduced by a35d16905efc ("rcu: Add basic support for
>> >> kfree_rcu() batching").
>> >> 
>> >> This is not a problem for kmalloc caches which are never destroyed, but
>> >> since removing SLOB, kfree_rcu() is allowed also for any other cache,
>> >> that might be destroyed.
>> >> 
>> >> In order not to complicate the API, put the responsibility for handling
>> >> outstanding kfree_rcu() in kmem_cache_destroy() itself. Use the newly
>> >> introduced kvfree_rcu_barrier() to wait before destroying the cache.
>> >> This is similar to how we issue rcu_barrier() for SLAB_TYPESAFE_BY_RCU
>> >> caches, but has to be done earlier, as the latter only needs to wait for
>> >> the empty slab pages to finish freeing, and not objects from the slab.
>> >> 
>> >> Users of call_rcu() with arbitrary callbacks should still issue
>> >> rcu_barrier() before destroying the cache and unloading the module, as
>> >> kvfree_rcu_barrier() is not a superset of rcu_barrier() and the
>> >> callbacks may be invoking module code or performing other actions that
>> >> are necessary for a successful unload.
>> >> 
>> >> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
>> >> ---
>> >>  mm/slab_common.c | 3 +++
>> >>  1 file changed, 3 insertions(+)
>> >> 
>> >> diff --git a/mm/slab_common.c b/mm/slab_common.c
>> >> index c40227d5fa07..1a2873293f5d 100644
>> >> --- a/mm/slab_common.c
>> >> +++ b/mm/slab_common.c
>> >> @@ -508,6 +508,9 @@ void kmem_cache_destroy(struct kmem_cache *s)
>> >>  	if (unlikely(!s) || !kasan_check_byte(s))
>> >>  		return;
>> >>  
>> >> +	/* in-flight kfree_rcu()'s may include objects from our cache */
>> >> +	kvfree_rcu_barrier();
>> >> +
>> >>  	cpus_read_lock();
>> >>  	mutex_lock(&slab_mutex);
>> > 
>> > This patch appears to be triggering a new warning in certain conditions
>> > when tearing down an nvme namespace's block device. Stack trace is at
>> > the end.
>> > 
>> > The warning indicates that this shouldn't be called from a
>> > WQ_MEM_RECLAIM workqueue. This workqueue is responsible for bringing up
>> > and tearing down block devices, so this is a memory reclaim use AIUI.
>> > I'm a bit confused why we can't tear down a disk from within a memory
>> > reclaim workqueue. Is the recommended solution to simply remove the WQ
>> > flag when creating the workqueue?
>> 
>> I think it's reasonable to expect a memory reclaim related action would
>> destroy a kmem cache. Mateusz's suggestion would work around the issue, but
>> then we could get another surprising warning elsewhere. Also making the
>> kmem_cache destroys async can be tricky when a recreation happens
>> immediately under the same name (implications with sysfs/debugfs etc). We
>> managed to make the destroying synchronous as part of this series and it
>> would be great to keep it that way.
>> 
>> >   ------------[ cut here ]------------
>> >   workqueue: WQ_MEM_RECLAIM nvme-wq:nvme_scan_work is flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work
>> 
>> Maybe instead kfree_rcu_work should be using a WQ_MEM_RECLAIM workqueue? It
>> is after all freeing memory. Ulad, what do you think?
>> 
> We reclaim memory, therefore WQ_MEM_RECLAIM seems what we need.
> AFAIR, there is an extra rescue worker, which can really help
> under a low memory condition in a way that we do a progress.
> 
> Do we have a reproducer of mentioned splat?

I tried to create a kunit test for it, but it doesn't trigger anything. Maybe
it's too simple, or racy, and thus we are not flushing any of the queues from
kvfree_rcu_barrier()?

----8<----
From 1e19ea78e7fe254034970f75e3b7cb705be50163 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Tue, 25 Feb 2025 10:51:28 +0100
Subject: [PATCH] add test for kmem_cache_destroy in a workqueue

---
 lib/slub_kunit.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
index f11691315c2f..5fe9775fba05 100644
--- a/lib/slub_kunit.c
+++ b/lib/slub_kunit.c
@@ -6,6 +6,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/rcupdate.h>
+#include <linux/delay.h>
 #include "../mm/slab.h"
 
 static struct kunit_resource resource;
@@ -181,6 +182,52 @@ static void test_kfree_rcu(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, 0, slab_errors);
 }
 
+struct cache_destroy_work {
+        struct work_struct work;
+        struct kmem_cache *s;
+};
+
+static void cache_destroy_workfn(struct work_struct *w)
+{
+	struct cache_destroy_work *cdw;
+
+	cdw = container_of(w, struct cache_destroy_work, work);
+
+	kmem_cache_destroy(cdw->s);
+}
+
+static void test_kfree_rcu_wq_destroy(struct kunit *test)
+{
+	struct test_kfree_rcu_struct *p;
+	struct cache_destroy_work cdw;
+	struct workqueue_struct *wq;
+	struct kmem_cache *s;
+
+	if (IS_BUILTIN(CONFIG_SLUB_KUNIT_TEST))
+		kunit_skip(test, "can't do kfree_rcu() when test is built-in");
+
+	INIT_WORK_ONSTACK(&cdw.work, cache_destroy_workfn);
+	wq = alloc_workqueue("test_kfree_rcu_destroy_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
+	if (!wq)
+		kunit_skip(test, "failed to alloc wq");
+
+	s = test_kmem_cache_create("TestSlub_kfree_rcu_wq_destroy",
+				   sizeof(struct test_kfree_rcu_struct),
+				   SLAB_NO_MERGE);
+	p = kmem_cache_alloc(s, GFP_KERNEL);
+
+	kfree_rcu(p, rcu);
+
+	cdw.s = s;
+	queue_work(wq, &cdw.work);
+	msleep(1000);
+	flush_work(&cdw.work);
+
+	destroy_workqueue(wq);
+
+	KUNIT_EXPECT_EQ(test, 0, slab_errors);
+}
+
 static void test_leak_destroy(struct kunit *test)
 {
 	struct kmem_cache *s = test_kmem_cache_create("TestSlub_leak_destroy",
@@ -254,6 +301,7 @@ static struct kunit_case test_cases[] = {
 	KUNIT_CASE(test_clobber_redzone_free),
 	KUNIT_CASE(test_kmalloc_redzone_access),
 	KUNIT_CASE(test_kfree_rcu),
+	KUNIT_CASE(test_kfree_rcu_wq_destroy),
 	KUNIT_CASE(test_leak_destroy),
 	KUNIT_CASE(test_krealloc_redzone_zeroing),
 	{}
-- 
2.48.1




^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-25  9:57         ` Vlastimil Babka
@ 2025-02-25 13:39           ` Uladzislau Rezki
  2025-02-25 14:12             ` Vlastimil Babka
  2025-02-25 16:03           ` Keith Busch
  1 sibling, 1 reply; 44+ messages in thread
From: Uladzislau Rezki @ 2025-02-25 13:39 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Uladzislau Rezki, Keith Busch, Paul E. McKenney, Joel Fernandes,
	Josh Triplett, Boqun Feng, Christoph Lameter, David Rientjes,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Julia Lawall, Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik, linux-nvme, leitao

On Tue, Feb 25, 2025 at 10:57:38AM +0100, Vlastimil Babka wrote:
> On 2/24/25 12:44, Uladzislau Rezki wrote:
> > On Fri, Feb 21, 2025 at 06:28:49PM +0100, Vlastimil Babka wrote:
> >> On 2/21/25 17:30, Keith Busch wrote:
> >> > On Wed, Aug 07, 2024 at 12:31:19PM +0200, Vlastimil Babka wrote:
> >> >> We would like to replace call_rcu() users with kfree_rcu() where the
> >> >> existing callback is just a kmem_cache_free(). However this causes
> >> >> issues when the cache can be destroyed (such as due to module unload).
> >> >> 
> >> >> Currently such modules should be issuing rcu_barrier() before
> >> >> kmem_cache_destroy() to have their call_rcu() callbacks processed first.
> >> >> This barrier is however not sufficient for kfree_rcu() in flight due
> >> >> to the batching introduced by a35d16905efc ("rcu: Add basic support for
> >> >> kfree_rcu() batching").
> >> >> 
> >> >> This is not a problem for kmalloc caches which are never destroyed, but
> >> >> since removing SLOB, kfree_rcu() is allowed also for any other cache,
> >> >> that might be destroyed.
> >> >> 
> >> >> In order not to complicate the API, put the responsibility for handling
> >> >> outstanding kfree_rcu() in kmem_cache_destroy() itself. Use the newly
> >> >> introduced kvfree_rcu_barrier() to wait before destroying the cache.
> >> >> This is similar to how we issue rcu_barrier() for SLAB_TYPESAFE_BY_RCU
> >> >> caches, but has to be done earlier, as the latter only needs to wait for
> >> >> the empty slab pages to finish freeing, and not objects from the slab.
> >> >> 
> >> >> Users of call_rcu() with arbitrary callbacks should still issue
> >> >> rcu_barrier() before destroying the cache and unloading the module, as
> >> >> kvfree_rcu_barrier() is not a superset of rcu_barrier() and the
> >> >> callbacks may be invoking module code or performing other actions that
> >> >> are necessary for a successful unload.
> >> >> 
> >> >> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> >> >> ---
> >> >>  mm/slab_common.c | 3 +++
> >> >>  1 file changed, 3 insertions(+)
> >> >> 
> >> >> diff --git a/mm/slab_common.c b/mm/slab_common.c
> >> >> index c40227d5fa07..1a2873293f5d 100644
> >> >> --- a/mm/slab_common.c
> >> >> +++ b/mm/slab_common.c
> >> >> @@ -508,6 +508,9 @@ void kmem_cache_destroy(struct kmem_cache *s)
> >> >>  	if (unlikely(!s) || !kasan_check_byte(s))
> >> >>  		return;
> >> >>  
> >> >> +	/* in-flight kfree_rcu()'s may include objects from our cache */
> >> >> +	kvfree_rcu_barrier();
> >> >> +
> >> >>  	cpus_read_lock();
> >> >>  	mutex_lock(&slab_mutex);
> >> > 
> >> > This patch appears to be triggering a new warning in certain conditions
> >> > when tearing down an nvme namespace's block device. Stack trace is at
> >> > the end.
> >> > 
> >> > The warning indicates that this shouldn't be called from a
> >> > WQ_MEM_RECLAIM workqueue. This workqueue is responsible for bringing up
> >> > and tearing down block devices, so this is a memory reclaim use AIUI.
> >> > I'm a bit confused why we can't tear down a disk from within a memory
> >> > reclaim workqueue. Is the recommended solution to simply remove the WQ
> >> > flag when creating the workqueue?
> >> 
> >> I think it's reasonable to expect a memory reclaim related action would
> >> destroy a kmem cache. Mateusz's suggestion would work around the issue, but
>> then we could get another surprising warning elsewhere. Also making
>> kmem_cache destruction async can be tricky when a recreation happens
>> immediately under the same name (implications for sysfs/debugfs etc). We
>> managed to make the destruction synchronous as part of this series and it
>> would be great to keep it that way.
> >> 
> >> >   ------------[ cut here ]------------
> >> >   workqueue: WQ_MEM_RECLAIM nvme-wq:nvme_scan_work is flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work
> >> 
> >> Maybe instead kfree_rcu_work should be using a WQ_MEM_RECLAIM workqueue? It
> >> is after all freeing memory. Ulad, what do you think?
> >> 
> > We reclaim memory, therefore WQ_MEM_RECLAIM seems to be what we need.
> > AFAIR, there is an extra rescue worker, which can really help
> > under low memory conditions by making sure we still make progress.
> > 
> > Do we have a reproducer of mentioned splat?
> 
> I tried to create a kunit test for it, but it doesn't trigger anything. Maybe
> it's too simple, or racy, and thus we are not flushing any of the queues from
> kvfree_rcu_barrier()?
> 
See some comments below. I will try to reproduce it today. But at first
glance it should trigger it.

> ----8<----
> From 1e19ea78e7fe254034970f75e3b7cb705be50163 Mon Sep 17 00:00:00 2001
> From: Vlastimil Babka <vbabka@suse.cz>
> Date: Tue, 25 Feb 2025 10:51:28 +0100
> Subject: [PATCH] add test for kmem_cache_destroy in a workqueue
> 
> ---
>  lib/slub_kunit.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 48 insertions(+)
> 
> diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
> index f11691315c2f..5fe9775fba05 100644
> --- a/lib/slub_kunit.c
> +++ b/lib/slub_kunit.c
> @@ -6,6 +6,7 @@
>  #include <linux/module.h>
>  #include <linux/kernel.h>
>  #include <linux/rcupdate.h>
> +#include <linux/delay.h>
>  #include "../mm/slab.h"
>  
>  static struct kunit_resource resource;
> @@ -181,6 +182,52 @@ static void test_kfree_rcu(struct kunit *test)
>  	KUNIT_EXPECT_EQ(test, 0, slab_errors);
>  }
>  
> +struct cache_destroy_work {
> +        struct work_struct work;
> +        struct kmem_cache *s;
> +};
> +
> +static void cache_destroy_workfn(struct work_struct *w)
> +{
> +	struct cache_destroy_work *cdw;
> +
> +	cdw = container_of(w, struct cache_destroy_work, work);
> +
> +	kmem_cache_destroy(cdw->s);
> +}
> +
> +static void test_kfree_rcu_wq_destroy(struct kunit *test)
> +{
> +	struct test_kfree_rcu_struct *p;
> +	struct cache_destroy_work cdw;
> +	struct workqueue_struct *wq;
> +	struct kmem_cache *s;
> +
> +	if (IS_BUILTIN(CONFIG_SLUB_KUNIT_TEST))
> +		kunit_skip(test, "can't do kfree_rcu() when test is built-in");
> +
> +	INIT_WORK_ONSTACK(&cdw.work, cache_destroy_workfn);
> +	wq = alloc_workqueue("test_kfree_rcu_destroy_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
>
Maybe it is also worth adding WQ_HIGHPRI, so this work gets ahead of other queued work?
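
E.g. something like below (untested, just to illustrate the flag):

	wq = alloc_workqueue("test_kfree_rcu_destroy_wq",
			WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, 0);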

> +	if (!wq)
> +		kunit_skip(test, "failed to alloc wq");
> +
> +	s = test_kmem_cache_create("TestSlub_kfree_rcu_wq_destroy",
> +				   sizeof(struct test_kfree_rcu_struct),
> +				   SLAB_NO_MERGE);
> +	p = kmem_cache_alloc(s, GFP_KERNEL);
> +
> +	kfree_rcu(p, rcu);
> +
> +	cdw.s = s;
> +	queue_work(wq, &cdw.work);
> +	msleep(1000);
I am not sure it is needed. On the other hand, it does no harm, if I am not
missing anything.

> +	flush_work(&cdw.work);
> +
> +	destroy_workqueue(wq);
> +
> +	KUNIT_EXPECT_EQ(test, 0, slab_errors);
> +}
> +
>  static void test_leak_destroy(struct kunit *test)
>  {
>  	struct kmem_cache *s = test_kmem_cache_create("TestSlub_leak_destroy",
> @@ -254,6 +301,7 @@ static struct kunit_case test_cases[] = {
>  	KUNIT_CASE(test_clobber_redzone_free),
>  	KUNIT_CASE(test_kmalloc_redzone_access),
>  	KUNIT_CASE(test_kfree_rcu),
> +	KUNIT_CASE(test_kfree_rcu_wq_destroy),
>  	KUNIT_CASE(test_leak_destroy),
>  	KUNIT_CASE(test_krealloc_redzone_zeroing),
>  	{}
> -- 
> 2.48.1
> 
> 

--
Uladzislau Rezki



* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-25 13:39           ` Uladzislau Rezki
@ 2025-02-25 14:12             ` Vlastimil Babka
  0 siblings, 0 replies; 44+ messages in thread
From: Vlastimil Babka @ 2025-02-25 14:12 UTC (permalink / raw)
  To: Uladzislau Rezki
  Cc: Keith Busch, Paul E. McKenney, Joel Fernandes, Josh Triplett,
	Boqun Feng, Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik, linux-nvme, leitao

On 2/25/25 14:39, Uladzislau Rezki wrote:
> On Tue, Feb 25, 2025 at 10:57:38AM +0100, Vlastimil Babka wrote:
>> On 2/24/25 12:44, Uladzislau Rezki wrote:
>> > On Fri, Feb 21, 2025 at 06:28:49PM +0100, Vlastimil Babka wrote:
>> >> On 2/21/25 17:30, Keith Busch wrote:
>> >> >   ------------[ cut here ]------------
>> >> >   workqueue: WQ_MEM_RECLAIM nvme-wq:nvme_scan_work is flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work
>> >> 
>> >> Maybe instead kfree_rcu_work should be using a WQ_MEM_RECLAIM workqueue? It
>> >> is after all freeing memory. Ulad, what do you think?
>> >> 
>> > We reclaim memory, therefore WQ_MEM_RECLAIM seems to be what we need.
>> > AFAIR, there is an extra rescue worker, which can really help
>> > under low memory conditions by making sure we still make progress.
>> > 
>> > Do we have a reproducer of mentioned splat?
>> 
>> I tried to create a kunit test for it, but it doesn't trigger anything. Maybe
>> it's too simple, or racy, and thus we are not flushing any of the queues from
>> kvfree_rcu_barrier()?
>> 
> See some comments below. I will try to reproduce it today. But at first
> glance it should trigger it.
> 
>> ----8<----
>> From 1e19ea78e7fe254034970f75e3b7cb705be50163 Mon Sep 17 00:00:00 2001
>> From: Vlastimil Babka <vbabka@suse.cz>
>> Date: Tue, 25 Feb 2025 10:51:28 +0100
>> Subject: [PATCH] add test for kmem_cache_destroy in a workqueue
>> 
>> ---
>>  lib/slub_kunit.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 48 insertions(+)
>> 
>> diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
>> index f11691315c2f..5fe9775fba05 100644
>> --- a/lib/slub_kunit.c
>> +++ b/lib/slub_kunit.c
>> @@ -6,6 +6,7 @@
>>  #include <linux/module.h>
>>  #include <linux/kernel.h>
>>  #include <linux/rcupdate.h>
>> +#include <linux/delay.h>
>>  #include "../mm/slab.h"
>>  
>>  static struct kunit_resource resource;
>> @@ -181,6 +182,52 @@ static void test_kfree_rcu(struct kunit *test)
>>  	KUNIT_EXPECT_EQ(test, 0, slab_errors);
>>  }
>>  
>> +struct cache_destroy_work {
>> +        struct work_struct work;
>> +        struct kmem_cache *s;
>> +};
>> +
>> +static void cache_destroy_workfn(struct work_struct *w)
>> +{
>> +	struct cache_destroy_work *cdw;
>> +
>> +	cdw = container_of(w, struct cache_destroy_work, work);
>> +
>> +	kmem_cache_destroy(cdw->s);
>> +}
>> +
>> +static void test_kfree_rcu_wq_destroy(struct kunit *test)
>> +{
>> +	struct test_kfree_rcu_struct *p;
>> +	struct cache_destroy_work cdw;
>> +	struct workqueue_struct *wq;
>> +	struct kmem_cache *s;
>> +
>> +	if (IS_BUILTIN(CONFIG_SLUB_KUNIT_TEST))
>> +		kunit_skip(test, "can't do kfree_rcu() when test is built-in");
>> +
>> +	INIT_WORK_ONSTACK(&cdw.work, cache_destroy_workfn);
>> +	wq = alloc_workqueue("test_kfree_rcu_destroy_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
>>
> Maybe it is also worth adding WQ_HIGHPRI, so this work gets ahead of other queued work?

I looked at what nvme_wq uses:

unsigned int wq_flags = WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS;

HIGHPRI wasn't there, and sysfs didn't seem important.


>> +	if (!wq)
>> +		kunit_skip(test, "failed to alloc wq");
>> +
>> +	s = test_kmem_cache_create("TestSlub_kfree_rcu_wq_destroy",
>> +				   sizeof(struct test_kfree_rcu_struct),
>> +				   SLAB_NO_MERGE);
>> +	p = kmem_cache_alloc(s, GFP_KERNEL);
>> +
>> +	kfree_rcu(p, rcu);
>> +
>> +	cdw.s = s;
>> +	queue_work(wq, &cdw.work);
>> +	msleep(1000);
> I am not sure it is needed. From the other hand it does nothing if
> i do not miss anything.

I tried adding that in case it makes any difference (letting the
processing happen on its own instead of flushing immediately), but the
result was the same either way, no warning. AFAICS it also doesn't depend
on some debug CONFIG_ I could be missing, but maybe I'm wrong.

Hope you have more success :) Thanks.

>> +	flush_work(&cdw.work);
>> +
>> +	destroy_workqueue(wq);
>> +
>> +	KUNIT_EXPECT_EQ(test, 0, slab_errors);
>> +}
>> +
>>  static void test_leak_destroy(struct kunit *test)
>>  {
>>  	struct kmem_cache *s = test_kmem_cache_create("TestSlub_leak_destroy",
>> @@ -254,6 +301,7 @@ static struct kunit_case test_cases[] = {
>>  	KUNIT_CASE(test_clobber_redzone_free),
>>  	KUNIT_CASE(test_kmalloc_redzone_access),
>>  	KUNIT_CASE(test_kfree_rcu),
>> +	KUNIT_CASE(test_kfree_rcu_wq_destroy),
>>  	KUNIT_CASE(test_leak_destroy),
>>  	KUNIT_CASE(test_krealloc_redzone_zeroing),
>>  	{}
>> -- 
>> 2.48.1
>> 
>> 
> 
> --
> Uladzislau Rezki




* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-25  9:57         ` Vlastimil Babka
  2025-02-25 13:39           ` Uladzislau Rezki
@ 2025-02-25 16:03           ` Keith Busch
  2025-02-25 17:05             ` Keith Busch
  1 sibling, 1 reply; 44+ messages in thread
From: Keith Busch @ 2025-02-25 16:03 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Uladzislau Rezki, Paul E. McKenney, Joel Fernandes,
	Josh Triplett, Boqun Feng, Christoph Lameter, David Rientjes,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Julia Lawall, Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik, linux-nvme, leitao

On Tue, Feb 25, 2025 at 10:57:38AM +0100, Vlastimil Babka wrote:
> I tried to create a kunit test for it, but it doesn't trigger anything. Maybe
> it's too simple, or racy, and thus we are not flushing any of the queues from
> kvfree_rcu_barrier()?

Thanks, your test readily triggers it for me, but only if I load
rcutorture at the same time.

[  142.223220] CPU: 11 UID: 0 PID: 186 Comm: kworker/u64:11 Tainted: G            E    N 6.13.0-04839-g5e7b40f0ddce-dirty #831
[  142.223222] Tainted: [E]=UNSIGNED_MODULE, [N]=TEST
[  142.223223] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[  142.223224] Workqueue: test_kfree_rcu_destroy_wq cache_destroy_workfn [slub_kunit]
[  142.223230] Call Trace:
[  142.223231]  <TASK>
[  142.223233]  dump_stack_lvl+0x64/0x90
[  142.223239]  print_circular_bug+0x2c5/0x400
[  142.223243]  check_noncircular+0x103/0x120
[  142.223246]  ? save_trace+0x3e/0x360
[  142.223249]  __lock_acquire+0x1481/0x24b0
[  142.223252]  lock_acquire+0xaa/0x2a0
[  142.223253]  ? console_lock_spinning_enable+0x3e/0x60
[  142.223255]  ? lock_release+0xb3/0x250
[  142.223257]  console_lock_spinning_enable+0x5a/0x60
[  142.223258]  ? console_lock_spinning_enable+0x3e/0x60
[  142.223260]  console_flush_all+0x2b1/0x490
[  142.223262]  ? console_flush_all+0x29/0x490
[  142.223264]  console_unlock+0x49/0xf0
[  142.223266]  vprintk_emit+0x12b/0x300
[  142.223269]  ? kfree_rcu_shrink_scan+0x120/0x120
[  142.223270]  _printk+0x48/0x50
[  142.223272]  ? kfree_rcu_shrink_scan+0x120/0x120
[  142.223273]  __warn_printk+0xb4/0xe0
[  142.223276]  ? 0xffffffffa05d6000
[  142.223278]  ? kfree_rcu_shrink_scan+0x120/0x120
[  142.223279]  check_flush_dependency.part.0+0xad/0x100
[  142.223282]  __flush_work+0x38a/0x4a0
[  142.223284]  ? find_held_lock+0x2b/0x80
[  142.223287]  ? flush_rcu_work+0x26/0x40
[  142.223289]  ? lock_release+0xb3/0x250
[  142.223290]  ? __mutex_unlock_slowpath+0x2c/0x270
[  142.223292]  flush_rcu_work+0x30/0x40
[  142.223294]  kvfree_rcu_barrier+0xe9/0x130
[  142.223296]  kmem_cache_destroy+0x2b/0x1f0
[  142.223297]  cache_destroy_workfn+0x20/0x40 [slub_kunit]
[  142.223299]  process_one_work+0x1cd/0x560
[  142.223302]  worker_thread+0x183/0x310
[  142.223304]  ? rescuer_thread+0x330/0x330
[  142.223306]  kthread+0xd8/0x1d0
[  142.223308]  ? ret_from_fork+0x17/0x50
[  142.223310]  ? lock_release+0xb3/0x250
[  142.223311]  ? kthreads_online_cpu+0xf0/0xf0
[  142.223313]  ret_from_fork+0x2d/0x50
[  142.223315]  ? kthreads_online_cpu+0xf0/0xf0
[  142.223317]  ret_from_fork_asm+0x11/0x20
[  142.223321]  </TASK>
[  142.371052] workqueue: WQ_MEM_RECLAIM test_kfree_rcu_destroy_wq:cache_destroy_workfn [slub_kunit] is flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work
[  142.371072] WARNING: CPU: 11 PID: 186 at kernel/workqueue.c:3715 check_flush_dependency.part.0+0xad/0x100
[  142.375748] Modules linked in: slub_kunit(E) rcutorture(E) torture(E) kunit(E) iTCO_wdt(E) iTCO_vendor_support(E) intel_uncore_frequency_common(E) skx_edac_common(E) nfit(E) libnvdimm(E) kvm_intel(E) kvm(E) evdev(E) bochs(E) serio_raw(E) drm_kms_helper(E) i2c_i801(E) e1000e(E) i2c_smbus(E) intel_agp(E) intel_gtt(E) lpc_ich(E) agpgart(E) mfd_core(E) drm_shm]



* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-25 16:03           ` Keith Busch
@ 2025-02-25 17:05             ` Keith Busch
  2025-02-25 17:41               ` Uladzislau Rezki
  0 siblings, 1 reply; 44+ messages in thread
From: Keith Busch @ 2025-02-25 17:05 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Uladzislau Rezki, Paul E. McKenney, Joel Fernandes,
	Josh Triplett, Boqun Feng, Christoph Lameter, David Rientjes,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Julia Lawall, Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik, linux-nvme, leitao

On Tue, Feb 25, 2025 at 09:03:38AM -0700, Keith Busch wrote:
> On Tue, Feb 25, 2025 at 10:57:38AM +0100, Vlastimil Babka wrote:
> > I tried to create a kunit test for it, but it doesn't trigger anything. Maybe
> > it's too simple, or racy, and thus we are not flushing any of the queues from
> > kvfree_rcu_barrier()?
>
> Thanks, your test readily triggers it for me, but only if I load
> rcutorture at the same time.

Oops, I sent the wrong kernel messages. This is the relevant part:

[  142.371052] workqueue: WQ_MEM_RECLAIM test_kfree_rcu_destroy_wq:cache_destroy_workfn [slub_kunit] is flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work
[  142.371072] WARNING: CPU: 11 PID: 186 at kernel/workqueue.c:3715 check_flush_dependency.part.0+0xad/0x100
[  142.375748] Modules linked in: slub_kunit(E) rcutorture(E) torture(E) kunit(E) iTCO_wdt(E) iTCO_vendor_support(E) intel_uncore_frequency_common(E) skx_edac_common(E) nfit(E) libnvdimm(E) kvm_intel(E) kvm(E) evdev(E) bochs(E) serio_raw(E) drm_kms_helper(E) i2c_i801(E) e1000e(E) i2c_smbus(E) intel_agp(E) intel_gtt(E) lpc_ich(E) agpgart(E) mfd_core(E) drm_shm]
[  142.384553] CPU: 11 UID: 0 PID: 186 Comm: kworker/u64:11 Tainted: G            E    N 6.13.0-04839-g5e7b40f0ddce-dirty #831
[  142.386755] Tainted: [E]=UNSIGNED_MODULE, [N]=TEST
[  142.387849] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[  142.390236] Workqueue: test_kfree_rcu_destroy_wq cache_destroy_workfn [slub_kunit]
[  142.391863] RIP: 0010:check_flush_dependency.part.0+0xad/0x100
[  142.393183] Code: 75 dc 48 8b 55 18 49 8d 8d 78 01 00 00 4d 89 f0 48 81 c6 78 01 00 00 48 c7 c7 00 e1 9a 82 c6 05 4f 39 c5 02 01 e8 53 bd fd ff <0f> 0b 5b 5d 41 5c 41 5d 41 5e c3 80 3d 39 39 c5 02 00 75 83 41 8b
[  142.396981] RSP: 0018:ffffc900007cfc90 EFLAGS: 00010092
[  142.398124] RAX: 000000000000008f RBX: ffff88803e9b10a0 RCX: 0000000000000027
[  142.399605] RDX: ffff88803eba0d08 RSI: 0000000000000001 RDI: ffff88803eba0d00
[  142.401092] RBP: ffff888007d9a480 R08: ffffffff83b8c808 R09: 0000000000000003
[  142.402548] R10: ffffffff8348c820 R11: ffffffff83a11d58 R12: ffff888007150000
[  142.404098] R13: ffff888005961400 R14: ffffffff813221a0 R15: ffff888005961400
[  142.405561] FS:  0000000000000000(0000) GS:ffff88803eb80000(0000) knlGS:0000000000000000
[  142.407297] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  142.408658] CR2: 00007f826bd1a000 CR3: 00000000069db002 CR4: 0000000000772ef0
[  142.410259] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  142.411871] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  142.413341] PKRU: 55555554
[  142.414038] Call Trace:
[  142.414658]  <TASK>
[  142.415249]  ? __warn+0x8d/0x180
[  142.416035]  ? check_flush_dependency.part.0+0xad/0x100
[  142.417182]  ? report_bug+0x160/0x170
[  142.418041]  ? handle_bug+0x4f/0x90
[  142.418861]  ? exc_invalid_op+0x14/0x70
[  142.419853]  ? asm_exc_invalid_op+0x16/0x20
[  142.420877]  ? kfree_rcu_shrink_scan+0x120/0x120
[  142.422029]  ? check_flush_dependency.part.0+0xad/0x100
[  142.423244]  __flush_work+0x38a/0x4a0
[  142.424157]  ? find_held_lock+0x2b/0x80
[  142.425070]  ? flush_rcu_work+0x26/0x40
[  142.425953]  ? lock_release+0xb3/0x250
[  142.426785]  ? __mutex_unlock_slowpath+0x2c/0x270
[  142.427906]  flush_rcu_work+0x30/0x40
[  142.428756]  kvfree_rcu_barrier+0xe9/0x130
[  142.429649]  kmem_cache_destroy+0x2b/0x1f0
[  142.430578]  cache_destroy_workfn+0x20/0x40 [slub_kunit]
[  142.431729]  process_one_work+0x1cd/0x560
[  142.432620]  worker_thread+0x183/0x310
[  142.433487]  ? rescuer_thread+0x330/0x330
[  142.434428]  kthread+0xd8/0x1d0
[  142.435248]  ? ret_from_fork+0x17/0x50
[  142.436165]  ? lock_release+0xb3/0x250
[  142.437106]  ? kthreads_online_cpu+0xf0/0xf0
[  142.438133]  ret_from_fork+0x2d/0x50
[  142.439045]  ? kthreads_online_cpu+0xf0/0xf0
[  142.440428]  ret_from_fork_asm+0x11/0x20
[  142.441476]  </TASK>
[  142.442152] irq event stamp: 22858
[  142.443002] hardirqs last  enabled at (22857): [<ffffffff82044ef4>] _raw_spin_unlock_irq+0x24/0x30
[  142.445032] hardirqs last disabled at (22858): [<ffffffff82044ce3>] _raw_spin_lock_irq+0x43/0x50
[  142.451450] softirqs last  enabled at (22714): [<ffffffff810bfdbc>] __irq_exit_rcu+0xac/0xd0
[  142.453345] softirqs last disabled at (22709): [<ffffffff810bfdbc>] __irq_exit_rcu+0xac/0xd0
[  142.455305] ---[ end trace 0000000000000000 ]---
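
For context, the rule this warning enforces is roughly the following (a
simplified sketch from memory of check_flush_dependency() in
kernel/workqueue.c, not the exact code; field and parameter names may
differ between versions):

<snip>
static void check_flush_dependency(struct workqueue_struct *target_wq,
				   struct work_struct *target_work)
{
	struct worker *worker = current_wq_worker();

	/* Waiting on work from a WQ_MEM_RECLAIM wq is fine: it has a rescuer. */
	if (target_wq->flags & WQ_MEM_RECLAIM)
		return;

	/*
	 * A worker of a WQ_MEM_RECLAIM workqueue must not wait on work
	 * queued on a !WQ_MEM_RECLAIM workqueue, since the latter has no
	 * rescuer and may never make progress under memory pressure.
	 */
	WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
		  "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
		  worker->current_pwq->wq->name, worker->current_func,
		  target_wq->name, target_work ? target_work->func : NULL);
}
<snip>

So kvfree_rcu_barrier() flushing kfree_rcu_work on system_unbound_wq from a
WQ_MEM_RECLAIM workqueue is exactly the case this check is meant to catch.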



* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-25 17:05             ` Keith Busch
@ 2025-02-25 17:41               ` Uladzislau Rezki
  2025-02-25 18:11                 ` Vlastimil Babka
  2025-02-25 18:21                 ` Uladzislau Rezki
  0 siblings, 2 replies; 44+ messages in thread
From: Uladzislau Rezki @ 2025-02-25 17:41 UTC (permalink / raw)
  To: Keith Busch, Vlastimil Babka
  Cc: Vlastimil Babka, Uladzislau Rezki, Paul E. McKenney,
	Joel Fernandes, Josh Triplett, Boqun Feng, Christoph Lameter,
	David Rientjes, Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan,
	Zqiang, Julia Lawall, Jakub Kicinski, Jason A. Donenfeld,
	Andrew Morton, Roman Gushchin, Hyeonggon Yoo, linux-mm,
	linux-kernel, rcu, Alexander Potapenko, Marco Elver,
	Dmitry Vyukov, kasan-dev, Jann Horn, Mateusz Guzik, linux-nvme,
	leitao

On Tue, Feb 25, 2025 at 10:05:37AM -0700, Keith Busch wrote:
> On Tue, Feb 25, 2025 at 09:03:38AM -0700, Keith Busch wrote:
> > On Tue, Feb 25, 2025 at 10:57:38AM +0100, Vlastimil Babka wrote:
> > > I tried to create a kunit test for it, but it doesn't trigger anything. Maybe
> > > it's too simple, or racy, and thus we are not flushing any of the queues from
> > > kvfree_rcu_barrier()?
> >
> > Thanks, your test readily triggers it for me, but only if I load
> > rcutorture at the same time.
> 
> Oops, I sent the wrong kernel messages. This is the relevant part:
> 
> [  142.371052] workqueue: WQ_MEM_RECLAIM
> test_kfree_rcu_destroy_wq:cache_destroy_workfn [slub_kunit] is
> flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work
> [  142.371072] WARNING: CPU: 11 PID: 186 at kernel/workqueue.c:3715
> check_flush_dependency.part.0+0xad/0x100
> [  142.375748] Modules linked in: slub_kunit(E) rcutorture(E)
> torture(E) kunit(E) iTCO_wdt(E) iTCO_vendor_support(E)
> intel_uncore_frequency_common(E) skx_edac_common(E) nfit(E)
> libnvdimm(E) kvm_intel(E) kvm(E) evdev(E) bochs(E) serio_raw(E)
> drm_kms_helper(E) i2c_i801(E) e1000e(E) i2c_smbus(E) intel_agp(E)
> intel_gtt(E) lpc_ich(E) agpgart(E) mfd_core(E) drm_shm]
> [  142.384553] CPU: 11 UID: 0 PID: 186 Comm: kworker/u64:11 Tainted: G
>            E    N 6.13.0-04839-g5e7b40f0ddce-dirty #831
> [  142.386755] Tainted: [E]=UNSIGNED_MODULE, [N]=TEST
> [  142.387849] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
> BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
> [  142.390236] Workqueue: test_kfree_rcu_destroy_wq
> cache_destroy_workfn [slub_kunit]
> [  142.391863] RIP: 0010:check_flush_dependency.part.0+0xad/0x100
> [  142.393183] Code: 75 dc 48 8b 55 18 49 8d 8d 78 01 00 00 4d 89 f0
> 48 81 c6 78 01 00 00 48 c7 c7 00 e1 9a 82 c6 05 4f 39 c5 02 01 e8 53
> bd fd ff <0f> 0b 5b 5d 41 5c 41 5d 41 5e c3 80 3d 39 39 c5 02 00 75 83
> 41 8b
> [  142.396981] RSP: 0018:ffffc900007cfc90 EFLAGS: 00010092
> [  142.398124] RAX: 000000000000008f RBX: ffff88803e9b10a0 RCX: 0000000000000027
> [  142.399605] RDX: ffff88803eba0d08 RSI: 0000000000000001 RDI: ffff88803eba0d00
> [  142.401092] RBP: ffff888007d9a480 R08: ffffffff83b8c808 R09: 0000000000000003
> [  142.402548] R10: ffffffff8348c820 R11: ffffffff83a11d58 R12: ffff888007150000
> [  142.404098] R13: ffff888005961400 R14: ffffffff813221a0 R15: ffff888005961400
> [  142.405561] FS:  0000000000000000(0000) GS:ffff88803eb80000(0000)
> knlGS:0000000000000000
> [  142.407297] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  142.408658] CR2: 00007f826bd1a000 CR3: 00000000069db002 CR4: 0000000000772ef0
> [  142.410259] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [  142.411871] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [  142.413341] PKRU: 55555554
> [  142.414038] Call Trace:
> [  142.414658]  <TASK>
> [  142.415249]  ? __warn+0x8d/0x180
> [  142.416035]  ? check_flush_dependency.part.0+0xad/0x100
> [  142.417182]  ? report_bug+0x160/0x170
> [  142.418041]  ? handle_bug+0x4f/0x90
> [  142.418861]  ? exc_invalid_op+0x14/0x70
> [  142.419853]  ? asm_exc_invalid_op+0x16/0x20
> [  142.420877]  ? kfree_rcu_shrink_scan+0x120/0x120
> [  142.422029]  ? check_flush_dependency.part.0+0xad/0x100
> [  142.423244]  __flush_work+0x38a/0x4a0
> [  142.424157]  ? find_held_lock+0x2b/0x80
> [  142.425070]  ? flush_rcu_work+0x26/0x40
> [  142.425953]  ? lock_release+0xb3/0x250
> [  142.426785]  ? __mutex_unlock_slowpath+0x2c/0x270
> [  142.427906]  flush_rcu_work+0x30/0x40
> [  142.428756]  kvfree_rcu_barrier+0xe9/0x130
> [  142.429649]  kmem_cache_destroy+0x2b/0x1f0
> [  142.430578]  cache_destroy_workfn+0x20/0x40 [slub_kunit]
> [  142.431729]  process_one_work+0x1cd/0x560
> [  142.432620]  worker_thread+0x183/0x310
> [  142.433487]  ? rescuer_thread+0x330/0x330
> [  142.434428]  kthread+0xd8/0x1d0
> [  142.435248]  ? ret_from_fork+0x17/0x50
> [  142.436165]  ? lock_release+0xb3/0x250
> [  142.437106]  ? kthreads_online_cpu+0xf0/0xf0
> [  142.438133]  ret_from_fork+0x2d/0x50
> [  142.439045]  ? kthreads_online_cpu+0xf0/0xf0
> [  142.440428]  ret_from_fork_asm+0x11/0x20
> [  142.441476]  </TASK>
> [  142.442152] irq event stamp: 22858
> [  142.443002] hardirqs last  enabled at (22857): [<ffffffff82044ef4>]
> _raw_spin_unlock_irq+0x24/0x30
> [  142.445032] hardirqs last disabled at (22858): [<ffffffff82044ce3>]
> _raw_spin_lock_irq+0x43/0x50
> [  142.451450] softirqs last  enabled at (22714): [<ffffffff810bfdbc>]
> __irq_exit_rcu+0xac/0xd0
> [  142.453345] softirqs last disabled at (22709): [<ffffffff810bfdbc>]
> __irq_exit_rcu+0xac/0xd0
> [  142.455305] ---[ end trace 0000000000000000 ]---
Thanks!

I can trigger this also:

<snip>
[   21.712856] KTAP version 1
[   21.712862] 1..1
[   21.714486]     KTAP version 1
[   21.714490]     # Subtest: slub_test
[   21.714492]     # module: slub_kunit
[   21.714495]     1..10
[   21.750359]     ok 1 test_clobber_zone
[   21.750955]     ok 2 test_next_pointer
[   21.751532]     ok 3 test_first_word
[   21.751991]     ok 4 test_clobber_50th_byte
[   21.752493]     ok 5 test_clobber_redzone_free
[   21.753004] stackdepot: allocating hash table of 1048576 entries via kvcalloc
[   21.756176]     ok 6 test_kmalloc_redzone_access
[   21.806549]     ok 7 test_kfree_rcu
[   22.058010] ------------[ cut here ]------------
[   22.058015] workqueue: WQ_MEM_RECLAIM test_kfree_rcu_destroy_wq:cache_destroy_workfn [slub_kunit] is flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work
[   22.058039] WARNING: CPU: 19 PID: 474 at kernel/workqueue.c:3715 check_flush_dependency.part.0+0xbe/0x130
[   22.058047] Modules linked in: slub_kunit(E) kunit(E) binfmt_misc(E) bochs(E) drm_client_lib(E) drm_shmem_helper(E) ppdev(E) drm_kms_helper(E) snd_pcm(E) sg(E) snd_timer(E) evdev(E) snd(E) joydev(E) parport_pc(E) parport(E) soundcore(E) serio_raw(E) button(E) pcspkr(E) drm(E) fuse(E) dm_mod(E) efi_pstore(E) configfs(E) loop(E) qemu_fw_cfg(E) ip_tables(E) x_tables(E) autofs4(E) ext4(E) crc16(E) mbcache(E) jbd2(E) sr_mod(E) sd_mod(E) cdrom(E) ata_generic(E) ata_piix(E) libata(E) scsi_mod(E) i2c_piix4(E) psmouse(E) e1000(E) i2c_smbus(E) scsi_common(E) floppy(E)
[   22.058091] CPU: 19 UID: 0 PID: 474 Comm: kworker/u257:0 Kdump: loaded Tainted: G            E    N 6.14.0-rc1+ #286
[   22.058096] Tainted: [E]=UNSIGNED_MODULE, [N]=TEST
[   22.058097] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[   22.058099] Workqueue: test_kfree_rcu_destroy_wq cache_destroy_workfn [slub_kunit]
[   22.058103] RIP: 0010:check_flush_dependency.part.0+0xbe/0x130
[   22.058106] Code: 75 d0 48 8b 55 18 49 8d 8d c0 00 00 00 4d 89 f0 48 81 c6 c0 00 00 00 48 c7 c7 b0 7d c8 bd c6 05 6c 78 53 01 01 e8 a2 ae fd ff <0f> 0b 5b 5d 41 5c 41 5d 41 5e c3 cc cc cc cc f6 c4 08 74 94 31 ed
[   22.058108] RSP: 0018:ffff95e5c123fd50 EFLAGS: 00010086
[   22.058111] RAX: 0000000000000000 RBX: ffff89a4ff22d5a0 RCX: 0000000000000000
[   22.058113] RDX: 0000000000000003 RSI: ffffffffbdce1697 RDI: 00000000ffffffff
[   22.058114] RBP: ffff89961043a780 R08: 0000000000000000 R09: 0000000000000003
[   22.058116] R10: ffff95e5c123fbe8 R11: ffff89a53fefefa8 R12: ffff89960cb6b080
[   22.058117] R13: ffff899600051400 R14: ffffffffbcf2ba80 R15: ffff89960005a800
[   22.058120] FS:  0000000000000000(0000) GS:ffff89a4ff2c0000(0000) knlGS:0000000000000000
[   22.058122] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   22.058124] CR2: 000055bf2cbc6038 CR3: 000000010dc1e000 CR4: 00000000000006f0
[   22.058128] Call Trace:
[   22.058130]  <TASK>
[   22.058133]  ? __warn+0x85/0x130
[   22.058137]  ? check_flush_dependency.part.0+0xbe/0x130
[   22.058139]  ? report_bug+0x18d/0x1c0
[   22.058142]  ? prb_read_valid+0x17/0x20
[   22.058147]  ? handle_bug+0x58/0x90
[   22.058151]  ? exc_invalid_op+0x13/0x60
[   22.058154]  ? asm_exc_invalid_op+0x16/0x20
[   22.058158]  ? __pfx_kfree_rcu_work+0x10/0x10
[   22.058162]  ? check_flush_dependency.part.0+0xbe/0x130
[   22.058165]  __flush_work+0xd6/0x320
[   22.058168]  flush_rcu_work+0x39/0x50
[   22.058171]  kvfree_rcu_barrier+0xe9/0x130
[   22.058174]  kmem_cache_destroy+0x18/0x140
[   22.058177]  process_one_work+0x184/0x3a0
[   22.058180]  worker_thread+0x24d/0x360
[   22.058183]  ? __pfx_worker_thread+0x10/0x10
[   22.058185]  kthread+0xfc/0x230
[   22.058189]  ? finish_task_switch.isra.0+0x85/0x2a0
[   22.058192]  ? __pfx_kthread+0x10/0x10
[   22.058195]  ret_from_fork+0x30/0x50
[   22.058199]  ? __pfx_kthread+0x10/0x10
[   22.058202]  ret_from_fork_asm+0x1a/0x30
[   22.058206]  </TASK>
[   22.058207] ---[ end trace 0000000000000000 ]---
[   23.123507]     ok 8 test_kfree_rcu_wq_destroy
[   23.151033]     ok 9 test_leak_destroy
[   23.151612]     ok 10 test_krealloc_redzone_zeroing
[   23.151617] # slub_test: pass:10 fail:0 skip:0 total:10
[   23.151619] # Totals: pass:10 fail:0 skip:0 total:10
[   23.151620] ok 1 slub_test
urezki@pc638:~$
<snip>

but I had to adapt Vlastimil's test slightly:

diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
index f11691315c2f..222f6d204b0d 100644
--- a/lib/slub_kunit.c
+++ b/lib/slub_kunit.c
@@ -6,6 +6,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/rcupdate.h>
+#include <linux/delay.h>
 #include "../mm/slab.h"

 static struct kunit_resource resource;
@@ -181,6 +182,63 @@ static void test_kfree_rcu(struct kunit *test)
        KUNIT_EXPECT_EQ(test, 0, slab_errors);
 }

+struct cache_destroy_work {
+        struct work_struct work;
+        struct kmem_cache *s;
+};
+
+static void cache_destroy_workfn(struct work_struct *w)
+{
+       struct cache_destroy_work *cdw;
+
+       cdw = container_of(w, struct cache_destroy_work, work);
+       kmem_cache_destroy(cdw->s);
+}
+
+#define KMEM_CACHE_DESTROY_NR 10
+
+static void test_kfree_rcu_wq_destroy(struct kunit *test)
+{
+       struct test_kfree_rcu_struct *p;
+       struct cache_destroy_work cdw;
+       struct workqueue_struct *wq;
+       struct kmem_cache *s;
+       unsigned int rnd;
+       int i;
+
+       if (IS_BUILTIN(CONFIG_SLUB_KUNIT_TEST))
+               kunit_skip(test, "can't do kfree_rcu() when test is built-in");
+
+       INIT_WORK_ONSTACK(&cdw.work, cache_destroy_workfn);
+       wq = alloc_workqueue("test_kfree_rcu_destroy_wq",
+                       WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
+
+       if (!wq)
+               kunit_skip(test, "failed to alloc wq");
+
+       for (i = 0; i < KMEM_CACHE_DESTROY_NR; i++) {
+               s = test_kmem_cache_create("TestSlub_kfree_rcu_wq_destroy",
+                               sizeof(struct test_kfree_rcu_struct),
+                               SLAB_NO_MERGE);
+
+               if (!s)
+                       kunit_skip(test, "failed to create cache");
+
+               rnd = get_random_u8() % 255;
+               p = kmem_cache_alloc(s, GFP_KERNEL);
+               kfree_rcu(p, rcu);
+
+               cdw.s = s;
+
+               msleep(rnd);
+               queue_work(wq, &cdw.work);
+               flush_work(&cdw.work);
+       }
+
+       destroy_workqueue(wq);
+       KUNIT_EXPECT_EQ(test, 0, slab_errors);
+}
+
 static void test_leak_destroy(struct kunit *test)
 {
        struct kmem_cache *s = test_kmem_cache_create("TestSlub_leak_destroy",
@@ -254,6 +312,7 @@ static struct kunit_case test_cases[] = {
        KUNIT_CASE(test_clobber_redzone_free),
        KUNIT_CASE(test_kmalloc_redzone_access),
        KUNIT_CASE(test_kfree_rcu),
+       KUNIT_CASE(test_kfree_rcu_wq_destroy),
        KUNIT_CASE(test_leak_destroy),
        KUNIT_CASE(test_krealloc_redzone_zeroing),
        {}

--
Uladzislau Rezki



* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-25 17:41               ` Uladzislau Rezki
@ 2025-02-25 18:11                 ` Vlastimil Babka
  2025-02-25 18:21                   ` Uladzislau Rezki
  2025-02-25 18:21                 ` Uladzislau Rezki
  1 sibling, 1 reply; 44+ messages in thread
From: Vlastimil Babka @ 2025-02-25 18:11 UTC (permalink / raw)
  To: Uladzislau Rezki, Keith Busch
  Cc: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik, linux-nvme, leitao

On 2/25/25 18:41, Uladzislau Rezki wrote:
> 
> but I had to adapt Vlastimil's test slightly:

Great, works for me too, thanks!




* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-25 17:41               ` Uladzislau Rezki
  2025-02-25 18:11                 ` Vlastimil Babka
@ 2025-02-25 18:21                 ` Uladzislau Rezki
  2025-02-26 10:59                   ` Vlastimil Babka
  2025-02-26 15:51                   ` Keith Busch
  1 sibling, 2 replies; 44+ messages in thread
From: Uladzislau Rezki @ 2025-02-25 18:21 UTC (permalink / raw)
  To: Keith Busch, Vlastimil Babka
  Cc: Keith Busch, Vlastimil Babka, Paul E. McKenney, Joel Fernandes,
	Josh Triplett, Boqun Feng, Christoph Lameter, David Rientjes,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Julia Lawall, Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik, linux-nvme, leitao

On Tue, Feb 25, 2025 at 06:41:19PM +0100, Uladzislau Rezki wrote:
> On Tue, Feb 25, 2025 at 10:05:37AM -0700, Keith Busch wrote:
> > On Tue, Feb 25, 2025 at 09:03:38AM -0700, Keith Busch wrote:
> > > On Tue, Feb 25, 2025 at 10:57:38AM +0100, Vlastimil Babka wrote:
> > > > I tried to create a kunit test for it, but it doesn't trigger anything. Maybe
> > > > it's too simple, or racy, and thus we are not flushing any of the queues from
> > > > kvfree_rcu_barrier()?
> > >
> > > Thanks, your test readily triggers it for me, but only if I load
> > > rcutorture at the same time.
> > 
> > Oops, I sent the wrong kernel messages. This is the relevant part:
> > 
> > [  142.371052] workqueue: WQ_MEM_RECLAIM
> > test_kfree_rcu_destroy_wq:cache_destroy_workfn [slub_kunit] is
> > flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work
> > [  142.371072] WARNING: CPU: 11 PID: 186 at kernel/workqueue.c:3715
> > check_flush_dependency.part.0+0xad/0x100
> > [  142.375748] Modules linked in: slub_kunit(E) rcutorture(E)
> > torture(E) kunit(E) iTCO_wdt(E) iTCO_vendor_support(E)
> > intel_uncore_frequency_common(E) skx_edac_common(E) nfit(E)
> > libnvdimm(E) kvm_intel(E) kvm(E) evdev(E) bochs(E) serio_raw(E)
> > drm_kms_helper(E) i2c_i801(E) e1000e(E) i2c_smbus(E) intel_agp(E)
> > intel_gtt(E) lpc_ich(E) agpgart(E) mfd_core(E) drm_shm]
> > [  142.384553] CPU: 11 UID: 0 PID: 186 Comm: kworker/u64:11 Tainted: G
> >            E    N 6.13.0-04839-g5e7b40f0ddce-dirty #831
> > [  142.386755] Tainted: [E]=UNSIGNED_MODULE, [N]=TEST
> > [  142.387849] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
> > BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
> > [  142.390236] Workqueue: test_kfree_rcu_destroy_wq
> > cache_destroy_workfn [slub_kunit]
> > [  142.391863] RIP: 0010:check_flush_dependency.part.0+0xad/0x100
> > [  142.393183] Code: 75 dc 48 8b 55 18 49 8d 8d 78 01 00 00 4d 89 f0
> > 48 81 c6 78 01 00 00 48 c7 c7 00 e1 9a 82 c6 05 4f 39 c5 02 01 e8 53
> > bd fd ff <0f> 0b 5b 5d 41 5c 41 5d 41 5e c3 80 3d 39 39 c5 02 00 75 83
> > 41 8b
> > [  142.396981] RSP: 0018:ffffc900007cfc90 EFLAGS: 00010092
> > [  142.398124] RAX: 000000000000008f RBX: ffff88803e9b10a0 RCX: 0000000000000027
> > [  142.399605] RDX: ffff88803eba0d08 RSI: 0000000000000001 RDI: ffff88803eba0d00
> > [  142.401092] RBP: ffff888007d9a480 R08: ffffffff83b8c808 R09: 0000000000000003
> > [  142.402548] R10: ffffffff8348c820 R11: ffffffff83a11d58 R12: ffff888007150000
> > [  142.404098] R13: ffff888005961400 R14: ffffffff813221a0 R15: ffff888005961400
> > [  142.405561] FS:  0000000000000000(0000) GS:ffff88803eb80000(0000)
> > knlGS:0000000000000000
> > [  142.407297] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [  142.408658] CR2: 00007f826bd1a000 CR3: 00000000069db002 CR4: 0000000000772ef0
> > [  142.410259] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > [  142.411871] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > [  142.413341] PKRU: 55555554
> > [  142.414038] Call Trace:
> > [  142.414658]  <TASK>
> > [  142.415249]  ? __warn+0x8d/0x180
> > [  142.416035]  ? check_flush_dependency.part.0+0xad/0x100
> > [  142.417182]  ? report_bug+0x160/0x170
> > [  142.418041]  ? handle_bug+0x4f/0x90
> > [  142.418861]  ? exc_invalid_op+0x14/0x70
> > [  142.419853]  ? asm_exc_invalid_op+0x16/0x20
> > [  142.420877]  ? kfree_rcu_shrink_scan+0x120/0x120
> > [  142.422029]  ? check_flush_dependency.part.0+0xad/0x100
> > [  142.423244]  __flush_work+0x38a/0x4a0
> > [  142.424157]  ? find_held_lock+0x2b/0x80
> > [  142.425070]  ? flush_rcu_work+0x26/0x40
> > [  142.425953]  ? lock_release+0xb3/0x250
> > [  142.426785]  ? __mutex_unlock_slowpath+0x2c/0x270
> > [  142.427906]  flush_rcu_work+0x30/0x40
> > [  142.428756]  kvfree_rcu_barrier+0xe9/0x130
> > [  142.429649]  kmem_cache_destroy+0x2b/0x1f0
> > [  142.430578]  cache_destroy_workfn+0x20/0x40 [slub_kunit]
> > [  142.431729]  process_one_work+0x1cd/0x560
> > [  142.432620]  worker_thread+0x183/0x310
> > [  142.433487]  ? rescuer_thread+0x330/0x330
> > [  142.434428]  kthread+0xd8/0x1d0
> > [  142.435248]  ? ret_from_fork+0x17/0x50
> > [  142.436165]  ? lock_release+0xb3/0x250
> > [  142.437106]  ? kthreads_online_cpu+0xf0/0xf0
> > [  142.438133]  ret_from_fork+0x2d/0x50
> > [  142.439045]  ? kthreads_online_cpu+0xf0/0xf0
> > [  142.440428]  ret_from_fork_asm+0x11/0x20
> > [  142.441476]  </TASK>
> > [  142.442152] irq event stamp: 22858
> > [  142.443002] hardirqs last  enabled at (22857): [<ffffffff82044ef4>]
> > _raw_spin_unlock_irq+0x24/0x30
> > [  142.445032] hardirqs last disabled at (22858): [<ffffffff82044ce3>]
> > _raw_spin_lock_irq+0x43/0x50
> > [  142.451450] softirqs last  enabled at (22714): [<ffffffff810bfdbc>]
> > __irq_exit_rcu+0xac/0xd0
> > [  142.453345] softirqs last disabled at (22709): [<ffffffff810bfdbc>]
> > __irq_exit_rcu+0xac/0xd0
> > [  142.455305] ---[ end trace 0000000000000000 ]---
> Thanks!
> 
> I can trigger this also:
> 
> <snip>
> [   21.712856] KTAP version 1
> [   21.712862] 1..1
> [   21.714486]     KTAP version 1
> [   21.714490]     # Subtest: slub_test
> [   21.714492]     # module: slub_kunit
> [   21.714495]     1..10
> [   21.750359]     ok 1 test_clobber_zone
> [   21.750955]     ok 2 test_next_pointer
> [   21.751532]     ok 3 test_first_word
> [   21.751991]     ok 4 test_clobber_50th_byte
> [   21.752493]     ok 5 test_clobber_redzone_free
> [   21.753004] stackdepot: allocating hash table of 1048576 entries via kvcalloc
> [   21.756176]     ok 6 test_kmalloc_redzone_access
> [   21.806549]     ok 7 test_kfree_rcu
> [   22.058010] ------------[ cut here ]------------
> [   22.058015] workqueue: WQ_MEM_RECLAIM test_kfree_rcu_destroy_wq:cache_destroy_workfn [slub_kunit] is flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work
> [   22.058039] WARNING: CPU: 19 PID: 474 at kernel/workqueue.c:3715 check_flush_dependency.part.0+0xbe/0x130
> [   22.058047] Modules linked in: slub_kunit(E) kunit(E) binfmt_misc(E) bochs(E) drm_client_lib(E) drm_shmem_helper(E) ppdev(E) drm_kms_helper(E) snd_pcm(E) sg(E) snd_timer(E) evdev(E) snd(E) joydev(E) parport_pc(E) parport(E) soundcore(E) serio_raw(E) button(E) pcspkr(E) drm(E) fuse(E) dm_mod(E) efi_pstore(E) configfs(E) loop(E) qemu_fw_cfg(E) ip_tables(E) x_tables(E) autofs4(E) ext4(E) crc16(E) mbcache(E) jbd2(E) sr_mod(E) sd_mod(E) cdrom(E) ata_generic(E) ata_piix(E) libata(E) scsi_mod(E) i2c_piix4(E) psmouse(E) e1000(E) i2c_smbus(E) scsi_common(E) floppy(E)
> [   22.058091] CPU: 19 UID: 0 PID: 474 Comm: kworker/u257:0 Kdump: loaded Tainted: G            E    N 6.14.0-rc1+ #286
> [   22.058096] Tainted: [E]=UNSIGNED_MODULE, [N]=TEST
> [   22.058097] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
> [   22.058099] Workqueue: test_kfree_rcu_destroy_wq cache_destroy_workfn [slub_kunit]
> [   22.058103] RIP: 0010:check_flush_dependency.part.0+0xbe/0x130
> [   22.058106] Code: 75 d0 48 8b 55 18 49 8d 8d c0 00 00 00 4d 89 f0 48 81 c6 c0 00 00 00 48 c7 c7 b0 7d c8 bd c6 05 6c 78 53 01 01 e8 a2 ae fd ff <0f> 0b 5b 5d 41 5c 41 5d 41 5e c3 cc cc cc cc f6 c4 08 74 94 31 ed
> [   22.058108] RSP: 0018:ffff95e5c123fd50 EFLAGS: 00010086
> [   22.058111] RAX: 0000000000000000 RBX: ffff89a4ff22d5a0 RCX: 0000000000000000
> [   22.058113] RDX: 0000000000000003 RSI: ffffffffbdce1697 RDI: 00000000ffffffff
> [   22.058114] RBP: ffff89961043a780 R08: 0000000000000000 R09: 0000000000000003
> [   22.058116] R10: ffff95e5c123fbe8 R11: ffff89a53fefefa8 R12: ffff89960cb6b080
> [   22.058117] R13: ffff899600051400 R14: ffffffffbcf2ba80 R15: ffff89960005a800
> [   22.058120] FS:  0000000000000000(0000) GS:ffff89a4ff2c0000(0000) knlGS:0000000000000000
> [   22.058122] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   22.058124] CR2: 000055bf2cbc6038 CR3: 000000010dc1e000 CR4: 00000000000006f0
> [   22.058128] Call Trace:
> [   22.058130]  <TASK>
> [   22.058133]  ? __warn+0x85/0x130
> [   22.058137]  ? check_flush_dependency.part.0+0xbe/0x130
> [   22.058139]  ? report_bug+0x18d/0x1c0
> [   22.058142]  ? prb_read_valid+0x17/0x20
> [   22.058147]  ? handle_bug+0x58/0x90
> [   22.058151]  ? exc_invalid_op+0x13/0x60
> [   22.058154]  ? asm_exc_invalid_op+0x16/0x20
> [   22.058158]  ? __pfx_kfree_rcu_work+0x10/0x10
> [   22.058162]  ? check_flush_dependency.part.0+0xbe/0x130
> [   22.058165]  __flush_work+0xd6/0x320
> [   22.058168]  flush_rcu_work+0x39/0x50
> [   22.058171]  kvfree_rcu_barrier+0xe9/0x130
> [   22.058174]  kmem_cache_destroy+0x18/0x140
> [   22.058177]  process_one_work+0x184/0x3a0
> [   22.058180]  worker_thread+0x24d/0x360
> [   22.058183]  ? __pfx_worker_thread+0x10/0x10
> [   22.058185]  kthread+0xfc/0x230
> [   22.058189]  ? finish_task_switch.isra.0+0x85/0x2a0
> [   22.058192]  ? __pfx_kthread+0x10/0x10
> [   22.058195]  ret_from_fork+0x30/0x50
> [   22.058199]  ? __pfx_kthread+0x10/0x10
> [   22.058202]  ret_from_fork_asm+0x1a/0x30
> [   22.058206]  </TASK>
> [   22.058207] ---[ end trace 0000000000000000 ]---
> [   23.123507]     ok 8 test_kfree_rcu_wq_destroy
> [   23.151033]     ok 9 test_leak_destroy
> [   23.151612]     ok 10 test_krealloc_redzone_zeroing
> [   23.151617] # slub_test: pass:10 fail:0 skip:0 total:10
> [   23.151619] # Totals: pass:10 fail:0 skip:0 total:10
> [   23.151620] ok 1 slub_test
> urezki@pc638:~$
> <snip>
> 
> but I had to adapt Vlastimil's test slightly:
> 
> diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
> index f11691315c2f..222f6d204b0d 100644
> --- a/lib/slub_kunit.c
> +++ b/lib/slub_kunit.c
> @@ -6,6 +6,7 @@
>  #include <linux/module.h>
>  #include <linux/kernel.h>
>  #include <linux/rcupdate.h>
> +#include <linux/delay.h>
>  #include "../mm/slab.h"
> 
>  static struct kunit_resource resource;
> @@ -181,6 +182,63 @@ static void test_kfree_rcu(struct kunit *test)
>         KUNIT_EXPECT_EQ(test, 0, slab_errors);
>  }
> 
> +struct cache_destroy_work {
> +        struct work_struct work;
> +        struct kmem_cache *s;
> +};
> +
> +static void cache_destroy_workfn(struct work_struct *w)
> +{
> +       struct cache_destroy_work *cdw;
> +
> +       cdw = container_of(w, struct cache_destroy_work, work);
> +       kmem_cache_destroy(cdw->s);
> +}
> +
> +#define KMEM_CACHE_DESTROY_NR 10
> +
> +static void test_kfree_rcu_wq_destroy(struct kunit *test)
> +{
> +       struct test_kfree_rcu_struct *p;
> +       struct cache_destroy_work cdw;
> +       struct workqueue_struct *wq;
> +       struct kmem_cache *s;
> +       unsigned int rnd;
> +       int i;
> +
> +       if (IS_BUILTIN(CONFIG_SLUB_KUNIT_TEST))
> +               kunit_skip(test, "can't do kfree_rcu() when test is built-in");
> +
> +       INIT_WORK_ONSTACK(&cdw.work, cache_destroy_workfn);
> +       wq = alloc_workqueue("test_kfree_rcu_destroy_wq",
> +                       WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
> +
> +       if (!wq)
> +               kunit_skip(test, "failed to alloc wq");
> +
> +       for (i = 0; i < KMEM_CACHE_DESTROY_NR; i++) {
> +               s = test_kmem_cache_create("TestSlub_kfree_rcu_wq_destroy",
> +                               sizeof(struct test_kfree_rcu_struct),
> +                               SLAB_NO_MERGE);
> +
> +               if (!s)
> +                       kunit_skip(test, "failed to create cache");
> +
> +               rnd = get_random_u8() % 255;
> +               p = kmem_cache_alloc(s, GFP_KERNEL);
> +               kfree_rcu(p, rcu);
> +
> +               cdw.s = s;
> +
> +               msleep(rnd);
> +               queue_work(wq, &cdw.work);
> +               flush_work(&cdw.work);
> +       }
> +
> +       destroy_workqueue(wq);
> +       KUNIT_EXPECT_EQ(test, 0, slab_errors);
> +}
> +
>  static void test_leak_destroy(struct kunit *test)
>  {
>         struct kmem_cache *s = test_kmem_cache_create("TestSlub_leak_destroy",
> @@ -254,6 +312,7 @@ static struct kunit_case test_cases[] = {
>         KUNIT_CASE(test_clobber_redzone_free),
>         KUNIT_CASE(test_kmalloc_redzone_access),
>         KUNIT_CASE(test_kfree_rcu),
> +       KUNIT_CASE(test_kfree_rcu_wq_destroy),
>         KUNIT_CASE(test_leak_destroy),
>         KUNIT_CASE(test_krealloc_redzone_zeroing),
>         {}
> 
> --
> Uladzislau Rezki
>
WQ_MEM_RECLAIM-patch fixes this for me:

<snip>
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 4030907b6b7d..1b5ed5512782 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1304,6 +1304,8 @@ module_param(rcu_min_cached_objs, int, 0444);
 static int rcu_delay_page_cache_fill_msec = 5000;
 module_param(rcu_delay_page_cache_fill_msec, int, 0444);

+static struct workqueue_struct *rcu_reclaim_wq;
+
 /* Maximum number of jiffies to wait before draining a batch. */
 #define KFREE_DRAIN_JIFFIES (5 * HZ)
 #define KFREE_N_BATCHES 2
@@ -1632,10 +1634,10 @@ __schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
        if (delayed_work_pending(&krcp->monitor_work)) {
                delay_left = krcp->monitor_work.timer.expires - jiffies;
                if (delay < delay_left)
-                       mod_delayed_work(system_unbound_wq, &krcp->monitor_work, delay);
+                       mod_delayed_work(rcu_reclaim_wq, &krcp->monitor_work, delay);
                return;
        }
-       queue_delayed_work(system_unbound_wq, &krcp->monitor_work, delay);
+       queue_delayed_work(rcu_reclaim_wq, &krcp->monitor_work, delay);
 }

 static void
@@ -1733,7 +1735,7 @@ kvfree_rcu_queue_batch(struct kfree_rcu_cpu *krcp)
                        // "free channels", the batch can handle. Break
                        // the loop since it is done with this CPU thus
                        // queuing an RCU work is _always_ success here.
-                       queued = queue_rcu_work(system_unbound_wq, &krwp->rcu_work);
+                       queued = queue_rcu_work(rcu_reclaim_wq, &krwp->rcu_work);
                        WARN_ON_ONCE(!queued);
                        break;
                }
@@ -1883,7 +1885,7 @@ run_page_cache_worker(struct kfree_rcu_cpu *krcp)
        if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
                        !atomic_xchg(&krcp->work_in_progress, 1)) {
                if (atomic_read(&krcp->backoff_page_cache_fill)) {
-                       queue_delayed_work(system_unbound_wq,
+                       queue_delayed_work(rcu_reclaim_wq,
                                &krcp->page_cache_work,
                                        msecs_to_jiffies(rcu_delay_page_cache_fill_msec));
                } else {
@@ -2120,6 +2122,10 @@ void __init kvfree_rcu_init(void)
        int i, j;
        struct shrinker *kfree_rcu_shrinker;

+       rcu_reclaim_wq = alloc_workqueue("rcu_reclaim",
+               WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
+       WARN_ON(!rcu_reclaim_wq);
+
        /* Clamp it to [0:100] seconds interval. */
        if (rcu_delay_page_cache_fill_msec < 0 ||
                rcu_delay_page_cache_fill_msec > 100 * MSEC_PER_SEC) {
<snip>

it passes:

<snip>
[   15.972416] KTAP version 1
[   15.972421] 1..1
[   15.973467]     KTAP version 1
[   15.973470]     # Subtest: slub_test
[   15.973472]     # module: slub_kunit
[   15.973474]     1..10
[   15.974483]     ok 1 test_clobber_zone
[   15.974927]     ok 2 test_next_pointer
[   15.975308]     ok 3 test_first_word
[   15.975672]     ok 4 test_clobber_50th_byte
[   15.976035]     ok 5 test_clobber_redzone_free
[   15.976128] stackdepot: allocating hash table of 1048576 entries via kvcalloc
[   15.979505]     ok 6 test_kmalloc_redzone_access
[   16.014408]     ok 7 test_kfree_rcu
[   17.726602]     ok 8 test_kfree_rcu_wq_destroy
[   17.750323]     ok 9 test_leak_destroy
[   17.750883]     ok 10 test_krealloc_redzone_zeroing
[   17.750887] # slub_test: pass:10 fail:0 skip:0 total:10
[   17.750890] # Totals: pass:10 fail:0 skip:0 total:10
[   17.750891] ok 1 slub_test
<snip>

--
Uladzislau Rezki



* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-25 18:11                 ` Vlastimil Babka
@ 2025-02-25 18:21                   ` Uladzislau Rezki
  0 siblings, 0 replies; 44+ messages in thread
From: Uladzislau Rezki @ 2025-02-25 18:21 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Uladzislau Rezki, Keith Busch, Paul E. McKenney, Joel Fernandes,
	Josh Triplett, Boqun Feng, Christoph Lameter, David Rientjes,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Julia Lawall, Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik, linux-nvme, leitao

On Tue, Feb 25, 2025 at 07:11:05PM +0100, Vlastimil Babka wrote:
> On 2/25/25 18:41, Uladzislau Rezki wrote:
> > 
> > but i had to adapt slightly the Vlastimil's test:
> 
> Great, works for me too, thanks!
> 
Sounds good :)

--
Uladzislau Rezki



* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-25 18:21                 ` Uladzislau Rezki
@ 2025-02-26 10:59                   ` Vlastimil Babka
  2025-02-26 14:31                     ` Uladzislau Rezki
  2025-02-26 15:51                   ` Keith Busch
  1 sibling, 1 reply; 44+ messages in thread
From: Vlastimil Babka @ 2025-02-26 10:59 UTC (permalink / raw)
  To: Uladzislau Rezki, Keith Busch
  Cc: Paul E. McKenney, Joel Fernandes, Josh Triplett, Boqun Feng,
	Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik, linux-nvme, leitao

On 2/25/25 7:21 PM, Uladzislau Rezki wrote:
>>
> WQ_MEM_RECLAIM-patch fixes this for me:

Sounds good, can you send a formal patch then?
Some nits below:

> <snip>
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 4030907b6b7d..1b5ed5512782 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -1304,6 +1304,8 @@ module_param(rcu_min_cached_objs, int, 0444);
>  static int rcu_delay_page_cache_fill_msec = 5000;
>  module_param(rcu_delay_page_cache_fill_msec, int, 0444);
> 
> +static struct workqueue_struct *rcu_reclaim_wq;
> +
>  /* Maximum number of jiffies to wait before draining a batch. */
>  #define KFREE_DRAIN_JIFFIES (5 * HZ)
>  #define KFREE_N_BATCHES 2
> @@ -1632,10 +1634,10 @@ __schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
>         if (delayed_work_pending(&krcp->monitor_work)) {
>                 delay_left = krcp->monitor_work.timer.expires - jiffies;
>                 if (delay < delay_left)
> -                       mod_delayed_work(system_unbound_wq, &krcp->monitor_work, delay);
> +                       mod_delayed_work(rcu_reclaim_wq, &krcp->monitor_work, delay);
>                 return;
>         }
> -       queue_delayed_work(system_unbound_wq, &krcp->monitor_work, delay);
> +       queue_delayed_work(rcu_reclaim_wq, &krcp->monitor_work, delay);
>  }
> 
>  static void
> @@ -1733,7 +1735,7 @@ kvfree_rcu_queue_batch(struct kfree_rcu_cpu *krcp)
>                         // "free channels", the batch can handle. Break
>                         // the loop since it is done with this CPU thus
>                         // queuing an RCU work is _always_ success here.
> -                       queued = queue_rcu_work(system_unbound_wq, &krwp->rcu_work);
> +                       queued = queue_rcu_work(rcu_reclaim_wq, &krwp->rcu_work);
>                         WARN_ON_ONCE(!queued);
>                         break;
>                 }
> @@ -1883,7 +1885,7 @@ run_page_cache_worker(struct kfree_rcu_cpu *krcp)
>         if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
>                         !atomic_xchg(&krcp->work_in_progress, 1)) {
>                 if (atomic_read(&krcp->backoff_page_cache_fill)) {
> -                       queue_delayed_work(system_unbound_wq,
> +                       queue_delayed_work(rcu_reclaim_wq,
>                                 &krcp->page_cache_work,
>                                         msecs_to_jiffies(rcu_delay_page_cache_fill_msec));
>                 } else {
> @@ -2120,6 +2122,10 @@ void __init kvfree_rcu_init(void)
>         int i, j;
>         struct shrinker *kfree_rcu_shrinker;
> 
> +       rcu_reclaim_wq = alloc_workqueue("rcu_reclaim",

Should we name it "kvfree_rcu_reclaim"? "rcu_reclaim" sounds too generic,
as if it were part of RCU itself.

> +               WQ_UNBOUND | WQ_MEM_RECLAIM, 0);

Do we want WQ_SYSFS? Or maybe only when someone asks, with a use case?
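
I.e. something like this (just illustrating the suggestion, keeping the
variable name and flags from your patch and leaving WQ_SYSFS out for now):

	rcu_reclaim_wq = alloc_workqueue("kvfree_rcu_reclaim",
			WQ_UNBOUND | WQ_MEM_RECLAIM, 0);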

Thanks,
Vlastimil

> +       WARN_ON(!rcu_reclaim_wq);
> +
>         /* Clamp it to [0:100] seconds interval. */
>         if (rcu_delay_page_cache_fill_msec < 0 ||
>                 rcu_delay_page_cache_fill_msec > 100 * MSEC_PER_SEC) {
> <snip>
> 
> it passes:
> 
> <snip>
> [   15.972416] KTAP version 1
> [   15.972421] 1..1
> [   15.973467]     KTAP version 1
> [   15.973470]     # Subtest: slub_test
> [   15.973472]     # module: slub_kunit
> [   15.973474]     1..10
> [   15.974483]     ok 1 test_clobber_zone
> [   15.974927]     ok 2 test_next_pointer
> [   15.975308]     ok 3 test_first_word
> [   15.975672]     ok 4 test_clobber_50th_byte
> [   15.976035]     ok 5 test_clobber_redzone_free
> [   15.976128] stackdepot: allocating hash table of 1048576 entries via kvcalloc
> [   15.979505]     ok 6 test_kmalloc_redzone_access
> [   16.014408]     ok 7 test_kfree_rcu
> [   17.726602]     ok 8 test_kfree_rcu_wq_destroy
> [   17.750323]     ok 9 test_leak_destroy
> [   17.750883]     ok 10 test_krealloc_redzone_zeroing
> [   17.750887] # slub_test: pass:10 fail:0 skip:0 total:10
> [   17.750890] # Totals: pass:10 fail:0 skip:0 total:10
> [   17.750891] ok 1 slub_test
> <snip>
> 
> --
> Uladzislau Rezki
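
The hunks quoted above replace system_unbound_wq with a dedicated queue created
with WQ_UNBOUND | WQ_MEM_RECLAIM; the WQ_MEM_RECLAIM flag pre-allocates a rescuer
thread, so work queued on the memory-reclaim path can still make forward progress
when no new kworkers can be created under memory pressure. Below is a minimal,
self-contained sketch of that pattern for reference only; it is not the posted
patch, and all names in it (demo_reclaim_wq, demo_work_fn) are made up.

<snip>
// SPDX-License-Identifier: GPL-2.0
/* Illustrative only: a tiny module showing the dedicated
 * WQ_UNBOUND | WQ_MEM_RECLAIM workqueue pattern discussed above.
 * All identifiers here are hypothetical.
 */
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_reclaim_wq;

static void demo_work_fn(struct work_struct *work);
static DECLARE_WORK(demo_work, demo_work_fn);

static void demo_work_fn(struct work_struct *work)
{
	/* Work that sits on the memory-reclaim path would run here; the
	 * WQ_MEM_RECLAIM rescuer guarantees it is eventually executed even
	 * when new worker threads cannot be created under memory pressure.
	 */
	pr_info("demo reclaim work ran\n");
}

static int __init demo_init(void)
{
	/* Unbound so the work is not tied to the submitting CPU,
	 * MEM_RECLAIM so a rescuer thread is allocated up front.
	 */
	demo_reclaim_wq = alloc_workqueue("demo_reclaim",
					  WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
	if (!demo_reclaim_wq)
		return -ENOMEM;

	queue_work(demo_reclaim_wq, &demo_work);
	return 0;
}

static void __exit demo_exit(void)
{
	/* Drains any remaining work and releases the rescuer. */
	destroy_workqueue(demo_reclaim_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
<snip>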




* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-26 10:59                   ` Vlastimil Babka
@ 2025-02-26 14:31                     ` Uladzislau Rezki
  2025-02-26 14:36                       ` Vlastimil Babka
  0 siblings, 1 reply; 44+ messages in thread
From: Uladzislau Rezki @ 2025-02-26 14:31 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Uladzislau Rezki, Keith Busch, Paul E. McKenney, Joel Fernandes,
	Josh Triplett, Boqun Feng, Christoph Lameter, David Rientjes,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Julia Lawall, Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik, linux-nvme, leitao

On Wed, Feb 26, 2025 at 11:59:53AM +0100, Vlastimil Babka wrote:
> On 2/25/25 7:21 PM, Uladzislau Rezki wrote:
> >>
> > WQ_MEM_RECLAIM-patch fixes this for me:
> 
> Sounds good, can you send a formal patch then?
>
Do you mean both? Test case and fix? I can :)

> Some nits below:
> 
> > <snip>
> > diff --git a/mm/slab_common.c b/mm/slab_common.c
> > index 4030907b6b7d..1b5ed5512782 100644
> > --- a/mm/slab_common.c
> > +++ b/mm/slab_common.c
> > @@ -1304,6 +1304,8 @@ module_param(rcu_min_cached_objs, int, 0444);
> >  static int rcu_delay_page_cache_fill_msec = 5000;
> >  module_param(rcu_delay_page_cache_fill_msec, int, 0444);
> > 
> > +static struct workqueue_struct *rcu_reclaim_wq;
> > +
> >  /* Maximum number of jiffies to wait before draining a batch. */
> >  #define KFREE_DRAIN_JIFFIES (5 * HZ)
> >  #define KFREE_N_BATCHES 2
> > @@ -1632,10 +1634,10 @@ __schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
> >         if (delayed_work_pending(&krcp->monitor_work)) {
> >                 delay_left = krcp->monitor_work.timer.expires - jiffies;
> >                 if (delay < delay_left)
> > -                       mod_delayed_work(system_unbound_wq, &krcp->monitor_work, delay);
> > +                       mod_delayed_work(rcu_reclaim_wq, &krcp->monitor_work, delay);
> >                 return;
> >         }
> > -       queue_delayed_work(system_unbound_wq, &krcp->monitor_work, delay);
> > +       queue_delayed_work(rcu_reclaim_wq, &krcp->monitor_work, delay);
> >  }
> > 
> >  static void
> > @@ -1733,7 +1735,7 @@ kvfree_rcu_queue_batch(struct kfree_rcu_cpu *krcp)
> >                         // "free channels", the batch can handle. Break
> >                         // the loop since it is done with this CPU thus
> >                         // queuing an RCU work is _always_ success here.
> > -                       queued = queue_rcu_work(system_unbound_wq, &krwp->rcu_work);
> > +                       queued = queue_rcu_work(rcu_reclaim_wq, &krwp->rcu_work);
> >                         WARN_ON_ONCE(!queued);
> >                         break;
> >                 }
> > @@ -1883,7 +1885,7 @@ run_page_cache_worker(struct kfree_rcu_cpu *krcp)
> >         if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
> >                         !atomic_xchg(&krcp->work_in_progress, 1)) {
> >                 if (atomic_read(&krcp->backoff_page_cache_fill)) {
> > -                       queue_delayed_work(system_unbound_wq,
> > +                       queue_delayed_work(rcu_reclaim_wq,
> >                                 &krcp->page_cache_work,
> >                                         msecs_to_jiffies(rcu_delay_page_cache_fill_msec));
> >                 } else {
> > @@ -2120,6 +2122,10 @@ void __init kvfree_rcu_init(void)
> >         int i, j;
> >         struct shrinker *kfree_rcu_shrinker;
> > 
> > +       rcu_reclaim_wq = alloc_workqueue("rcu_reclaim",
> 
> Should we name it "kvfree_rcu_reclaim"? "rcu_reclaim" sounds too generic,
> as if it were part of RCU itself.
> 
> > +               WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
> 
> Do we want WQ_SYSFS? Or maybe only when someone asks, with a use case?
> 
If someone asks, IMO.

--
Uladzislau Rezki
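
As a side note on the __schedule_delayed_monitor_work() hunk quoted above:
mod_delayed_work() would also queue the work if it were idle, but it would
equally push an already-armed timer further out, so the code only calls it
when the new delay is strictly shorter and otherwise leaves the earlier
expiry in place. A stripped-down sketch of that "never later than" idiom,
with hypothetical names (my_wq, schedule_no_later_than) and assuming the
queue was created elsewhere with alloc_workqueue():

<snip>
/* Illustrative fragment only, not the posted patch. */
#include <linux/jiffies.h>
#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;	/* hypothetical, WQ_MEM_RECLAIM queue */

static void schedule_no_later_than(struct delayed_work *dwork, unsigned long delay)
{
	if (delayed_work_pending(dwork)) {
		unsigned long delay_left = dwork->timer.expires - jiffies;

		/* Only pull the timer in; never push an earlier expiry out. */
		if (delay < delay_left)
			mod_delayed_work(my_wq, dwork, delay);
		return;
	}

	queue_delayed_work(my_wq, dwork, delay);
}
<snip>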



* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-26 14:31                     ` Uladzislau Rezki
@ 2025-02-26 14:36                       ` Vlastimil Babka
  2025-02-26 15:42                         ` Uladzislau Rezki
  0 siblings, 1 reply; 44+ messages in thread
From: Vlastimil Babka @ 2025-02-26 14:36 UTC (permalink / raw)
  To: Uladzislau Rezki
  Cc: Keith Busch, Paul E. McKenney, Joel Fernandes, Josh Triplett,
	Boqun Feng, Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik, linux-nvme, leitao

On 2/26/25 3:31 PM, Uladzislau Rezki wrote:
> On Wed, Feb 26, 2025 at 11:59:53AM +0100, Vlastimil Babka wrote:
>> On 2/25/25 7:21 PM, Uladzislau Rezki wrote:
>>>>
>>> WQ_MEM_RECLAIM-patch fixes this for me:
>>
>> Sounds good, can you send a formal patch then?
>>
> Do you mean both? Test case and fix? I can :)

Sure, but only the fix is for stable. Thanks!




* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-26 14:36                       ` Vlastimil Babka
@ 2025-02-26 15:42                         ` Uladzislau Rezki
  2025-02-26 15:46                           ` Vlastimil Babka
  0 siblings, 1 reply; 44+ messages in thread
From: Uladzislau Rezki @ 2025-02-26 15:42 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Uladzislau Rezki, Keith Busch, Paul E. McKenney, Joel Fernandes,
	Josh Triplett, Boqun Feng, Christoph Lameter, David Rientjes,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Julia Lawall, Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik, linux-nvme, leitao

On Wed, Feb 26, 2025 at 03:36:39PM +0100, Vlastimil Babka wrote:
> On 2/26/25 3:31 PM, Uladzislau Rezki wrote:
> > On Wed, Feb 26, 2025 at 11:59:53AM +0100, Vlastimil Babka wrote:
> >> On 2/25/25 7:21 PM, Uladzislau Rezki wrote:
> >>>>
> >>> WQ_MEM_RECLAIM-patch fixes this for me:
> >>
> >> Sounds good, can you send a formal patch then?
> >>
> > Do you mean both? Test case and fix? I can :)
> 
> Sure, but only the fix is for stable. Thanks!
> 
It is taken by Greg if there is a Fixes tag in the commit.
What do you mean by "the fix is for stable"? Is the current
Linus tree not affected by this?

--
Uladzislau Rezki



* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-26 15:42                         ` Uladzislau Rezki
@ 2025-02-26 15:46                           ` Vlastimil Babka
  2025-02-26 15:57                             ` Uladzislau Rezki
  0 siblings, 1 reply; 44+ messages in thread
From: Vlastimil Babka @ 2025-02-26 15:46 UTC (permalink / raw)
  To: Uladzislau Rezki
  Cc: Keith Busch, Paul E. McKenney, Joel Fernandes, Josh Triplett,
	Boqun Feng, Christoph Lameter, David Rientjes, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Julia Lawall,
	Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik, linux-nvme, leitao

On 2/26/25 4:42 PM, Uladzislau Rezki wrote:
> On Wed, Feb 26, 2025 at 03:36:39PM +0100, Vlastimil Babka wrote:
>> On 2/26/25 3:31 PM, Uladzislau Rezki wrote:
>>> On Wed, Feb 26, 2025 at 11:59:53AM +0100, Vlastimil Babka wrote:
>>>> On 2/25/25 7:21 PM, Uladzislau Rezki wrote:
>>>>>>
>>>>> WQ_MEM_RECLAIM-patch fixes this for me:
>>>>
>>>> Sounds good, can you send a formal patch then?
>>>>
>>> Do you mean both? Test case and fix? I can :)
>>
>> Sure, but only the fix is for stable. Thanks!
>>
> It is taken by Greg if there is a Fixes tag in the commit.
> What do you mean by "the fix is for stable"? Is the current
> Linus tree not affected by this?

I just meant the fix should get a Cc: stable tag, and the testcase should not.
mm/ has an exception to the "anything with a Fixes: tag can be taken to stable" rule.

> 
> --
> Uladzislau Rezki




* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-25 18:21                 ` Uladzislau Rezki
  2025-02-26 10:59                   ` Vlastimil Babka
@ 2025-02-26 15:51                   ` Keith Busch
  2025-02-26 15:58                     ` Uladzislau Rezki
  1 sibling, 1 reply; 44+ messages in thread
From: Keith Busch @ 2025-02-26 15:51 UTC (permalink / raw)
  To: Uladzislau Rezki
  Cc: Keith Busch, Vlastimil Babka, Paul E. McKenney, Joel Fernandes,
	Josh Triplett, Boqun Feng, Christoph Lameter, David Rientjes,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Julia Lawall, Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik, linux-nvme, leitao

On Tue, Feb 25, 2025 at 07:21:19PM +0100, Uladzislau Rezki wrote:
> WQ_MEM_RECLAIM-patch fixes this for me:

This is successful with the new kunit test for me as well. I can't
readily test this in production where I first learned of this issue (at
least not in the near term), but for what it's worth, this looks like a
good change to me.

Reviewed-by: Keith Busch <kbusch@kernel.org>
 
> <snip>
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 4030907b6b7d..1b5ed5512782 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -1304,6 +1304,8 @@ module_param(rcu_min_cached_objs, int, 0444);
>  static int rcu_delay_page_cache_fill_msec = 5000;
>  module_param(rcu_delay_page_cache_fill_msec, int, 0444);
> 
> +static struct workqueue_struct *rcu_reclaim_wq;
> +
>  /* Maximum number of jiffies to wait before draining a batch. */
>  #define KFREE_DRAIN_JIFFIES (5 * HZ)
>  #define KFREE_N_BATCHES 2
> @@ -1632,10 +1634,10 @@ __schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
>         if (delayed_work_pending(&krcp->monitor_work)) {
>                 delay_left = krcp->monitor_work.timer.expires - jiffies;
>                 if (delay < delay_left)
> -                       mod_delayed_work(system_unbound_wq, &krcp->monitor_work, delay);
> +                       mod_delayed_work(rcu_reclaim_wq, &krcp->monitor_work, delay);
>                 return;
>         }
> -       queue_delayed_work(system_unbound_wq, &krcp->monitor_work, delay);
> +       queue_delayed_work(rcu_reclaim_wq, &krcp->monitor_work, delay);
>  }
> 
>  static void
> @@ -1733,7 +1735,7 @@ kvfree_rcu_queue_batch(struct kfree_rcu_cpu *krcp)
>                         // "free channels", the batch can handle. Break
>                         // the loop since it is done with this CPU thus
>                         // queuing an RCU work is _always_ success here.
> -                       queued = queue_rcu_work(system_unbound_wq, &krwp->rcu_work);
> +                       queued = queue_rcu_work(rcu_reclaim_wq, &krwp->rcu_work);
>                         WARN_ON_ONCE(!queued);
>                         break;
>                 }
> @@ -1883,7 +1885,7 @@ run_page_cache_worker(struct kfree_rcu_cpu *krcp)
>         if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
>                         !atomic_xchg(&krcp->work_in_progress, 1)) {
>                 if (atomic_read(&krcp->backoff_page_cache_fill)) {
> -                       queue_delayed_work(system_unbound_wq,
> +                       queue_delayed_work(rcu_reclaim_wq,
>                                 &krcp->page_cache_work,
>                                         msecs_to_jiffies(rcu_delay_page_cache_fill_msec));
>                 } else {
> @@ -2120,6 +2122,10 @@ void __init kvfree_rcu_init(void)
>         int i, j;
>         struct shrinker *kfree_rcu_shrinker;
> 
> +       rcu_reclaim_wq = alloc_workqueue("rcu_reclaim",
> +               WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
> +       WARN_ON(!rcu_reclaim_wq);
> +
>         /* Clamp it to [0:100] seconds interval. */
>         if (rcu_delay_page_cache_fill_msec < 0 ||
>                 rcu_delay_page_cache_fill_msec > 100 * MSEC_PER_SEC) {
> <snip>
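
For reference, the scenario the kunit coverage discussed here exercises is an
object freed with kfree_rcu() while its cache is destroyed right afterwards,
which kmem_cache_destroy() now has to absorb by waiting for the pending
kvfree_rcu() work. The skeleton below is only an illustration of that shape,
not the actual slub_kunit code; the struct, cache and test names are invented.

<snip>
// SPDX-License-Identifier: GPL-2.0
/* Illustrative KUnit skeleton only; not the actual slub_kunit test. */
#include <kunit/test.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_obj {
	int payload;
	struct rcu_head rcu;	/* needed for the two-argument kfree_rcu() */
};

static void demo_kfree_rcu_then_destroy(struct kunit *test)
{
	struct kmem_cache *cache;
	struct demo_obj *obj;

	cache = kmem_cache_create("demo_kfree_rcu", sizeof(*obj), 0, 0, NULL);
	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);

	obj = kmem_cache_alloc(cache, GFP_KERNEL);
	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, obj);

	/* Queue the free; it may still be pending when destroy runs. */
	kfree_rcu(obj, rcu);

	/* With the series applied, kmem_cache_destroy() waits for the
	 * pending kfree_rcu() instead of complaining about objects
	 * remaining in the cache.
	 */
	kmem_cache_destroy(cache);
}

static struct kunit_case demo_cases[] = {
	KUNIT_CASE(demo_kfree_rcu_then_destroy),
	{}
};

static struct kunit_suite demo_suite = {
	.name = "demo_kfree_rcu",
	.test_cases = demo_cases,
};
kunit_test_suite(demo_suite);

MODULE_LICENSE("GPL");
<snip>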



* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-26 15:46                           ` Vlastimil Babka
@ 2025-02-26 15:57                             ` Uladzislau Rezki
  0 siblings, 0 replies; 44+ messages in thread
From: Uladzislau Rezki @ 2025-02-26 15:57 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Uladzislau Rezki, Keith Busch, Paul E. McKenney, Joel Fernandes,
	Josh Triplett, Boqun Feng, Christoph Lameter, David Rientjes,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Julia Lawall, Jakub Kicinski, Jason A. Donenfeld, Andrew Morton,
	Roman Gushchin, Hyeonggon Yoo, linux-mm, linux-kernel, rcu,
	Alexander Potapenko, Marco Elver, Dmitry Vyukov, kasan-dev,
	Jann Horn, Mateusz Guzik, linux-nvme, leitao

On Wed, Feb 26, 2025 at 04:46:38PM +0100, Vlastimil Babka wrote:
> On 2/26/25 4:42 PM, Uladzislau Rezki wrote:
> > On Wed, Feb 26, 2025 at 03:36:39PM +0100, Vlastimil Babka wrote:
> >> On 2/26/25 3:31 PM, Uladzislau Rezki wrote:
> >>> On Wed, Feb 26, 2025 at 11:59:53AM +0100, Vlastimil Babka wrote:
> >>>> On 2/25/25 7:21 PM, Uladzislau Rezki wrote:
> >>>>>>
> >>>>> WQ_MEM_RECLAIM-patch fixes this for me:
> >>>>
> >>>> Sounds good, can you send a formal patch then?
> >>>>
> >>> Do you mean both? Test case and fix? I can :)
> >>
> >> Sure, but only the fix is for stable. Thanks!
> >>
> > It is taken by Gregg if there is a Fixes tag in the commit.
> > What do you mean: the fix is for stable? The current Linus
> > tree is not suffering from this?
> 
> I just meant the fix should be a Cc: stable, and the testcase not.
> mm/ has an exception from "anything with Fixes: can be taken to stable"
> 
Got it.

--
Uladzislau Rezki



* Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()
  2025-02-26 15:51                   ` Keith Busch
@ 2025-02-26 15:58                     ` Uladzislau Rezki
  0 siblings, 0 replies; 44+ messages in thread
From: Uladzislau Rezki @ 2025-02-26 15:58 UTC (permalink / raw)
  To: Keith Busch
  Cc: Uladzislau Rezki, Keith Busch, Vlastimil Babka, Paul E. McKenney,
	Joel Fernandes, Josh Triplett, Boqun Feng, Christoph Lameter,
	David Rientjes, Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan,
	Zqiang, Julia Lawall, Jakub Kicinski, Jason A. Donenfeld,
	Andrew Morton, Roman Gushchin, Hyeonggon Yoo, linux-mm,
	linux-kernel, rcu, Alexander Potapenko, Marco Elver,
	Dmitry Vyukov, kasan-dev, Jann Horn, Mateusz Guzik, linux-nvme,
	leitao

On Wed, Feb 26, 2025 at 08:51:37AM -0700, Keith Busch wrote:
> On Tue, Feb 25, 2025 at 07:21:19PM +0100, Uladzislau Rezki wrote:
> > WQ_MEM_RECLAIM-patch fixes this for me:
> 
> This is successful with the new kunit test for me as well. I can't
> readily test this in production where I first learned of this issue (at
> least not in the near term), but for what it's worth, this looks like a
> good change to me.
> 
> Reviewed-by: Keith Busch <kbusch@kernel.org>
>  
Thank you for checking. I will apply this.

--
Uladzislau Rezki



Thread overview: 44+ messages
     [not found] <20240807-b4-slab-kfree_rcu-destroy-v2-0-ea79102f428c@suse.cz>
2024-08-09 15:02 ` [-next conflict imminent] Re: [PATCH v2 0/7] mm, slub: handle pending kfree_rcu() in kmem_cache_destroy() Vlastimil Babka
2024-08-09 15:12   ` Jann Horn
2024-08-09 15:14     ` Vlastimil Babka
2024-08-10  0:11       ` Andrew Morton
2024-08-10 20:25         ` Vlastimil Babka
2024-08-10 20:30           ` Andrew Morton
     [not found] ` <20240807-b4-slab-kfree_rcu-destroy-v2-5-ea79102f428c@suse.cz>
2024-08-09 16:26   ` [PATCH v2 5/7] rcu/kvfree: Add kvfree_rcu_barrier() API Uladzislau Rezki
2024-08-09 17:00     ` Vlastimil Babka
2024-08-20 16:02       ` Uladzislau Rezki
     [not found] ` <20240807-b4-slab-kfree_rcu-destroy-v2-7-ea79102f428c@suse.cz>
2024-08-09 16:23   ` [PATCH v2 7/7] kunit, slub: add test_kfree_rcu() and test_leak_destroy() Uladzislau Rezki
2024-09-14 13:22   ` Hyeonggon Yoo
2024-09-14 18:39     ` Vlastimil Babka
2024-09-20 13:35   ` Guenter Roeck
2024-09-21 20:40     ` Vlastimil Babka
2024-09-21 21:08       ` Guenter Roeck
2024-09-21 21:25         ` Vlastimil Babka
2024-09-22  6:16           ` Hyeonggon Yoo
2024-09-22 14:13             ` Guenter Roeck
2024-09-25 12:56               ` Hyeonggon Yoo
2024-09-26 12:54                 ` Vlastimil Babka
2024-09-30  8:47                   ` Vlastimil Babka
     [not found] ` <20240807-b4-slab-kfree_rcu-destroy-v2-6-ea79102f428c@suse.cz>
2025-02-21 16:30   ` [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy() Keith Busch
2025-02-21 16:51     ` Mateusz Guzik
2025-02-21 16:52       ` Mateusz Guzik
2025-02-21 17:28     ` Vlastimil Babka
2025-02-24 11:44       ` Uladzislau Rezki
2025-02-24 15:37         ` Keith Busch
2025-02-25  9:57         ` Vlastimil Babka
2025-02-25 13:39           ` Uladzislau Rezki
2025-02-25 14:12             ` Vlastimil Babka
2025-02-25 16:03           ` Keith Busch
2025-02-25 17:05             ` Keith Busch
2025-02-25 17:41               ` Uladzislau Rezki
2025-02-25 18:11                 ` Vlastimil Babka
2025-02-25 18:21                   ` Uladzislau Rezki
2025-02-25 18:21                 ` Uladzislau Rezki
2025-02-26 10:59                   ` Vlastimil Babka
2025-02-26 14:31                     ` Uladzislau Rezki
2025-02-26 14:36                       ` Vlastimil Babka
2025-02-26 15:42                         ` Uladzislau Rezki
2025-02-26 15:46                           ` Vlastimil Babka
2025-02-26 15:57                             ` Uladzislau Rezki
2025-02-26 15:51                   ` Keith Busch
2025-02-26 15:58                     ` Uladzislau Rezki
