linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Tal Zussman <tz2294@columbia.edu>
To: Christoph Hellwig <hch@lst.de>, Jens Axboe <axboe@kernel.dk>,
	"Matthew Wilcox (Oracle)" <willy@infradead.org>,
	Christian Brauner <brauner@kernel.org>,
	"Darrick J. Wong" <djwong@kernel.org>,
	Carlos Maiolino <cem@kernel.org>,
	Al Viro <viro@zeniv.linux.org.uk>, Jan Kara <jack@suse.cz>
Cc: Dave Chinner <dgc@kernel.org>,
	Bart Van Assche <bvanassche@acm.org>,
	Gao Xiang <hsiangkao@linux.alibaba.com>,
	linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org
Subject: Re: [PATCH 8/8] RFC: use a TASK_FIFO kthread for read completion support
Date: Thu, 9 Apr 2026 15:06:47 -0400	[thread overview]
Message-ID: <2cdaa767-c071-4e84-b9d7-1c944407f5bb@columbia.edu> (raw)
In-Reply-To: <20260409160243.1008358-9-hch@lst.de>



On 4/9/26 12:02 PM, Christoph Hellwig wrote:
> Commit 3fffb589b9a6 ("erofs: add per-cpu threads for decompression as an
> option") explains why workqueues aren't great for low-latency completion
> handling.  Switch to a per-cpu kthread to handle it instead.  This code
> is based on the erofs code in the above commit, but further simplified
> by directly using a kthread instead of a kthread_work.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   block/bio.c | 117 +++++++++++++++++++++++++++++-----------------------
>   1 file changed, 65 insertions(+), 52 deletions(-)
> 
> diff --git a/block/bio.c b/block/bio.c
> index 88d191455762..6a993fb129a0 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -19,7 +19,7 @@
>   #include <linux/blk-crypto.h>
>   #include <linux/xarray.h>
>   #include <linux/kmemleak.h>
> -#include <linux/llist.h>
> +#include <linux/freezer.h>

Why freezer.h and not kthread.h?

>   #include <trace/events/block.h>
>   #include "blk.h"
> @@ -1718,51 +1718,83 @@ void bio_check_pages_dirty(struct bio *bio)
>   EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
>   
>   struct bio_complete_batch {
> -	struct llist_head list;

If we go with this approach, we should remove the newly-added bi_llist from
struct bio too.

> -	struct delayed_work work;
> -	int cpu;
> +	spinlock_t lock;
> +	struct bio_list bios;
> +	struct task_struct *worker;
>   };
>   
>   static DEFINE_PER_CPU(struct bio_complete_batch, bio_complete_batch);
> -static struct workqueue_struct *bio_complete_wq;
>   
> -static void bio_complete_work_fn(struct work_struct *w)
> +static bool bio_try_complete_batch(struct bio_complete_batch *batch)
>   {
> -	struct delayed_work *dw = to_delayed_work(w);
> -	struct bio_complete_batch *batch =
> -		container_of(dw, struct bio_complete_batch, work);
> -	struct llist_node *node;
> -	struct bio *bio, *next;
> +	struct bio_list bios;
> +	unsigned long flags;
> +	struct bio *bio;
>   
> -	do {
> -		node = llist_del_all(&batch->list);
> -		if (!node)
> -			break;
> +	spin_lock_irqsave(&batch->lock, flags);
> +	bios = batch->bios;
> +	bio_list_init(&batch->bios);
> +	spin_unlock_irqrestore(&batch->lock, flags);
>   
> -		node = llist_reverse_order(node);
> -		llist_for_each_entry_safe(bio, next, node, bi_llist)
> -			bio->bi_end_io(bio);
> +	if (bio_list_empty(&bios))
> +		return false;
>   
> -		if (need_resched()) {
> -			if (!llist_empty(&batch->list))
> -				mod_delayed_work_on(batch->cpu,
> -						    bio_complete_wq,
> -						    &batch->work, 0);
> -			break;
> -		}
> -	} while (1);
> +	__set_current_state(TASK_RUNNING);
> +	while ((bio = bio_list_pop(&bios)))
> +		bio->bi_end_io(bio);
> +	return true;
> +}
> +
> +static int bio_complete_thread(void *private)
> +{
> +	struct bio_complete_batch *batch = private;
> +
> +	for (;;) {
> +		set_current_state(TASK_INTERRUPTIBLE);
> +		if (!bio_try_complete_batch(batch))
> +			schedule();
> +	}
> +
> +	return 0;
>   }
>   
>   void __bio_complete_in_task(struct bio *bio)
>   {
> -	struct bio_complete_batch *batch = this_cpu_ptr(&bio_complete_batch);
> +	struct bio_complete_batch *batch;
> +	unsigned long flags;
> +	bool wake;
> +
> +	get_cpu();
> +	batch = this_cpu_ptr(&bio_complete_batch);
> +	spin_lock_irqsave(&batch->lock, flags);
> +	wake = bio_list_empty(&batch->bios);
> +	bio_list_add(&batch->bios, bio);
> +	spin_unlock_irqrestore(&batch->lock, flags);
> +	put_cpu();
>   
> -	if (llist_add(&bio->bi_llist, &batch->list))
> -		mod_delayed_work_on(batch->cpu, bio_complete_wq,
> -				    &batch->work, 1);
> +	if (wake)
> +		wake_up_process(batch->worker);
>   }
>   EXPORT_SYMBOL_GPL(__bio_complete_in_task);
>   
> +static void __init bio_complete_batch_init(int cpu)
> +{
> +	struct bio_complete_batch *batch =
> +		per_cpu_ptr(&bio_complete_batch, cpu);
> +	struct task_struct *worker;
> +
> +	worker = kthread_create_on_cpu(bio_complete_thread,
> +			per_cpu_ptr(&bio_complete_batch, cpu),
> +			cpu, "bio_worker/%u");
> +	if (IS_ERR(worker))
> +		panic("bio: can't create kthread_work");
> +	sched_set_fifo_low(worker);
> +
> +	spin_lock_init(&batch->lock);
> +	bio_list_init(&batch->bios);
> +	batch->worker = worker;
> +}
> +
>   static inline bool bio_remaining_done(struct bio *bio)
>   {
>   	/*
> @@ -2028,16 +2060,7 @@ EXPORT_SYMBOL(bioset_init);
>    */
>   static int bio_complete_batch_cpu_dead(unsigned int cpu)
>   {
> -	struct bio_complete_batch *batch =
> -		per_cpu_ptr(&bio_complete_batch, cpu);
> -	struct llist_node *node;
> -	struct bio *bio, *next;
> -
> -	node = llist_del_all(&batch->list);
> -	node = llist_reverse_order(node);
> -	llist_for_each_entry_safe(bio, next, node, bi_llist)
> -		bio->bi_end_io(bio);
> -
> +	bio_try_complete_batch(per_cpu_ptr(&bio_complete_batch, cpu));
>   	return 0;
>   }
>   
> @@ -2055,18 +2078,8 @@ static int __init init_bio(void)
>   				SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
>   	}
>   
> -	for_each_possible_cpu(i) {
> -		struct bio_complete_batch *batch =
> -			per_cpu_ptr(&bio_complete_batch, i);
> -
> -		init_llist_head(&batch->list);
> -		INIT_DELAYED_WORK(&batch->work, bio_complete_work_fn);
> -		batch->cpu = i;
> -	}
> -
> -	bio_complete_wq = alloc_workqueue("bio_complete", WQ_MEM_RECLAIM, 0);
> -	if (!bio_complete_wq)
> -		panic("bio: can't allocate bio_complete workqueue\n");
> +	for_each_possible_cpu(i)
> +		bio_complete_batch_init(i);
>   
>   	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "block/bio:complete:dead",
>   				NULL, bio_complete_batch_cpu_dead);
> -- 
> 2.47.3
> 



      reply	other threads:[~2026-04-09 19:06 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-09 16:02 bio completion in task enhancements / experiments Christoph Hellwig
2026-04-09 16:02 ` [PATCH 1/8] block: add BIO_COMPLETE_IN_TASK for task-context completion Christoph Hellwig
2026-04-09 16:02 ` [PATCH 2/8] iomap: use BIO_COMPLETE_IN_TASK for dropbehind writeback Christoph Hellwig
2026-04-09 16:02 ` [PATCH 3/8] block: enable RWF_DONTCACHE for block devices Christoph Hellwig
2026-04-09 16:02 ` [PATCH 4/8] FOLD: block: change the defer in task context interface to be procedural Christoph Hellwig
2026-04-09 20:18   ` Matthew Wilcox
2026-04-09 16:02 ` [PATCH 5/8] FOLD: don't use in_task() to decide for offloading Christoph Hellwig
2026-04-09 16:02 ` [PATCH 6/8] iomap: use bio_complete_in_task for buffered read errors Christoph Hellwig
2026-04-09 16:02 ` [PATCH 7/8] iomap: use bio_complete_in_task for buffered write completions Christoph Hellwig
2026-04-09 16:02 ` [PATCH 8/8] RFC: use a TASK_FIFO kthread for read completion support Christoph Hellwig
2026-04-09 19:06   ` Tal Zussman [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2cdaa767-c071-4e84-b9d7-1c944407f5bb@columbia.edu \
    --to=tz2294@columbia.edu \
    --cc=axboe@kernel.dk \
    --cc=brauner@kernel.org \
    --cc=bvanassche@acm.org \
    --cc=cem@kernel.org \
    --cc=dgc@kernel.org \
    --cc=djwong@kernel.org \
    --cc=hch@lst.de \
    --cc=hsiangkao@linux.alibaba.com \
    --cc=jack@suse.cz \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox