linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Julian Sun <sunjunchao@bytedance.com>
To: Tejun Heo <tj@kernel.org>
Cc: linux-fsdevel@vger.kernel.org, cgroups@vger.kernel.org,
	linux-mm@kvack.org, viro@zeniv.linux.org.uk, brauner@kernel.org,
	jack@suse.cz, hannes@cmpxchg.org, mhocko@kernel.org,
	roman.gushchin@linux.dev, shakeel.butt@linux.dev,
	muchun.song@linux.dev, axboe@kernel.dk
Subject: Re: [External] Re: [PATCH] memcg: Don't wait writeback completion when release memcg.
Date: Fri, 22 Aug 2025 16:22:09 +0800	[thread overview]
Message-ID: <f1ff9656-6633-4a32-ab32-9ee60400b9b0@bytedance.com> (raw)
In-Reply-To: <aKds9ZMUTC8VztEt@slm.duckdns.org>

On 8/22/25 3:01 AM, Tejun Heo wrote:

Hi,

> Hello,
> 
> On Fri, Aug 22, 2025 at 02:00:10AM +0800, Julian Sun wrote:
> ...
>> Do you mean logic like this?
>>
>>      for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++)
>>          wb_wait_for_completion(&memcg->cgwb_frn[i].done);
>>      kfree(memcg);
>>
>> But there still exist task hang issues as long as
>> wb_wait_for_completion() exists.
> 
> Ah, right. I was just thinking about the workqueue being stalled. The
> problem is that the wait itself is too long.
> 
>> I think the scope of impact of the current changes should be
>> manageable. I have checked all the other places where wb_queue_work()
>> is called, and their free_done values are all 0, and I also tested
>> this patch with the reproducer in [1] with kasan and kmemleak enabled.
>> The test result looks fine, so this should not have a significant
>> impact.
>> What do you think?
> 
> My source of reluctance is that it's a peculiar situation where flushing of
> a cgroup takes that long due to hard throttling and the self-freeing
> mechanism isn't the prettiest thing. Do you think you can do the same thing
> through custom waitq wakeup function?

Yeah, this method looks more general if I understand correctly.

If the idea of the following code makes sense to you, I'd like to split
and convert it into formal patches.

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index a07b8cf73ae2..10fede792178 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -172,13 +172,8 @@ static void finish_writeback_work(struct wb_writeback_work *work)

  	if (work->auto_free)
  		kfree(work);
-	if (done) {
-		wait_queue_head_t *waitq = done->waitq;
-
-		/* @done can't be accessed after the following dec */
-		if (atomic_dec_and_test(&done->cnt))
-			wake_up_all(waitq);
-	}
+	if (done)
+		done->wb_waitq->wb_wakeup_func(done->wb_waitq, done);
  }

  static void wb_queue_work(struct bdi_writeback *wb,
@@ -213,7 +208,7 @@ static void wb_queue_work(struct bdi_writeback *wb,
  void wb_wait_for_completion(struct wb_completion *done)
  {
  	atomic_dec(&done->cnt);		/* put down the initial count */
-	wait_event(*done->waitq, !atomic_read(&done->cnt));
+	wait_event(done->wb_waitq->waitq, !atomic_read(&done->cnt));
  }

  #ifdef CONFIG_CGROUP_WRITEBACK
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 2ad261082bba..04699458ac50 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -60,13 +60,56 @@ enum wb_reason {
  	WB_REASON_MAX,
  };

+struct wb_completion;
+typedef struct wb_wait_queue_head wb_wait_queue_head_t;
+typedef void (*wb_wait_wakeup_func_t)(wb_wait_queue_head_t *wq_waitq,
+									  struct wb_completion *done);
+struct wb_wait_queue_head {
+	wait_queue_head_t waitq;
+	wb_wait_wakeup_func_t wb_wakeup_func;
+};
+
  struct wb_completion {
  	atomic_t		cnt;
-	wait_queue_head_t	*waitq;
+	wb_wait_queue_head_t	*wb_waitq;
  };

+static inline void wb_default_wakeup_func(wb_wait_queue_head_t *wq_waitq,
+										  struct wb_completion *done)
+{
+	/* @done can't be accessed after the following dec */
+	if (atomic_dec_and_test(&done->cnt))
+		wake_up_all(&wq_waitq->waitq);
+}
+
+/* used for cgwb_frn, be careful here, @done can't be accessed */
+static inline void wb_empty_wakeup_func(wb_wait_queue_head_t *wq_waitq,
+										struct wb_completion *done)
+{
+}
+
+#define __init_wb_waitqueue_head(wb_waitq, func) 	\
+	do {											\
+		init_waitqueue_head(&wb_waitq.waitq);		\
+		wb_waitq.wb_wakeup_func = func; 			\
+	} while (0)
+
+#define init_wb_waitqueue_head(wb_waitq) 	\
+	__init_wb_waitqueue_head(wb_waitq, wb_default_wakeup_func)
+
+#define __WB_WAIT_QUEUE_HEAD_INITIALIZER(name, func) {	\
+	.waitq = __WAIT_QUEUE_HEAD_INITIALIZER(name.waitq),	\
+	.wb_wakeup_func = func, 							\
+}
+
+#define __DECLARE_WB_WAIT_QUEUE_HEAD(name, func) \
+	struct wb_wait_queue_head name = __WB_WAIT_QUEUE_HEAD_INITIALIZER(name, func)
+
+#define DECLARE_WB_WAIT_QUEUE_HEAD(name) \
+	__DECLARE_WB_WAIT_QUEUE_HEAD(name, wb_default_wakeup_func)
+
  #define __WB_COMPLETION_INIT(_waitq)	\
-	(struct wb_completion){ .cnt = ATOMIC_INIT(1), .waitq = (_waitq) }
+	(struct wb_completion){ .cnt = ATOMIC_INIT(1), .wb_waitq = (_waitq) }

  /*
   * If one wants to wait for one or more wb_writeback_works, each work's
@@ -190,7 +233,7 @@ struct backing_dev_info {
  	struct mutex cgwb_release_mutex;  /* protect shutdown of wb structs */
  	struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */
  #endif
-	wait_queue_head_t wb_waitq;
+	wb_wait_queue_head_t wb_waitq;

  	struct device *dev;
  	char dev_name[64];
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 783904d8c5ef..c4fec9e22978 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -1008,7 +1008,7 @@ int bdi_init(struct backing_dev_info *bdi)
  	bdi->max_prop_frac = FPROP_FRAC_BASE;
  	INIT_LIST_HEAD(&bdi->bdi_list);
  	INIT_LIST_HEAD(&bdi->wb_list);
-	init_waitqueue_head(&bdi->wb_waitq);
+	init_wb_waitqueue_head(bdi->wb_waitq);
  	bdi->last_bdp_sleep = jiffies;

  	return cgwb_bdi_init(bdi);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8dd7fbed5a94..999624535470 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -99,7 +99,7 @@ static struct kmem_cache *memcg_cachep;
  static struct kmem_cache *memcg_pn_cachep;

  #ifdef CONFIG_CGROUP_WRITEBACK
-static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq);
+static __DECLARE_WB_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq, wb_empty_wakeup_func);
  #endif

  static inline bool task_is_dying(void)
@@ -3909,12 +3909,7 @@ static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
  static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
  {
  	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-	int __maybe_unused i;

-#ifdef CONFIG_CGROUP_WRITEBACK
-	for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++)
-		wb_wait_for_completion(&memcg->cgwb_frn[i].done);
-#endif
  	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
  		static_branch_dec(&memcg_sockets_enabled_key);



> 
> Thanks.
> 

Thanks,
-- 
Julian Sun <sunjunchao@bytedance.com>


  reply	other threads:[~2025-08-22  8:22 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-20 11:19 [PATCH 0/3] memcg, writeback: Don't wait writeback completion Julian Sun
2025-08-20 11:19 ` [PATCH 1/3] writeback: Rename wb_writeback_work->auto_free to free_work Julian Sun
2025-08-20 11:19 ` [PATCH] writeback: Add wb_writeback_work->free_done Julian Sun
2025-08-20 11:19 ` [PATCH] memcg: Don't wait writeback completion when release memcg Julian Sun
2025-08-20 20:58   ` Tejun Heo
2025-08-21  2:30     ` [External] " Julian Sun
2025-08-21 16:59       ` Tejun Heo
2025-08-21 18:00         ` Julian Sun
2025-08-21 18:16           ` Julian Sun
2025-08-21 19:01           ` Tejun Heo
2025-08-22  8:22             ` Julian Sun [this message]
2025-08-22 17:56               ` Tejun Heo
2025-08-23  6:18                 ` Julian Sun
2025-08-23  8:08                   ` Giorgi Tchankvetadze
2025-08-23  8:22                     ` Julian Sun
2025-08-23 14:08                       ` Giorgi Tchankvetadze
2025-08-23 15:17                         ` Julian Sun
2025-08-25 17:45                 ` Julian Sun
2025-08-25 18:53                   ` Tejun Heo
2025-08-25 19:06                     ` Julian Sun
2025-08-25 10:13       ` Jan Kara
2025-08-25 12:08         ` Julian Sun
2025-08-25 18:57         ` [External] " Tejun Heo
2025-08-20 12:16 ` [PATCH 0/3] memcg, writeback: Don't wait writeback completion Giorgi Tchankvetadze
2025-08-21  2:37   ` [External] " Julian Sun
2025-08-22  9:29     ` Giorgi Tchankvetadze

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f1ff9656-6633-4a32-ab32-9ee60400b9b0@bytedance.com \
    --to=sunjunchao@bytedance.com \
    --cc=axboe@kernel.dk \
    --cc=brauner@kernel.org \
    --cc=cgroups@vger.kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=muchun.song@linux.dev \
    --cc=roman.gushchin@linux.dev \
    --cc=shakeel.butt@linux.dev \
    --cc=tj@kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox