From: Julian Sun <sunjunchao@bytedance.com>
To: Tejun Heo <tj@kernel.org>
Cc: linux-fsdevel@vger.kernel.org, cgroups@vger.kernel.org,
linux-mm@kvack.org, viro@zeniv.linux.org.uk, brauner@kernel.org,
jack@suse.cz, hannes@cmpxchg.org, mhocko@kernel.org,
roman.gushchin@linux.dev, shakeel.butt@linux.dev,
muchun.song@linux.dev, axboe@kernel.dk
Subject: Re: [External] Re: [PATCH] memcg: Don't wait writeback completion when release memcg.
Date: Fri, 22 Aug 2025 16:22:09 +0800 [thread overview]
Message-ID: <f1ff9656-6633-4a32-ab32-9ee60400b9b0@bytedance.com> (raw)
In-Reply-To: <aKds9ZMUTC8VztEt@slm.duckdns.org>
On 8/22/25 3:01 AM, Tejun Heo wrote:
Hi,
> Hello,
>
> On Fri, Aug 22, 2025 at 02:00:10AM +0800, Julian Sun wrote:
> ...
>> Do you mean logic like this?
>>
>> for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++)
>> wb_wait_for_completion(&memcg->cgwb_frn[i].done);
>> kfree(memcg);
>>
>> But there still exist task hang issues as long as
>> wb_wait_for_completion() exists.
>
> Ah, right. I was just thinking about the workqueue being stalled. The
> problem is that the wait itself is too long.
>
>> I think the scope of impact of the current changes should be
>> manageable. I have checked all the other places where wb_queue_work()
>> is called, and their free_done values are all 0, and I also tested
>> this patch with the reproducer in [1] with kasan and kmemleak enabled.
>> The test result looks fine, so this should not have a significant
>> impact.
>> What do you think?
>
> My source of reluctance is that it's a peculiar situation where flushing of
> a cgroup takes that long due to hard throttling and the self-freeing
> mechanism isn't the prettiest thing. Do you think you can do the same thing
> through custom waitq wakeup function?
Yeah, this method looks more general, if I understand correctly.
If the idea behind the following code makes sense to you, I'd like to split
it up and convert it into formal patches.
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index a07b8cf73ae2..10fede792178 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -172,13 +172,8 @@ static void finish_writeback_work(struct wb_writeback_work *work)
if (work->auto_free)
kfree(work);
- if (done) {
- wait_queue_head_t *waitq = done->waitq;
-
- /* @done can't be accessed after the following dec */
- if (atomic_dec_and_test(&done->cnt))
- wake_up_all(waitq);
- }
+ if (done)
+ done->wb_waitq->wb_wakeup_func(done->wb_waitq, done);
}
static void wb_queue_work(struct bdi_writeback *wb,
@@ -213,7 +208,7 @@ static void wb_queue_work(struct bdi_writeback *wb,
void wb_wait_for_completion(struct wb_completion *done)
{
atomic_dec(&done->cnt); /* put down the initial count */
- wait_event(*done->waitq, !atomic_read(&done->cnt));
+ wait_event(done->wb_waitq->waitq, !atomic_read(&done->cnt));
}
#ifdef CONFIG_CGROUP_WRITEBACK
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 2ad261082bba..04699458ac50 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -60,13 +60,56 @@ enum wb_reason {
WB_REASON_MAX,
};
+struct wb_completion;
+typedef struct wb_wait_queue_head wb_wait_queue_head_t;
+typedef void (*wb_wait_wakeup_func_t)(wb_wait_queue_head_t *wq_waitq,
+ struct wb_completion *done);
+struct wb_wait_queue_head {
+ wait_queue_head_t waitq;
+ wb_wait_wakeup_func_t wb_wakeup_func;
+};
+
struct wb_completion {
atomic_t cnt;
- wait_queue_head_t *waitq;
+ wb_wait_queue_head_t *wb_waitq;
};
+static inline void wb_default_wakeup_func(wb_wait_queue_head_t *wq_waitq,
+ struct wb_completion *done)
+{
+ /* @done can't be accessed after the following dec */
+ if (atomic_dec_and_test(&done->cnt))
+ wake_up_all(&wq_waitq->waitq);
+}
+
+/* used for cgwb_frn, be careful here, @done can't be accessed */
+static inline void wb_empty_wakeup_func(wb_wait_queue_head_t *wq_waitq,
+ struct wb_completion *done)
+{
+}
+
+#define __init_wb_waitqueue_head(wb_waitq, func) \
+ do { \
+ init_waitqueue_head(&wb_waitq.waitq); \
+ wb_waitq.wb_wakeup_func = func; \
+ } while (0)
+
+#define init_wb_waitqueue_head(wb_waitq) \
+ __init_wb_waitqueue_head(wb_waitq, wb_default_wakeup_func)
+
+#define __WB_WAIT_QUEUE_HEAD_INITIALIZER(name, func) { \
+ .waitq = __WAIT_QUEUE_HEAD_INITIALIZER(name.waitq), \
+ .wb_wakeup_func = func, \
+}
+
+#define __DECLARE_WB_WAIT_QUEUE_HEAD(name, func) \
+ struct wb_wait_queue_head name = __WB_WAIT_QUEUE_HEAD_INITIALIZER(name, func)
+
+#define DECLARE_WB_WAIT_QUEUE_HEAD(name) \
+ __DECLARE_WB_WAIT_QUEUE_HEAD(name, wb_default_wakeup_func)
+
#define __WB_COMPLETION_INIT(_waitq) \
- (struct wb_completion){ .cnt = ATOMIC_INIT(1), .waitq = (_waitq) }
+ (struct wb_completion){ .cnt = ATOMIC_INIT(1), .wb_waitq = (_waitq) }
/*
* If one wants to wait for one or more wb_writeback_works, each work's
@@ -190,7 +233,7 @@ struct backing_dev_info {
struct mutex cgwb_release_mutex; /* protect shutdown of wb structs */
struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */
#endif
- wait_queue_head_t wb_waitq;
+ wb_wait_queue_head_t wb_waitq;
struct device *dev;
char dev_name[64];
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 783904d8c5ef..c4fec9e22978 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -1008,7 +1008,7 @@ int bdi_init(struct backing_dev_info *bdi)
bdi->max_prop_frac = FPROP_FRAC_BASE;
INIT_LIST_HEAD(&bdi->bdi_list);
INIT_LIST_HEAD(&bdi->wb_list);
- init_waitqueue_head(&bdi->wb_waitq);
+ init_wb_waitqueue_head(bdi->wb_waitq);
bdi->last_bdp_sleep = jiffies;
return cgwb_bdi_init(bdi);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8dd7fbed5a94..999624535470 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -99,7 +99,7 @@ static struct kmem_cache *memcg_cachep;
static struct kmem_cache *memcg_pn_cachep;
#ifdef CONFIG_CGROUP_WRITEBACK
-static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq);
+static __DECLARE_WB_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq, wb_empty_wakeup_func);
#endif
static inline bool task_is_dying(void)
@@ -3909,12 +3909,7 @@ static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
- int __maybe_unused i;
-#ifdef CONFIG_CGROUP_WRITEBACK
- for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++)
- wb_wait_for_completion(&memcg->cgwb_frn[i].done);
-#endif
if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
static_branch_dec(&memcg_sockets_enabled_key);
>
> Thanks.
>
Thanks,
--
Julian Sun <sunjunchao@bytedance.com>
next prev parent reply other threads:[~2025-08-22 8:22 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-20 11:19 [PATCH 0/3] memcg, writeback: Don't wait writeback completion Julian Sun
2025-08-20 11:19 ` [PATCH 1/3] writeback: Rename wb_writeback_work->auto_free to free_work Julian Sun
2025-08-20 11:19 ` [PATCH] writeback: Add wb_writeback_work->free_done Julian Sun
2025-08-20 11:19 ` [PATCH] memcg: Don't wait writeback completion when release memcg Julian Sun
2025-08-20 20:58 ` Tejun Heo
2025-08-21 2:30 ` [External] " Julian Sun
2025-08-21 16:59 ` Tejun Heo
2025-08-21 18:00 ` Julian Sun
2025-08-21 18:16 ` Julian Sun
2025-08-21 19:01 ` Tejun Heo
2025-08-22 8:22 ` Julian Sun [this message]
2025-08-22 17:56 ` Tejun Heo
2025-08-23 6:18 ` Julian Sun
2025-08-23 8:08 ` Giorgi Tchankvetadze
2025-08-23 8:22 ` Julian Sun
2025-08-23 14:08 ` Giorgi Tchankvetadze
2025-08-23 15:17 ` Julian Sun
2025-08-25 17:45 ` Julian Sun
2025-08-25 18:53 ` Tejun Heo
2025-08-25 19:06 ` Julian Sun
2025-08-25 10:13 ` Jan Kara
2025-08-25 12:08 ` Julian Sun
2025-08-25 18:57 ` [External] " Tejun Heo
2025-08-20 12:16 ` [PATCH 0/3] memcg, writeback: Don't wait writeback completion Giorgi Tchankvetadze
2025-08-21 2:37 ` [External] " Julian Sun
2025-08-22 9:29 ` Giorgi Tchankvetadze
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f1ff9656-6633-4a32-ab32-9ee60400b9b0@bytedance.com \
--to=sunjunchao@bytedance.com \
--cc=axboe@kernel.dk \
--cc=brauner@kernel.org \
--cc=cgroups@vger.kernel.org \
--cc=hannes@cmpxchg.org \
--cc=jack@suse.cz \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=muchun.song@linux.dev \
--cc=roman.gushchin@linux.dev \
--cc=shakeel.butt@linux.dev \
--cc=tj@kernel.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox