On Thu, Apr 03, 2025 at 06:10:49PM -0700, JP Kobryn wrote:
> --- a/kernel/cgroup/cgroup.c
> +++ b/kernel/cgroup/cgroup.c
...
> @@ -5425,6 +5417,9 @@ static void css_free_rwork_fn(struct work_struct *work)
>         struct cgroup_subsys_state *parent = css->parent;
>         int id = css->id;
>
> +       if (ss->css_rstat_flush)
> +               css_rstat_exit(css);
> +

It should be safe to call this unguarded (see also my comment below at
css_rstat_flush()).

>         ss->css_free(css);
>         cgroup_idr_remove(&ss->css_idr, id);
>         cgroup_put(cgrp);
> @@ -5477,11 +5472,8 @@ static void css_release_work_fn(struct work_struct *work)
>         if (ss) {
>                 struct cgroup *parent_cgrp;
>
> -               /* css release path */
> -               if (!list_empty(&css->rstat_css_node)) {
> +               if (ss->css_rstat_flush)
>                         css_rstat_flush(css);
> -                       list_del_rcu(&css->rstat_css_node);
> -               }

Ditto.

> __bpf_kfunc void css_rstat_flush(struct cgroup_subsys_state *css)
> {
> -       struct cgroup *cgrp = css->cgroup;
>         int cpu;
>
>         might_sleep();
>         for_each_possible_cpu(cpu) {
> -               struct cgroup *pos;
> +               struct cgroup_subsys_state *pos;
>
>                 /* Reacquire for each CPU to avoid disabling IRQs too long */
>                 __css_rstat_lock(css, cpu);
> -               pos = cgroup_rstat_updated_list(cgrp, cpu);
> +               pos = css_rstat_updated_list(css, cpu);
>                 for (; pos; pos = pos->rstat_flush_next) {
> -                       struct cgroup_subsys_state *css;
> -
> -                       cgroup_base_stat_flush(pos, cpu);
> -                       bpf_rstat_flush(pos, cgroup_parent(pos), cpu);
> -
> -                       rcu_read_lock();
> -                       list_for_each_entry_rcu(css, &pos->rstat_css_list,
> -                                               rstat_css_node)
> +                       if (css_is_cgroup(pos)) {
> +                               cgroup_base_stat_flush(pos->cgroup, cpu);
> +                               bpf_rstat_flush(pos->cgroup,
> +                                               cgroup_parent(pos->cgroup), cpu);
> +                       } else if (pos->ss->css_rstat_flush)
>                                 css->ss->css_rstat_flush(css, cpu);

These conditions -- css_is_cgroup(pos) and pos->ss->css_rstat_flush --
should be invariant wrt pos in the split tree, right? It's a
μoptimization but may be worth checking only once before processing the
update tree?

> -                       rcu_read_unlock();
>                 }
>                 __css_rstat_unlock(css, cpu);
>                 if (!cond_resched())
> @@ -362,29 +359,38 @@ int css_rstat_init(struct cgroup_subsys_state *css)
>         struct cgroup *cgrp = css->cgroup;
>         int cpu;
>
> -       /* the root cgrp has rstat_cpu preallocated */
> -       if (!cgrp->rstat_cpu) {
> -               cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
> -               if (!cgrp->rstat_cpu)
> -                       return -ENOMEM;
> +       /* the root cgrp has rstat_base_cpu preallocated */
> +       if (css_is_cgroup(css)) {
> +               if (!cgrp->rstat_base_cpu) {
> +                       cgrp->rstat_base_cpu = alloc_percpu(struct cgroup_rstat_base_cpu);
> +                       if (!cgrp->rstat_base_cpu)
> +                               return -ENOMEM;
> +               }
>         }
>
> -       if (!cgrp->rstat_base_cpu) {
> -               cgrp->rstat_base_cpu = alloc_percpu(struct cgroup_rstat_base_cpu);
> -               if (!cgrp->rstat_cpu) {
> -                       free_percpu(cgrp->rstat_cpu);

Thanks,
Michal
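
P.S. A rough, untested sketch of the hoisting I mean above. It only
pulls the loads out of the per-pos loop; also note I pass pos to the
subsystem callback here, which I assume is what the patch intends, and
the tail of the per-cpu loop is elided:

__bpf_kfunc void css_rstat_flush(struct cgroup_subsys_state *css)
{
	/* Both invariant for every pos on the split (per-subsystem) tree. */
	bool is_base = css_is_cgroup(css);
	void (*ss_flush)(struct cgroup_subsys_state *, int) =
		is_base ? NULL : css->ss->css_rstat_flush;
	int cpu;

	might_sleep();
	for_each_possible_cpu(cpu) {
		struct cgroup_subsys_state *pos;

		/* Reacquire for each CPU to avoid disabling IRQs too long */
		__css_rstat_lock(css, cpu);
		pos = css_rstat_updated_list(css, cpu);
		for (; pos; pos = pos->rstat_flush_next) {
			/* Branch on locals evaluated once, not per pos. */
			if (is_base) {
				cgroup_base_stat_flush(pos->cgroup, cpu);
				bpf_rstat_flush(pos->cgroup,
						cgroup_parent(pos->cgroup), cpu);
			} else if (ss_flush) {
				ss_flush(pos, cpu);
			}
		}
		__css_rstat_unlock(css, cpu);
		...
	}
}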