@@ -2651,20 +2652,35 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep) struct mem_cgroup *memcg; struct kmem_cache *memcg_cachep; int kmemcg_id; + struct memcg_cache_array *arr; VM_BUG_ON(!is_root_cache(cachep)); if (memcg_kmem_bypass()) return cachep; - memcg = get_mem_cgroup_from_current(); + rcu_read_lock(); + + if (unlikely(current->active_memcg)) + memcg = current->active_memcg; + else + memcg = mem_cgroup_from_task(current); + + if (!memcg || memcg == root_mem_cgroup) + goto out_unlock; + kmemcg_id = READ_ONCE(memcg->kmemcg_id); if (kmemcg_id < 0) - goto out; + goto out_unlock; - memcg_cachep = cache_from_memcg_idx(cachep, kmemcg_id); - if (likely(memcg_cachep)) - return memcg_cachep; + arr = rcu_dereference(cachep->memcg_params.memcg_caches); + + /* + * Make sure we will access the up-to-date value. The code updating + * memcg_caches issues a write barrier to match this (see + * memcg_create_kmem_cache()). + */ + memcg_cachep = READ_ONCE(arr->entries[kmemcg_id]); /* * If we are in a safe context (can wait, and not in interrupt @@ -2677,10 +2693,20 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep) * memcg_create_kmem_cache, this means no further allocation * could happen with the slab_mutex held. So it's better to * defer everything. + * + * If the memcg is dying or memcg_cache is about to be released, + * don't bother creating new kmem_caches. Because memcg_cachep + * is ZEROed as the fist step of kmem offlining, we don't need + * percpu_ref_tryget() here. css_tryget_online() check in*percpu_ref_tryget_live()+ * memcg_schedule_kmem_cache_create() will prevent us from + * creation of a new kmem_cache. */ - memcg_schedule_kmem_cache_create(memcg, cachep); -out: - css_put(&memcg->css); + if (unlikely(!memcg_cachep)) + memcg_schedule_kmem_cache_create(memcg, cachep); + else if (percpu_ref_tryget(&memcg_cachep->memcg_params.refcnt)) + cachep = memcg_cachep; +out_unlock: + rcu_read_lock();
There is one more bug that causes the kernel to panic on bootup when I turned on debugging options.
[ 49.871437] =============================
[ 49.875452] WARNING: suspicious RCU usage
[ 49.879476] 5.2.0-rc1.bz1699202_memcg_test+ #2 Not tainted
[ 49.884967] -----------------------------
[ 49.888991] include/linux/rcupdate.h:268 Illegal context switch
in RCU read-side critical section!
[ 49.897950]
[ 49.897950] other info that might help us debug this:
[ 49.897950]
[ 49.905958]
[ 49.905958] rcu_scheduler_active = 2, debug_locks = 1
[ 49.912492] 3 locks held by systemd/1:
[ 49.916252] #0: 00000000633673c5
(&type->i_mutex_dir_key#5){.+.+}, at: lookup_slow+0x42/0x70
[ 49.924788] #1: 0000000029fa8c75 (rcu_read_lock){....}, at:
memcg_kmem_get_cache+0x12b/0x910
[ 49.933316] #2: 0000000029fa8c75 (rcu_read_lock){....}, at:
memcg_kmem_get_cache+0x3da/0x910
It should be "rcu_read_unlock();" at the end.
-Longman