From: Michal Hocko <mhocko@suse.com>
To: Shakeel Butt <shakeelb@google.com>
Cc: "Waiman Long" <longman@redhat.com>,
"Sebastian Andrzej Siewior" <bigeasy@linutronix.de>,
Cgroups <cgroups@vger.kernel.org>,
"Linux MM" <linux-mm@kvack.org>,
"Andrew Morton" <akpm@linux-foundation.org>,
"Johannes Weiner" <hannes@cmpxchg.org>,
"Michal Koutný" <mkoutny@suse.com>,
"Peter Zijlstra" <peterz@infradead.org>,
"Thomas Gleixner" <tglx@linutronix.de>,
"Vladimir Davydov" <vdavydov.dev@gmail.com>
Subject: Re: [PATCH 3/4] mm/memcg: Add a local_lock_t for IRQ and TASK object.
Date: Wed, 9 Feb 2022 10:17:05 +0100
Message-ID: <YgOGkXXCrD/1k+p4@dhcp22.suse.cz>
In-Reply-To: <CALvZod7yovQ5OTWr=k_eiEBVb1LTRvPkbsY8joAtyigQnvBUww@mail.gmail.com>
On Tue 08-02-22 09:58:27, Shakeel Butt wrote:
[...]
> commit 559271146efc0 is a part of patch series "mm/memcg: Reduce
> kmemcache memory accounting overhead". For perf numbers you can see
> the cover letter in the commit fdbcb2a6d677 ("mm/memcg: move
> mod_objcg_state() to memcontrol.c").
Thanks for the pointer! This helped because I couldn't follow the
original series[1].
I definitely do not want to dispute the whole series. It is this
particular patch which makes further changes much more complex AFAICS
and the cost/benefit should be re-evaluated IMHO. Microbenchmarks are a
nice indication but in this case we should re-evaluate more carefully.
The complexity has increased even more with the RT requirements.
[1] It would be great if those patch specific benchmarks were in the
specific patch next time. That would make it much easier to track.
Just as an exercise I have tried to revert 559271146efc0. It needs some
tweaking (on top of Linus' tree) and I hope I got everything right.
Sebastian, does this help for the RT case?
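Should this fly, a later conversion to a local_lock_t for PREEMPT_RT
should be mostly mechanical because there is a single per-CPU stock
again. A rough sketch of what I would expect (this is not the actual
series; the lock name and the consume_stock example are made up by me):

	struct memcg_stock_pcp {
		local_lock_t lock;	/* protects all fields below */
		struct mem_cgroup *cached;
		unsigned int nr_pages;
		/* ... */
	};

	static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = {
		.lock = INIT_LOCAL_LOCK(lock),
	};

	static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
	{
		struct memcg_stock_pcp *stock;
		unsigned long flags;
		bool ret = false;

		/* !RT: disables interrupts; RT: takes a per-CPU spinlock */
		local_lock_irqsave(&memcg_stock.lock, flags);
		stock = this_cpu_ptr(&memcg_stock);
		if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
			stock->nr_pages -= nr_pages;
			ret = true;
		}
		local_unlock_irqrestore(&memcg_stock.lock, flags);

		return ret;
	}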
---
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 09d342c7cbd0..4b1572ae990d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2085,23 +2085,17 @@ void unlock_page_memcg(struct page *page)
folio_memcg_unlock(page_folio(page));
}
-struct obj_stock {
+struct memcg_stock_pcp {
+ struct mem_cgroup *cached; /* this never be root cgroup */
+ unsigned int nr_pages;
+
#ifdef CONFIG_MEMCG_KMEM
struct obj_cgroup *cached_objcg;
struct pglist_data *cached_pgdat;
unsigned int nr_bytes;
int nr_slab_reclaimable_b;
int nr_slab_unreclaimable_b;
-#else
- int dummy[0];
#endif
-};
-
-struct memcg_stock_pcp {
- struct mem_cgroup *cached; /* this never be root cgroup */
- unsigned int nr_pages;
- struct obj_stock task_obj;
- struct obj_stock irq_obj;
struct work_struct work;
unsigned long flags;
@@ -2111,12 +2105,12 @@ static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
static DEFINE_MUTEX(percpu_charge_mutex);
#ifdef CONFIG_MEMCG_KMEM
-static void drain_obj_stock(struct obj_stock *stock);
+static void drain_obj_stock(struct memcg_stock_pcp *stock);
static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
struct mem_cgroup *root_memcg);
#else
-static inline void drain_obj_stock(struct obj_stock *stock)
+static inline void drain_obj_stock(struct memcg_stock_pcp *stock)
{
}
static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
@@ -2193,9 +2187,7 @@ static void drain_local_stock(struct work_struct *dummy)
local_irq_save(flags);
stock = this_cpu_ptr(&memcg_stock);
- drain_obj_stock(&stock->irq_obj);
- if (in_task())
- drain_obj_stock(&stock->task_obj);
+ drain_obj_stock(stock);
drain_stock(stock);
clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
@@ -2770,41 +2762,6 @@ static struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg)
*/
#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT)
-/*
- * Most kmem_cache_alloc() calls are from user context. The irq disable/enable
- * sequence used in this case to access content from object stock is slow.
- * To optimize for user context access, there are now two object stocks for
- * task context and interrupt context access respectively.
- *
- * The task context object stock can be accessed by disabling preemption only
- * which is cheap in non-preempt kernel. The interrupt context object stock
- * can only be accessed after disabling interrupt. User context code can
- * access interrupt object stock, but not vice versa.
- */
-static inline struct obj_stock *get_obj_stock(unsigned long *pflags)
-{
- struct memcg_stock_pcp *stock;
-
- if (likely(in_task())) {
- *pflags = 0UL;
- preempt_disable();
- stock = this_cpu_ptr(&memcg_stock);
- return &stock->task_obj;
- }
-
- local_irq_save(*pflags);
- stock = this_cpu_ptr(&memcg_stock);
- return &stock->irq_obj;
-}
-
-static inline void put_obj_stock(unsigned long flags)
-{
- if (likely(in_task()))
- preempt_enable();
- else
- local_irq_restore(flags);
-}
-
/*
* mod_objcg_mlstate() may be called with irq enabled, so
* mod_memcg_lruvec_state() should be used.
@@ -3075,10 +3032,13 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
enum node_stat_item idx, int nr)
{
+ struct memcg_stock_pcp *stock;
unsigned long flags;
- struct obj_stock *stock = get_obj_stock(&flags);
int *bytes;
+ local_irq_save(flags);
+ stock = this_cpu_ptr(&memcg_stock);
+
/*
* Save vmstat data in stock and skip vmstat array update unless
* accumulating over a page of vmstat data or when pgdat or idx
@@ -3129,26 +3089,29 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
if (nr)
mod_objcg_mlstate(objcg, pgdat, idx, nr);
- put_obj_stock(flags);
+ local_irq_restore(flags);
}
static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
{
+ struct memcg_stock_pcp *stock;
unsigned long flags;
- struct obj_stock *stock = get_obj_stock(&flags);
bool ret = false;
+ local_irq_save(flags);
+
+ stock = this_cpu_ptr(&memcg_stock);
if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
stock->nr_bytes -= nr_bytes;
ret = true;
}
- put_obj_stock(flags);
+ local_irq_restore(flags);
return ret;
}
-static void drain_obj_stock(struct obj_stock *stock)
+static void drain_obj_stock(struct memcg_stock_pcp *stock)
{
struct obj_cgroup *old = stock->cached_objcg;
@@ -3204,13 +3167,8 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
{
struct mem_cgroup *memcg;
- if (in_task() && stock->task_obj.cached_objcg) {
- memcg = obj_cgroup_memcg(stock->task_obj.cached_objcg);
- if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
- return true;
- }
- if (stock->irq_obj.cached_objcg) {
- memcg = obj_cgroup_memcg(stock->irq_obj.cached_objcg);
+ if (stock->cached_objcg) {
+ memcg = obj_cgroup_memcg(stock->cached_objcg);
if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
return true;
}
@@ -3221,10 +3179,13 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
bool allow_uncharge)
{
+ struct memcg_stock_pcp *stock;
unsigned long flags;
- struct obj_stock *stock = get_obj_stock(&flags);
unsigned int nr_pages = 0;
+ local_irq_save(flags);
+
+ stock = this_cpu_ptr(&memcg_stock);
if (stock->cached_objcg != objcg) { /* reset if necessary */
drain_obj_stock(stock);
obj_cgroup_get(objcg);
@@ -3240,7 +3201,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
stock->nr_bytes &= (PAGE_SIZE - 1);
}
- put_obj_stock(flags);
+ local_irq_restore(flags);
if (nr_pages)
obj_cgroup_uncharge_pages(objcg, nr_pages);
@@ -6821,7 +6782,6 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
long nr_pages;
struct mem_cgroup *memcg;
struct obj_cgroup *objcg;
- bool use_objcg = folio_memcg_kmem(folio);
VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
@@ -6830,7 +6790,7 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
* folio memcg or objcg at this point, we have fully
* exclusive access to the folio.
*/
- if (use_objcg) {
+ if (folio_memcg_kmem(folio)) {
objcg = __folio_objcg(folio);
/*
* This get matches the put at the end of the function and
@@ -6858,7 +6818,7 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
nr_pages = folio_nr_pages(folio);
- if (use_objcg) {
+ if (folio_memcg_kmem(folio)) {
ug->nr_memory += nr_pages;
ug->nr_kmem += nr_pages;
--
Michal Hocko
SUSE Labs