From: Michal Hocko <mhocko@suse.com>
To: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: cgroups@vger.kernel.org, linux-mm@kvack.org,
"Andrew Morton" <akpm@linux-foundation.org>,
"Johannes Weiner" <hannes@cmpxchg.org>,
"Michal Koutný" <mkoutny@suse.com>,
"Peter Zijlstra" <peterz@infradead.org>,
"Thomas Gleixner" <tglx@linutronix.de>,
"Vladimir Davydov" <vdavydov.dev@gmail.com>,
"Waiman Long" <longman@redhat.com>,
"Roman Gushchin" <guro@fb.com>
Subject: Re: [PATCH v3 1/5] mm/memcg: Revert ("mm/memcg: optimize user context object stock access")
Date: Mon, 21 Feb 2022 15:26:49 +0100 [thread overview]
Message-ID: <YhOhKedktkyA8nR+@dhcp22.suse.cz> (raw)
In-Reply-To: <20220217094802.3644569-2-bigeasy@linutronix.de>
On Thu 17-02-22 10:47:58, Sebastian Andrzej Siewior wrote:
> From: Michal Hocko <mhocko@suse.com>
>
> The optimisation is based on a micro-benchmark where local_irq_save() is
> more expensive than preempt_disable(). There is no evidence that the
> difference is visible in any real-world workload, and there are CPUs where
> the opposite is true (local_irq_save() is cheaper than preempt_disable()).
>
> Based on micro-benchmarks, the optimisation makes sense on PREEMPT_NONE,
> where preempt_disable() is optimised away. There is no improvement with
> PREEMPT_DYNAMIC since the preemption counter is always available.
>
> The optimisation also makes the PREEMPT_RT integration more complicated,
> since most of its assumptions do not hold on PREEMPT_RT.
>
> Revert the optimisation since it complicates the PREEMPT_RT integration
> and the improvement is hardly visible.
>
> [ bigeasy: Patch body around Michal's diff ]
Thanks for preparing the changelog for this. I was planning to post mine,
but I was waiting for feedback from Waiman. Anyway, this looks good to
me.
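
For anyone reading the changelog without the original optimisation at
hand, the difference boils down to roughly the following (a simplified
sketch of the removed get_obj_stock()/put_obj_stock() fast path, not the
exact mm/memcontrol.c code; the two variants obviously cannot coexist in
one build, this only contrasts the locking):

	struct obj_stock *old;		/* per-context stock, pre-revert */
	struct memcg_stock_pcp *stock;	/* single per-CPU stock, post-revert */
	unsigned long flags;

	/* Before the revert, task context avoided disabling interrupts: */
	preempt_disable();	/* only a barrier when preemption is compiled out */
	old = &this_cpu_ptr(&memcg_stock)->task_obj;
	/* ... consume/refill the cached objcg bytes ... */
	preempt_enable();

	/* After the revert, every context disables interrupts: */
	local_irq_save(flags);
	stock = this_cpu_ptr(&memcg_stock);
	/* ... consume/refill the cached objcg bytes ... */
	local_irq_restore(flags);

Interrupt context already took the local_irq_save() path before the
revert, so it is only the task-context fast path that goes away.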
>
> Link: https://lore.kernel.org/all/YgOGkXXCrD%2F1k+p4@dhcp22.suse.cz
> Link: https://lkml.kernel.org/r/YdX+INO9gQje6d0S@linutronix.de
> Signed-off-by: Michal Hocko <mhocko@suse.com>
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Acked-by: Roman Gushchin <guro@fb.com>
> Acked-by: Johannes Weiner <hannes@cmpxchg.org>
> ---
> mm/memcontrol.c | 94 ++++++++++++++-----------------------------------
> 1 file changed, 27 insertions(+), 67 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 3c4816147273a..8ab2dc75e70ec 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2078,23 +2078,17 @@ void unlock_page_memcg(struct page *page)
> folio_memcg_unlock(page_folio(page));
> }
>
> -struct obj_stock {
> +struct memcg_stock_pcp {
> + struct mem_cgroup *cached; /* this never be root cgroup */
> + unsigned int nr_pages;
> +
> #ifdef CONFIG_MEMCG_KMEM
> struct obj_cgroup *cached_objcg;
> struct pglist_data *cached_pgdat;
> unsigned int nr_bytes;
> int nr_slab_reclaimable_b;
> int nr_slab_unreclaimable_b;
> -#else
> - int dummy[0];
> #endif
> -};
> -
> -struct memcg_stock_pcp {
> - struct mem_cgroup *cached; /* this never be root cgroup */
> - unsigned int nr_pages;
> - struct obj_stock task_obj;
> - struct obj_stock irq_obj;
>
> struct work_struct work;
> unsigned long flags;
> @@ -2104,13 +2098,13 @@ static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
> static DEFINE_MUTEX(percpu_charge_mutex);
>
> #ifdef CONFIG_MEMCG_KMEM
> -static void drain_obj_stock(struct obj_stock *stock);
> +static void drain_obj_stock(struct memcg_stock_pcp *stock);
> static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
> struct mem_cgroup *root_memcg);
> static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages);
>
> #else
> -static inline void drain_obj_stock(struct obj_stock *stock)
> +static inline void drain_obj_stock(struct memcg_stock_pcp *stock)
> {
> }
> static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
> @@ -2190,9 +2184,7 @@ static void drain_local_stock(struct work_struct *dummy)
> local_irq_save(flags);
>
> stock = this_cpu_ptr(&memcg_stock);
> - drain_obj_stock(&stock->irq_obj);
> - if (in_task())
> - drain_obj_stock(&stock->task_obj);
> + drain_obj_stock(stock);
> drain_stock(stock);
> clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
>
> @@ -2767,41 +2759,6 @@ static struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg)
> */
> #define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT)
>
> -/*
> - * Most kmem_cache_alloc() calls are from user context. The irq disable/enable
> - * sequence used in this case to access content from object stock is slow.
> - * To optimize for user context access, there are now two object stocks for
> - * task context and interrupt context access respectively.
> - *
> - * The task context object stock can be accessed by disabling preemption only
> - * which is cheap in non-preempt kernel. The interrupt context object stock
> - * can only be accessed after disabling interrupt. User context code can
> - * access interrupt object stock, but not vice versa.
> - */
> -static inline struct obj_stock *get_obj_stock(unsigned long *pflags)
> -{
> - struct memcg_stock_pcp *stock;
> -
> - if (likely(in_task())) {
> - *pflags = 0UL;
> - preempt_disable();
> - stock = this_cpu_ptr(&memcg_stock);
> - return &stock->task_obj;
> - }
> -
> - local_irq_save(*pflags);
> - stock = this_cpu_ptr(&memcg_stock);
> - return &stock->irq_obj;
> -}
> -
> -static inline void put_obj_stock(unsigned long flags)
> -{
> - if (likely(in_task()))
> - preempt_enable();
> - else
> - local_irq_restore(flags);
> -}
> -
> /*
> * mod_objcg_mlstate() may be called with irq enabled, so
> * mod_memcg_lruvec_state() should be used.
> @@ -3082,10 +3039,13 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
> void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
> enum node_stat_item idx, int nr)
> {
> + struct memcg_stock_pcp *stock;
> unsigned long flags;
> - struct obj_stock *stock = get_obj_stock(&flags);
> int *bytes;
>
> + local_irq_save(flags);
> + stock = this_cpu_ptr(&memcg_stock);
> +
> /*
> * Save vmstat data in stock and skip vmstat array update unless
> * accumulating over a page of vmstat data or when pgdat or idx
> @@ -3136,26 +3096,29 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
> if (nr)
> mod_objcg_mlstate(objcg, pgdat, idx, nr);
>
> - put_obj_stock(flags);
> + local_irq_restore(flags);
> }
>
> static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
> {
> + struct memcg_stock_pcp *stock;
> unsigned long flags;
> - struct obj_stock *stock = get_obj_stock(&flags);
> bool ret = false;
>
> + local_irq_save(flags);
> +
> + stock = this_cpu_ptr(&memcg_stock);
> if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
> stock->nr_bytes -= nr_bytes;
> ret = true;
> }
>
> - put_obj_stock(flags);
> + local_irq_restore(flags);
>
> return ret;
> }
>
> -static void drain_obj_stock(struct obj_stock *stock)
> +static void drain_obj_stock(struct memcg_stock_pcp *stock)
> {
> struct obj_cgroup *old = stock->cached_objcg;
>
> @@ -3211,13 +3174,8 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
> {
> struct mem_cgroup *memcg;
>
> - if (in_task() && stock->task_obj.cached_objcg) {
> - memcg = obj_cgroup_memcg(stock->task_obj.cached_objcg);
> - if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
> - return true;
> - }
> - if (stock->irq_obj.cached_objcg) {
> - memcg = obj_cgroup_memcg(stock->irq_obj.cached_objcg);
> + if (stock->cached_objcg) {
> + memcg = obj_cgroup_memcg(stock->cached_objcg);
> if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
> return true;
> }
> @@ -3228,10 +3186,13 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
> static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
> bool allow_uncharge)
> {
> + struct memcg_stock_pcp *stock;
> unsigned long flags;
> - struct obj_stock *stock = get_obj_stock(&flags);
> unsigned int nr_pages = 0;
>
> + local_irq_save(flags);
> +
> + stock = this_cpu_ptr(&memcg_stock);
> if (stock->cached_objcg != objcg) { /* reset if necessary */
> drain_obj_stock(stock);
> obj_cgroup_get(objcg);
> @@ -3247,7 +3208,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
> stock->nr_bytes &= (PAGE_SIZE - 1);
> }
>
> - put_obj_stock(flags);
> + local_irq_restore(flags);
>
> if (nr_pages)
> obj_cgroup_uncharge_pages(objcg, nr_pages);
> @@ -6812,7 +6773,6 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
> long nr_pages;
> struct mem_cgroup *memcg;
> struct obj_cgroup *objcg;
> - bool use_objcg = folio_memcg_kmem(folio);
>
> VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
>
> @@ -6821,7 +6781,7 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
> * folio memcg or objcg at this point, we have fully
> * exclusive access to the folio.
> */
> - if (use_objcg) {
> + if (folio_memcg_kmem(folio)) {
> objcg = __folio_objcg(folio);
> /*
> * This get matches the put at the end of the function and
> @@ -6849,7 +6809,7 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
>
> nr_pages = folio_nr_pages(folio);
>
> - if (use_objcg) {
> + if (folio_memcg_kmem(folio)) {
> ug->nr_memory += nr_pages;
> ug->nr_kmem += nr_pages;
>
> --
> 2.34.1
--
Michal Hocko
SUSE Labs