From: Johannes Weiner <hannes@cmpxchg.org>
To: Eric Dumazet <edumazet@google.com>
Cc: Ivan Babrou <ivan@cloudflare.com>, Linux MM <linux-mm@kvack.org>,
Linux Kernel Network Developers <netdev@vger.kernel.org>,
linux-kernel <linux-kernel@vger.kernel.org>,
Michal Hocko <mhocko@kernel.org>,
Roman Gushchin <roman.gushchin@linux.dev>,
Shakeel Butt <shakeelb@google.com>,
Muchun Song <songmuchun@bytedance.com>,
Andrew Morton <akpm@linux-foundation.org>,
"David S. Miller" <davem@davemloft.net>,
Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>,
David Ahern <dsahern@kernel.org>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
cgroups@vger.kernel.org, kernel-team <kernel-team@cloudflare.com>
Subject: Re: Low TCP throughput due to vmpressure with swap enabled
Date: Tue, 6 Dec 2022 21:51:01 +0100 [thread overview]
Message-ID: <Y4+rNYF9WZyJyBQp@cmpxchg.org> (raw)
In-Reply-To: <CANn89iJfx4QdVBqJ23oFJoz5DJKou=ZwVBNNXFNDJRNAqNvzwQ@mail.gmail.com>
On Tue, Dec 06, 2022 at 08:13:50PM +0100, Eric Dumazet wrote:
> On Tue, Dec 6, 2022 at 8:00 PM Johannes Weiner <hannes@cmpxchg.org> wrote:
> > @@ -1701,10 +1701,10 @@ void mem_cgroup_sk_alloc(struct sock *sk);
> > void mem_cgroup_sk_free(struct sock *sk);
> > static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
> > {
> > - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_pressure)
> > + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->socket_pressure)
>
> && READ_ONCE(memcg->socket_pressure))
>
> > return true;
> > do {
> > - if (time_before(jiffies, READ_ONCE(memcg->socket_pressure)))
> > + if (memcg->socket_pressure)
>
> if (READ_ONCE(...))
Good point, I'll add those.
> > @@ -7195,10 +7194,10 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
> > struct page_counter *fail;
> >
> > if (page_counter_try_charge(&memcg->tcpmem, nr_pages, &fail)) {
> > - memcg->tcpmem_pressure = 0;
>
> Orthogonal to your patch, but:
>
> Maybe avoid touching this cache line too often and use READ/WRITE_ONCE() ?
>
> if (READ_ONCE(memcg->socket_pressure))
> WRITE_ONCE(memcg->socket_pressure, false);
Ah, that's a good idea.
I think it'll be fine in the failure case, since that's associated
with OOM and total performance breakdown anyway.
But certainly, in the common case of the charge succeeding, we should
not keep hammering false into that variable over and over.
How about the delta below? I also flipped the branches around to keep
the common path at the first indentation level, hopefully making that
a bit clearer too.
Thanks for taking a look, Eric!
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ef1c388be5b3..13ae10116895 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1701,10 +1701,11 @@ void mem_cgroup_sk_alloc(struct sock *sk);
void mem_cgroup_sk_free(struct sock *sk);
static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
- if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->socket_pressure)
+ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
+ READ_ONCE(memcg->socket_pressure))
return true;
do {
- if (memcg->socket_pressure)
+ if (READ_ONCE(memcg->socket_pressure))
return true;
} while ((memcg = parent_mem_cgroup(memcg)));
return false;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0d4b9dbe775a..96c4ec0f11ca 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -7193,31 +7193,29 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
struct page_counter *fail;
- if (page_counter_try_charge(&memcg->tcpmem, nr_pages, &fail)) {
- memcg->socket_pressure = false;
- return true;
+ if (!page_counter_try_charge(&memcg->tcpmem, nr_pages, &fail)) {
+ WRITE_ONCE(memcg->socket_pressure, true);
+ if (gfp_mask & __GFP_NOFAIL) {
+ page_counter_charge(&memcg->tcpmem, nr_pages);
+ return true;
+ }
+ return false;
}
- memcg->socket_pressure = true;
+ if (unlikely(READ_ONCE(memcg->socket_pressure)))
+ WRITE_ONCE(memcg->socket_pressure, false);
+ }
+
+ if (try_charge(memcg, gfp_mask & ~__GFP_NOFAIL, nr_pages) < 0) {
+ WRITE_ONCE(memcg->socket_pressure, true);
if (gfp_mask & __GFP_NOFAIL) {
- page_counter_charge(&memcg->tcpmem, nr_pages);
+ try_charge(memcg, gfp_mask, nr_pages);
+ mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
return true;
}
return false;
}
-
- if (try_charge(memcg, gfp_mask & ~__GFP_NOFAIL, nr_pages) == 0) {
- memcg->socket_pressure = false;
- goto success;
- }
- memcg->socket_pressure = true;
- if (gfp_mask & __GFP_NOFAIL) {
- try_charge(memcg, gfp_mask, nr_pages);
- goto success;
- }
-
- return false;
-
-success:
+ if (unlikely(READ_ONCE(memcg->socket_pressure)))
+ WRITE_ONCE(memcg->socket_pressure, false);
mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
return true;
}
next prev parent reply other threads:[~2022-12-06 20:51 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-22 0:53 Ivan Babrou
2022-11-22 18:01 ` Eric Dumazet
2022-11-22 18:11 ` Ivan Babrou
2022-11-22 18:23 ` Eric Dumazet
2022-11-22 18:59 ` Yu Zhao
2022-11-22 19:05 ` Ivan Babrou
2022-11-22 19:08 ` Yu Zhao
2022-11-22 19:46 ` Yu Zhao
2022-11-22 20:05 ` Yu Zhao
2022-11-23 0:44 ` Yu Zhao
2022-11-23 21:22 ` Johannes Weiner
2022-11-24 1:18 ` Yu Zhao
2022-11-24 1:29 ` Yu Zhao
2022-11-22 20:05 ` Johannes Weiner
2022-11-22 22:11 ` Ivan Babrou
2022-11-23 1:28 ` Ivan Babrou
2022-11-28 18:07 ` Johannes Weiner
2022-12-05 19:28 ` Shakeel Butt
2022-12-05 23:57 ` Ivan Babrou
2022-12-06 0:50 ` Ivan Babrou
2022-12-06 19:00 ` Johannes Weiner
2022-12-06 19:13 ` Eric Dumazet
2022-12-06 20:51 ` Johannes Weiner [this message]
2022-12-06 23:10 ` Shakeel Butt
2022-12-07 12:53 ` Johannes Weiner
2022-12-08 0:31 ` Shakeel Butt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Y4+rNYF9WZyJyBQp@cmpxchg.org \
--to=hannes@cmpxchg.org \
--cc=akpm@linux-foundation.org \
--cc=cgroups@vger.kernel.org \
--cc=davem@davemloft.net \
--cc=dsahern@kernel.org \
--cc=edumazet@google.com \
--cc=ivan@cloudflare.com \
--cc=kernel-team@cloudflare.com \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=roman.gushchin@linux.dev \
--cc=shakeelb@google.com \
--cc=songmuchun@bytedance.com \
--cc=yoshfuji@linux-ipv6.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox