From: Dennis Zhou <dennis@kernel.org>
To: Tejun Heo <tj@kernel.org>, Christoph Lameter <cl@linux.com>,
Roman Gushchin <guro@fb.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
Dennis Zhou <dennis@kernel.org>
Subject: [PATCH 4/4] percpu: use reclaim threshold instead of running for every page
Date: Mon, 19 Apr 2021 22:50:47 +0000 [thread overview]
Message-ID: <20210419225047.3415425-5-dennis@kernel.org> (raw)
In-Reply-To: <20210419225047.3415425-1-dennis@kernel.org>
The last patch implements reclaim by adding 2 additional lists where a
chunk's lifecycle is:
active_slot -> to_depopulate_slot -> sidelined_slot
This worked well because we were able to nicely converge paths into
isolation. However, it is a bit aggressive to run reclaim for every free page.
Let's accumulate a few free pages before we do this. To do this, the new
lifecycle is:
active_slot -> sidelined_slot -> to_depopulate_slot -> sidelined_slot
The transition from sidelined_slot -> to_depopulate_slot now occurs once a
threshold of sidelined empty populated pages is met, instead of
immediately as before, when a chunk went directly to the
to_depopulate_slot. pcpu_nr_isolated_empty_pop_pages[] is introduced to
aid with this.
Suggested-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
---
mm/percpu-internal.h | 1 +
mm/percpu-stats.c | 8 ++++++--
mm/percpu.c | 44 +++++++++++++++++++++++++++++++++++++-------
3 files changed, 44 insertions(+), 9 deletions(-)
diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index 10604dce806f..b3e43b016276 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -92,6 +92,7 @@ extern int pcpu_nr_slots;
extern int pcpu_sidelined_slot;
extern int pcpu_to_depopulate_slot;
extern int pcpu_nr_empty_pop_pages[];
+extern int pcpu_nr_isolated_empty_pop_pages[];
extern struct pcpu_chunk *pcpu_first_chunk;
extern struct pcpu_chunk *pcpu_reserved_chunk;
diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c
index 2125981acfb9..facc804eb86c 100644
--- a/mm/percpu-stats.c
+++ b/mm/percpu-stats.c
@@ -145,7 +145,7 @@ static int percpu_stats_show(struct seq_file *m, void *v)
int slot, max_nr_alloc;
int *buffer;
enum pcpu_chunk_type type;
- int nr_empty_pop_pages;
+ int nr_empty_pop_pages, nr_isolated_empty_pop_pages;
alloc_buffer:
spin_lock_irq(&pcpu_lock);
@@ -167,8 +167,11 @@ static int percpu_stats_show(struct seq_file *m, void *v)
}
nr_empty_pop_pages = 0;
- for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
+ nr_isolated_empty_pop_pages = 0;
+ for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) {
nr_empty_pop_pages += pcpu_nr_empty_pop_pages[type];
+ nr_isolated_empty_pop_pages += pcpu_nr_isolated_empty_pop_pages[type];
+ }
#define PL(X) \
seq_printf(m, " %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X)
@@ -202,6 +205,7 @@ static int percpu_stats_show(struct seq_file *m, void *v)
PU(min_alloc_size);
PU(max_alloc_size);
P("empty_pop_pages", nr_empty_pop_pages);
+ P("iso_empty_pop_pages", nr_isolated_empty_pop_pages);
seq_putc(m, '\n');
#undef PU
diff --git a/mm/percpu.c b/mm/percpu.c
index 79eebc80860d..ba13e683d022 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -110,6 +110,9 @@
#define PCPU_EMPTY_POP_PAGES_LOW 2
#define PCPU_EMPTY_POP_PAGES_HIGH 4
+/* only schedule reclaim if there are at least N empty pop pages sidelined */
+#define PCPU_EMPTY_POP_RECLAIM_THRESHOLD 4
+
#ifdef CONFIG_SMP
/* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
#ifndef __addr_to_pcpu_ptr
@@ -183,6 +186,7 @@ static LIST_HEAD(pcpu_map_extend_chunks);
* The reserved chunk doesn't contribute to the count.
*/
int pcpu_nr_empty_pop_pages[PCPU_NR_CHUNK_TYPES];
+int pcpu_nr_isolated_empty_pop_pages[PCPU_NR_CHUNK_TYPES];
/*
* The number of populated pages in use by the allocator, protected by
@@ -582,8 +586,10 @@ static void pcpu_isolate_chunk(struct pcpu_chunk *chunk)
if (!chunk->isolated) {
chunk->isolated = true;
pcpu_nr_empty_pop_pages[type] -= chunk->nr_empty_pop_pages;
+ pcpu_nr_isolated_empty_pop_pages[type] +=
+ chunk->nr_empty_pop_pages;
+ list_move(&chunk->list, &pcpu_slot[pcpu_sidelined_slot]);
}
- list_move(&chunk->list, &pcpu_slot[pcpu_to_depopulate_slot]);
}
static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk)
@@ -595,6 +601,8 @@ static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk)
if (chunk->isolated) {
chunk->isolated = false;
pcpu_nr_empty_pop_pages[type] += chunk->nr_empty_pop_pages;
+ pcpu_nr_isolated_empty_pop_pages[type] -=
+ chunk->nr_empty_pop_pages;
pcpu_chunk_relocate(chunk, -1);
}
}
@@ -610,9 +618,15 @@ static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk)
*/
static inline void pcpu_update_empty_pages(struct pcpu_chunk *chunk, int nr)
{
+ enum pcpu_chunk_type type = pcpu_chunk_type(chunk);
+
chunk->nr_empty_pop_pages += nr;
- if (chunk != pcpu_reserved_chunk && !chunk->isolated)
- pcpu_nr_empty_pop_pages[pcpu_chunk_type(chunk)] += nr;
+ if (chunk != pcpu_reserved_chunk) {
+ if (chunk->isolated)
+ pcpu_nr_isolated_empty_pop_pages[type] += nr;
+ else
+ pcpu_nr_empty_pop_pages[type] += nr;
+ }
}
/*
@@ -2138,10 +2152,13 @@ static void pcpu_reclaim_populated(enum pcpu_chunk_type type)
struct list_head *pcpu_slot = pcpu_chunk_list(type);
struct pcpu_chunk *chunk;
struct pcpu_block_md *block;
+ LIST_HEAD(to_depopulate);
int i, end;
spin_lock_irq(&pcpu_lock);
+ list_splice_init(&pcpu_slot[pcpu_to_depopulate_slot], &to_depopulate);
+
restart:
/*
* Once a chunk is isolated to the to_depopulate list, the chunk is no
@@ -2149,9 +2166,9 @@ static void pcpu_reclaim_populated(enum pcpu_chunk_type type)
* other accessor is the free path which only returns area back to the
* allocator not touching the populated bitmap.
*/
- while (!list_empty(&pcpu_slot[pcpu_to_depopulate_slot])) {
- chunk = list_first_entry(&pcpu_slot[pcpu_to_depopulate_slot],
- struct pcpu_chunk, list);
+ while (!list_empty(&to_depopulate)) {
+ chunk = list_first_entry(&to_depopulate, struct pcpu_chunk,
+ list);
WARN_ON(chunk->immutable);
/*
@@ -2208,6 +2225,13 @@ static void pcpu_reclaim_populated(enum pcpu_chunk_type type)
&pcpu_slot[pcpu_sidelined_slot]);
}
+ if (pcpu_nr_isolated_empty_pop_pages[type] >=
+ PCPU_EMPTY_POP_RECLAIM_THRESHOLD) {
+ list_splice_tail_init(&pcpu_slot[pcpu_sidelined_slot],
+ &pcpu_slot[pcpu_to_depopulate_slot]);
+ pcpu_schedule_balance_work();
+ }
+
spin_unlock_irq(&pcpu_lock);
}
@@ -2291,7 +2315,13 @@ void free_percpu(void __percpu *ptr)
}
} else if (pcpu_should_reclaim_chunk(chunk)) {
pcpu_isolate_chunk(chunk);
- need_balance = true;
+ if (chunk->free_bytes == pcpu_unit_size ||
+ pcpu_nr_isolated_empty_pop_pages[pcpu_chunk_type(chunk)] >=
+ PCPU_EMPTY_POP_RECLAIM_THRESHOLD) {
+ list_splice_tail_init(&pcpu_slot[pcpu_sidelined_slot],
+ &pcpu_slot[pcpu_to_depopulate_slot]);
+ need_balance = true;
+ }
}
trace_percpu_free_percpu(chunk->base_addr, off, ptr);
--
2.31.1.368.gbe11c130af-goog
next prev parent reply other threads:[~2021-04-19 22:51 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-04-19 22:50 [PATCH v4 0/4] percpu: partial chunk depopulation Dennis Zhou
2021-04-19 22:50 ` [PATCH 1/4] percpu: factor out pcpu_check_block_hint() Dennis Zhou
2021-04-19 22:50 ` [PATCH 2/4] percpu: use pcpu_free_slot instead of pcpu_nr_slots - 1 Dennis Zhou
2021-04-20 21:22 ` Roman Gushchin
2021-04-19 22:50 ` [PATCH 3/4] percpu: implement partial chunk depopulation Dennis Zhou
2021-07-02 19:11 ` Guenter Roeck
2021-07-02 19:45 ` Dennis Zhou
2021-07-02 20:28 ` Guenter Roeck
2021-07-02 21:00 ` Dennis Zhou
2021-04-19 22:50 ` Dennis Zhou [this message]
2021-04-20 21:23 ` [PATCH 4/4] percpu: use reclaim threshold instead of running for every page Roman Gushchin
2021-04-19 22:54 ` [PATCH v4 0/4] percpu: partial chunk depopulation Dennis Zhou
2021-04-19 22:57 ` Dennis Zhou
2021-04-20 11:07 ` Pratik Sampat
2021-04-20 14:39 ` Dennis Zhou
2021-04-20 15:25 ` Pratik Sampat
2021-04-21 18:24 ` Dennis Zhou
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210419225047.3415425-5-dennis@kernel.org \
--to=dennis@kernel.org \
--cc=cl@linux.com \
--cc=guro@fb.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox