From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
To: linux-mm@kvack.org, akpm@linux-foundation.org
Cc: Yu Zhao <yuzhao@google.com>,
"T . J . Alumbaugh" <talumbau@google.com>,
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Subject: [PATCH 3/3] mm/lru_gen: Don't build multi-gen LRU page table walk code on architecture not supported
Date: Tue, 13 Jun 2023 17:30:47 +0530
Message-ID: <20230613120047.149573-3-aneesh.kumar@linux.ibm.com>
In-Reply-To: <20230613120047.149573-1-aneesh.kumar@linux.ibm.com>
Not all architectures support hardware atomic updates of page table access
bits. On such architectures, the multi-gen LRU does not use the page table
walk to classify pages into generations. Add a kernel config option and
avoid building the page table walk code on architectures that don't need it.
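As an illustration, an architecture that does provide hardware-updated
access bits opts in with a single select in its Kconfig entry, as the
arm64 and x86 hunks below do (shown here for arm64 only):

    config ARM64
    	...
    	select LRU_TASK_PAGE_AGING if LRU_GEN

Architectures that do not select the option only get the rmap-based
(non page table walk) aging path.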
No performance change was observed with the MongoDB YCSB test:

Patch details        Throughput (Ops/sec)
Without patch                       93278
With patch                          93400
Without patch:
$ size mm/vmscan.o
text data bss dec hex filename
112102 42721 40 154863 25cef mm/vmscan.o
With patch:
$ size mm/vmscan.o
text data bss dec hex filename
105430 41333 24 146787 23d63 mm/vmscan.o
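
The size reduction comes from compiling out the page table walk machinery.
When CONFIG_LRU_TASK_PAGE_AGING is not selected, mm/vmscan.c falls back to
no-op stubs so the callers stay unchanged; a simplified sketch of the stubs
added in the diff below:

    #ifndef CONFIG_LRU_TASK_PAGE_AGING
    /* no per-task mm walk: nothing batched, nothing to reset */
    static void reset_current_reclaim_batch_size(struct lruvec *lruvec)
    {
    }

    /* no walk state is ever allocated */
    static inline void *set_mm_walk(struct pglist_data *pgdat, bool force_alloc)
    {
    	return NULL;
    }

    static inline void clear_mm_walk(void)
    {
    }
    #endif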
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/Kconfig | 3 +
arch/arm64/Kconfig | 1 +
arch/x86/Kconfig | 1 +
include/linux/memcontrol.h | 2 +-
include/linux/mm_types.h | 8 +--
include/linux/mmzone.h | 8 +++
include/linux/swap.h | 2 +-
kernel/fork.c | 2 +-
mm/memcontrol.c | 2 +-
mm/vmscan.c | 128 +++++++++++++++++++++++++++++++++----
10 files changed, 137 insertions(+), 20 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 205fd23e0cad..5cdd98731298 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1458,6 +1458,9 @@ config DYNAMIC_SIGFRAME
config HAVE_ARCH_NODE_DEV_GROUP
bool
+config LRU_TASK_PAGE_AGING
+ bool
+
config ARCH_HAS_NONLEAF_PMD_YOUNG
bool
help
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b1201d25a8a4..e0994fb3504b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -225,6 +225,7 @@ config ARM64
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select KASAN_VMALLOC if KASAN
+ select LRU_TASK_PAGE_AGING if LRU_GEN
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE
select NEED_SG_DMA_LENGTH
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 53bab123a8ee..bde9e6f33b22 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -276,6 +276,7 @@ config X86
select HAVE_GENERIC_VDSO
select HOTPLUG_SMT if SMP
select IRQ_FORCED_THREADING
+ select LRU_TASK_PAGE_AGING if LRU_GEN
select NEED_PER_CPU_EMBED_FIRST_CHUNK
select NEED_PER_CPU_PAGE_FIRST_CHUNK
select NEED_SG_DMA_LENGTH
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 43d4ec8445d4..ea5d1d7bfb8b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -321,7 +321,7 @@ struct mem_cgroup {
struct deferred_split deferred_split_queue;
#endif
-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
/* per-memcg mm_struct list */
struct lru_gen_mm_list mm_list;
#endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 306a3d1a0fa6..f90a4860a792 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -786,7 +786,7 @@ struct mm_struct {
*/
unsigned long ksm_rmap_items;
#endif
-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
struct {
/* this mm_struct is on lru_gen_mm_list */
struct list_head list;
@@ -801,7 +801,7 @@ struct mm_struct {
struct mem_cgroup *memcg;
#endif
} lru_gen;
-#endif /* CONFIG_LRU_GEN */
+#endif /* CONFIG_LRU_TASK_PAGE_AGING */
} __randomize_layout;
/*
@@ -830,7 +830,7 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
return (struct cpumask *)&mm->cpu_bitmap;
}
-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
struct lru_gen_mm_list {
/* mm_struct list for page table walkers */
@@ -864,7 +864,7 @@ static inline void lru_gen_use_mm(struct mm_struct *mm)
WRITE_ONCE(mm->lru_gen.bitmap, -1);
}
-#else /* !CONFIG_LRU_GEN */
+#else /* !CONFIG_LRU_TASK_PAGE_AGING */
static inline void lru_gen_add_mm(struct mm_struct *mm)
{
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a4889c9d4055..b35698148d3c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -427,6 +427,7 @@ struct lru_gen_folio {
#endif
};
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
enum {
MM_LEAF_TOTAL, /* total leaf entries */
MM_LEAF_OLD, /* old leaf entries */
@@ -469,6 +470,7 @@ struct lru_gen_mm_walk {
bool can_swap;
bool force_scan;
};
+#endif
void lru_gen_init_lruvec(struct lruvec *lruvec);
void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
@@ -613,8 +615,12 @@ struct lruvec {
#ifdef CONFIG_LRU_GEN
/* evictable pages divided into generations */
struct lru_gen_folio lrugen;
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
/* to concurrently iterate lru_gen_mm_list */
struct lru_gen_mm_state mm_state;
+#else
+ bool seq_update_progress;
+#endif
#endif
#ifdef CONFIG_MEMCG
struct pglist_data *pgdat;
@@ -1354,8 +1360,10 @@ typedef struct pglist_data {
unsigned long flags;
#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
/* kswap mm walk data */
struct lru_gen_mm_walk mm_walk;
+#endif
/* lru_gen_folio list */
struct lru_gen_memcg memcg_lru;
#endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3c69cb653cb9..ce09b1e44275 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -155,7 +155,7 @@ union swap_header {
struct reclaim_state {
/* pages reclaimed outside of LRU-based reclaim */
unsigned long reclaimed;
-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
/* per-thread mm walk data */
struct lru_gen_mm_walk *mm_walk;
#endif
diff --git a/kernel/fork.c b/kernel/fork.c
index ed4e01daccaa..2c9e21e39f84 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2939,7 +2939,7 @@ pid_t kernel_clone(struct kernel_clone_args *args)
get_task_struct(p);
}
- if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) {
+ if (IS_ENABLED(CONFIG_LRU_TASK_PAGE_AGING) && !(clone_flags & CLONE_VM)) {
/* lock the task to synchronize with memcg migration */
task_lock(p);
lru_gen_add_mm(p->mm);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 959d6a27e23d..d8fe30d880c6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6404,7 +6404,7 @@ static void mem_cgroup_move_task(void)
}
#endif
-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
static void mem_cgroup_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f277beba556c..207e62d42888 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3304,6 +3304,7 @@ static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS;
}
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
/******************************************************************************
* Bloom filters
******************************************************************************/
@@ -3650,6 +3651,7 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq)
return success;
}
+#endif
/******************************************************************************
* PID controller
@@ -3819,6 +3821,8 @@ static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
return folio;
}
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
+
/* promote pages accessed through page tables */
static int folio_update_gen(struct folio *folio, int gen)
{
@@ -3882,6 +3886,16 @@ static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk
}
}
+static void reset_current_reclaim_batch_size(struct lruvec *lruvec)
+{
+ struct lru_gen_mm_walk *walk;
+
+ walk = current->reclaim_state->mm_walk;
+ if (walk && walk->batched)
+ return reset_batch_size(lruvec, walk);
+
+}
+
static int should_skip_vma(unsigned long start, unsigned long end, struct mm_walk *args)
{
struct address_space *mapping;
@@ -4304,7 +4318,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
} while (err == -EAGAIN);
}
-static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat, bool force_alloc)
+static void *set_mm_walk(struct pglist_data *pgdat, bool force_alloc)
{
struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk;
@@ -4335,6 +4349,23 @@ static void clear_mm_walk(void)
if (!current_is_kswapd())
kfree(walk);
}
+#else
+
+static void reset_current_reclaim_batch_size(struct lruvec *lruvec)
+{
+
+}
+
+static inline void *set_mm_walk(struct pglist_data *pgdat, bool force_alloc)
+{
+ return NULL;
+}
+
+static inline void clear_mm_walk(void)
+{
+
+}
+#endif
static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
{
@@ -4468,11 +4499,15 @@ static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan)
/* make sure preceding modifications appear */
smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1);
+#ifndef CONFIG_LRU_TASK_PAGE_AGING
+ lruvec->seq_update_progress = false;
+#endif
spin_unlock_irq(&lruvec->lru_lock);
}
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
- struct scan_control *sc, bool can_swap, bool force_scan)
+ bool can_swap, bool force_scan)
{
bool success;
struct lru_gen_mm_walk *walk;
@@ -4498,7 +4533,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
goto done;
}
- walk = set_mm_walk(NULL, true);
+ walk = (struct lru_gen_mm_walk *)set_mm_walk(NULL, true);
if (!walk) {
success = iterate_mm_list_nowalk(lruvec, max_seq);
goto done;
@@ -4520,6 +4555,51 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
return success;
}
+#else
+
+/*
+ * inc_max_seq can drop the lru_lock in between. So use a waitqueue seq_update_progress
+ * to allow concurrent access.
+ */
+bool __try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
+ bool can_swap, bool force_scan)
+{
+ bool success = false;
+ struct lru_gen_folio *lrugen = &lruvec->lrugen;
+
+ VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq));
+
+ /* see the comment in iterate_mm_list() */
+ if (lruvec->seq_update_progress)
+ success = false;
+ else {
+ spin_lock_irq(&lruvec->lru_lock);
+
+ if (max_seq != lrugen->max_seq)
+ goto done;
+
+ if (lruvec->seq_update_progress)
+ goto done;
+
+ success = true;
+ lruvec->seq_update_progress = true;
+done:
+ spin_unlock_irq(&lruvec->lru_lock);
+ }
+
+ if (success)
+ inc_max_seq(lruvec, can_swap, force_scan);
+
+ return success;
+}
+
+static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
+ bool can_swap, bool force_scan)
+{
+ return __try_to_inc_max_seq(lruvec, max_seq, can_swap, force_scan);
+}
+#endif
+
/******************************************************************************
* working set protection
@@ -4630,6 +4710,7 @@ static void __look_around_gen_update(struct folio *folio, int new_gen)
folio_activate(folio);
}
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
static inline bool current_reclaim_state_can_swap(void)
{
if (current->reclaim_state)
@@ -4651,6 +4732,18 @@ static void look_around_gen_update(struct folio *folio, int new_gen)
}
return __look_around_gen_update(folio, new_gen);
}
+#else
+
+static inline bool current_reclaim_state_can_swap(void)
+{
+ return true;
+}
+
+static inline void look_around_gen_update(struct folio *folio, int new_gen)
+{
+ return __look_around_gen_update(folio, new_gen);
+}
+#endif
/*
* This function exploits spatial locality when shrink_folio_list() walks the
@@ -4714,7 +4807,6 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
folio = get_pfn_folio(pfn, memcg, pgdat,
current_reclaim_state_can_swap());
-
if (!folio)
continue;
@@ -4734,9 +4826,11 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
arch_leave_lazy_mmu_mode();
mem_cgroup_unlock_pages();
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
/* feedback from rmap walkers to page table walkers */
if (suitable_to_scan(i, young))
update_bloom_filter(lruvec, max_seq, pvmw->pmd);
+#endif
}
/******************************************************************************
@@ -5156,7 +5250,6 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
struct folio *next;
enum vm_event_item item;
struct reclaim_stat stat;
- struct lru_gen_mm_walk *walk;
bool skip_retry = false;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@@ -5211,9 +5304,7 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
move_folios_to_lru(lruvec, &list);
- walk = current->reclaim_state->mm_walk;
- if (walk && walk->batched)
- reset_batch_size(lruvec, walk);
+ reset_current_reclaim_batch_size(lruvec);
item = PGSTEAL_KSWAPD + reclaimer_offset();
if (!cgroup_reclaim(sc))
@@ -5321,7 +5412,7 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool
return nr_to_scan;
/* skip this lruvec as it's low on cold folios */
- return try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false) ? -1 : 0;
+ return try_to_inc_max_seq(lruvec, max_seq, can_swap, false) ? -1 : 0;
}
static unsigned long get_nr_to_reclaim(struct scan_control *sc)
@@ -5929,6 +6020,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
seq_putc(m, '\n');
}
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
seq_puts(m, " ");
for (i = 0; i < NR_MM_STATS; i++) {
const char *s = " ";
@@ -5945,6 +6037,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
seq_printf(m, " %10lu%c", n, s[i]);
}
seq_putc(m, '\n');
+#endif
}
/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
@@ -6026,7 +6119,7 @@ static int run_aging(struct lruvec *lruvec, unsigned long seq, struct scan_contr
if (!force_scan && min_seq[!can_swap] + MAX_NR_GENS - 1 <= max_seq)
return -ERANGE;
- try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, force_scan);
+ try_to_inc_max_seq(lruvec, max_seq, can_swap, force_scan);
return 0;
}
@@ -6218,7 +6311,12 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
for_each_gen_type_zone(gen, type, zone)
INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]);
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
lruvec->mm_state.seq = MIN_NR_GENS;
+#else
+ lruvec->seq_update_progress = false;
+#endif
+
}
#ifdef CONFIG_MEMCG
@@ -6237,16 +6335,20 @@ void lru_gen_init_pgdat(struct pglist_data *pgdat)
void lru_gen_init_memcg(struct mem_cgroup *memcg)
{
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
INIT_LIST_HEAD(&memcg->mm_list.fifo);
spin_lock_init(&memcg->mm_list.lock);
+
+#endif
}
void lru_gen_exit_memcg(struct mem_cgroup *memcg)
{
- int i;
int nid;
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
VM_WARN_ON_ONCE(!list_empty(&memcg->mm_list.fifo));
+#endif
for_each_node(nid) {
struct lruvec *lruvec = get_lruvec(memcg, nid);
@@ -6256,10 +6358,12 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg)
lruvec->lrugen.list.next = LIST_POISON1;
- for (i = 0; i < NR_BLOOM_FILTERS; i++) {
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
+ for (int i = 0; i < NR_BLOOM_FILTERS; i++) {
bitmap_free(lruvec->mm_state.filters[i]);
lruvec->mm_state.filters[i] = NULL;
}
+#endif
}
}
--
2.40.1
Thread overview: 14+ messages
2023-06-13 12:00 [PATCH 1/3] mm/lru_gen: Move some code around so that next patch is simpler Aneesh Kumar K.V
2023-06-13 12:00 ` [PATCH 2/3] mm/lru_gen: lru_gen_look_around simplification Aneesh Kumar K.V
2023-06-13 12:00 ` Aneesh Kumar K.V [this message]
2023-06-13 12:23 ` [PATCH 3/3] mm/lru_gen: Don't build multi-gen LRU page table walk code on architecture not supported Matthew Wilcox
2023-06-13 13:28 ` Aneesh Kumar K V
2023-06-13 13:36 ` Matthew Wilcox
2023-06-13 13:47 ` Aneesh Kumar K V
2023-06-21 2:27 ` kernel test robot
2023-06-24 14:53 ` Aneesh Kumar K.V
2023-06-25 19:34 ` Yu Zhao
2023-06-26 10:52 ` Aneesh Kumar K V
2023-06-26 17:04 ` Yu Zhao
2023-06-27 11:48 ` Aneesh Kumar K V
2023-06-27 19:10 ` Yu Zhao