From: Marcelo Tosatti <mtosatti@redhat.com>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: Johannes Weiner <hannes@cmpxchg.org>,
Michal Hocko <mhocko@kernel.org>,
Roman Gushchin <roman.gushchin@linux.dev>,
Shakeel Butt <shakeel.butt@linux.dev>,
Muchun Song <muchun.song@linux.dev>,
Andrew Morton <akpm@linux-foundation.org>,
Christoph Lameter <cl@linux.com>,
Pekka Enberg <penberg@kernel.org>,
David Rientjes <rientjes@google.com>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Vlastimil Babka <vbabka@suse.cz>,
Hyeonggon Yoo <42.hyeyoo@gmail.com>,
Leonardo Bras <leobras.c@gmail.com>,
Thomas Gleixner <tglx@linutronix.de>,
Waiman Long <longman@redhat.com>,
Boqun Feng <boqun.feng@gmail.com>,
Frederic Weisbecker <frederic@kernel.org>,
Marcelo Tosatti <mtosatti@redhat.com>
Subject: [PATCH v2 4/5] swap: apply new queue_percpu_work_on() interface
Date: Mon, 02 Mar 2026 12:49:49 -0300 [thread overview]
Message-ID: <20260302155105.275396307@redhat.com> (raw)
In-Reply-To: <20260302154945.143996316@redhat.com>
Make use of the new qpw_{un,}lock*() and queue_percpu_work_on()
interface to improve performance & latency.
For functions that may be scheduled on a different CPU, replace
local_{un,}lock*() with qpw_{un,}lock*(), and replace queue_work_on() with
queue_percpu_work_on(). Likewise, flush_work() is replaced with
flush_percpu_work().
The change requires allocating qpw_structs instead of work_structs,
and changing the parameters of a few functions to include a cpu parameter.
This should have no relevant performance impact on non-QPW kernels:
for functions that may be scheduled on a different CPU, the local_*lock's
this_cpu_ptr() becomes a per_cpu_ptr(smp_processor_id()).
Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
mm/internal.h | 4 ++-
mm/mlock.c | 51 ++++++++++++++++++++++++++++++-----------
mm/page_alloc.c | 2 -
mm/swap.c | 69 ++++++++++++++++++++++++++++++--------------------------
4 files changed, 79 insertions(+), 47 deletions(-)
Index: linux/mm/mlock.c
===================================================================
--- linux.orig/mm/mlock.c
+++ linux/mm/mlock.c
@@ -25,17 +25,16 @@
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/secretmem.h>
+#include <linux/qpw.h>
#include "internal.h"
struct mlock_fbatch {
- local_lock_t lock;
+ qpw_lock_t lock;
struct folio_batch fbatch;
};
-static DEFINE_PER_CPU(struct mlock_fbatch, mlock_fbatch) = {
- .lock = INIT_LOCAL_LOCK(lock),
-};
+static DEFINE_PER_CPU(struct mlock_fbatch, mlock_fbatch);
bool can_do_mlock(void)
{
@@ -209,18 +208,29 @@ static void mlock_folio_batch(struct fol
folios_put(fbatch);
}
+void mlock_drain_cpu(int cpu)
+{
+ struct folio_batch *fbatch;
+
+ qpw_lock(&mlock_fbatch.lock, cpu);
+ fbatch = per_cpu_ptr(&mlock_fbatch.fbatch, cpu);
+ if (folio_batch_count(fbatch))
+ mlock_folio_batch(fbatch);
+ qpw_unlock(&mlock_fbatch.lock, cpu);
+}
+
void mlock_drain_local(void)
{
struct folio_batch *fbatch;
- local_lock(&mlock_fbatch.lock);
+ local_qpw_lock(&mlock_fbatch.lock);
fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
if (folio_batch_count(fbatch))
mlock_folio_batch(fbatch);
- local_unlock(&mlock_fbatch.lock);
+ local_qpw_unlock(&mlock_fbatch.lock);
}
-void mlock_drain_remote(int cpu)
+void mlock_drain_offline(int cpu)
{
struct folio_batch *fbatch;
@@ -243,7 +253,7 @@ void mlock_folio(struct folio *folio)
{
struct folio_batch *fbatch;
- local_lock(&mlock_fbatch.lock);
+ local_qpw_lock(&mlock_fbatch.lock);
fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
if (!folio_test_set_mlocked(folio)) {
@@ -257,7 +267,7 @@ void mlock_folio(struct folio *folio)
if (!folio_batch_add(fbatch, mlock_lru(folio)) ||
!folio_may_be_lru_cached(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
- local_unlock(&mlock_fbatch.lock);
+ local_qpw_unlock(&mlock_fbatch.lock);
}
/**
@@ -269,7 +279,7 @@ void mlock_new_folio(struct folio *folio
struct folio_batch *fbatch;
int nr_pages = folio_nr_pages(folio);
- local_lock(&mlock_fbatch.lock);
+ local_qpw_lock(&mlock_fbatch.lock);
fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
folio_set_mlocked(folio);
@@ -280,7 +290,7 @@ void mlock_new_folio(struct folio *folio
if (!folio_batch_add(fbatch, mlock_new(folio)) ||
!folio_may_be_lru_cached(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
- local_unlock(&mlock_fbatch.lock);
+ local_qpw_unlock(&mlock_fbatch.lock);
}
/**
@@ -291,7 +301,7 @@ void munlock_folio(struct folio *folio)
{
struct folio_batch *fbatch;
- local_lock(&mlock_fbatch.lock);
+ local_qpw_lock(&mlock_fbatch.lock);
fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
/*
* folio_test_clear_mlocked(folio) must be left to __munlock_folio(),
@@ -301,7 +311,7 @@ void munlock_folio(struct folio *folio)
if (!folio_batch_add(fbatch, folio) ||
!folio_may_be_lru_cached(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
- local_unlock(&mlock_fbatch.lock);
+ local_qpw_unlock(&mlock_fbatch.lock);
}
static inline unsigned int folio_mlock_step(struct folio *folio,
@@ -823,3 +833,18 @@ void user_shm_unlock(size_t size, struct
spin_unlock(&shmlock_user_lock);
put_ucounts(ucounts);
}
+
+int __init mlock_init(void)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct mlock_fbatch *fbatch = &per_cpu(mlock_fbatch, cpu);
+
+ qpw_lock_init(&fbatch->lock);
+ }
+
+ return 0;
+}
+
+module_init(mlock_init);
Index: linux/mm/swap.c
===================================================================
--- linux.orig/mm/swap.c
+++ linux/mm/swap.c
@@ -35,7 +35,7 @@
#include <linux/uio.h>
#include <linux/hugetlb.h>
#include <linux/page_idle.h>
-#include <linux/local_lock.h>
+#include <linux/qpw.h>
#include <linux/buffer_head.h>
#include "internal.h"
@@ -52,7 +52,7 @@ struct cpu_fbatches {
* The following folio batches are grouped together because they are protected
* by disabling preemption (and interrupts remain enabled).
*/
- local_lock_t lock;
+ qpw_lock_t lock;
struct folio_batch lru_add;
struct folio_batch lru_deactivate_file;
struct folio_batch lru_deactivate;
@@ -61,14 +61,11 @@ struct cpu_fbatches {
struct folio_batch lru_activate;
#endif
/* Protecting the following batches which require disabling interrupts */
- local_lock_t lock_irq;
+ qpw_lock_t lock_irq;
struct folio_batch lru_move_tail;
};
-static DEFINE_PER_CPU(struct cpu_fbatches, cpu_fbatches) = {
- .lock = INIT_LOCAL_LOCK(lock),
- .lock_irq = INIT_LOCAL_LOCK(lock_irq),
-};
+static DEFINE_PER_CPU(struct cpu_fbatches, cpu_fbatches);
static void __page_cache_release(struct folio *folio, struct lruvec **lruvecp,
unsigned long *flagsp)
@@ -187,18 +184,18 @@ static void __folio_batch_add_and_move(s
folio_get(folio);
if (disable_irq)
- local_lock_irqsave(&cpu_fbatches.lock_irq, flags);
+ local_qpw_lock_irqsave(&cpu_fbatches.lock_irq, flags);
else
- local_lock(&cpu_fbatches.lock);
+ local_qpw_lock(&cpu_fbatches.lock);
if (!folio_batch_add(this_cpu_ptr(fbatch), folio) ||
!folio_may_be_lru_cached(folio) || lru_cache_disabled())
folio_batch_move_lru(this_cpu_ptr(fbatch), move_fn);
if (disable_irq)
- local_unlock_irqrestore(&cpu_fbatches.lock_irq, flags);
+ local_qpw_unlock_irqrestore(&cpu_fbatches.lock_irq, flags);
else
- local_unlock(&cpu_fbatches.lock);
+ local_qpw_unlock(&cpu_fbatches.lock);
}
#define folio_batch_add_and_move(folio, op) \
@@ -359,7 +356,7 @@ static void __lru_cache_activate_folio(s
struct folio_batch *fbatch;
int i;
- local_lock(&cpu_fbatches.lock);
+ local_qpw_lock(&cpu_fbatches.lock);
fbatch = this_cpu_ptr(&cpu_fbatches.lru_add);
/*
@@ -381,7 +378,7 @@ static void __lru_cache_activate_folio(s
}
}
- local_unlock(&cpu_fbatches.lock);
+ local_qpw_unlock(&cpu_fbatches.lock);
}
#ifdef CONFIG_LRU_GEN
@@ -653,9 +650,9 @@ void lru_add_drain_cpu(int cpu)
unsigned long flags;
/* No harm done if a racing interrupt already did this */
- local_lock_irqsave(&cpu_fbatches.lock_irq, flags);
+ qpw_lock_irqsave(&cpu_fbatches.lock_irq, flags, cpu);
folio_batch_move_lru(fbatch, lru_move_tail);
- local_unlock_irqrestore(&cpu_fbatches.lock_irq, flags);
+ qpw_unlock_irqrestore(&cpu_fbatches.lock_irq, flags, cpu);
}
fbatch = &fbatches->lru_deactivate_file;
@@ -733,9 +730,9 @@ void folio_mark_lazyfree(struct folio *f
void lru_add_drain(void)
{
- local_lock(&cpu_fbatches.lock);
+ local_qpw_lock(&cpu_fbatches.lock);
lru_add_drain_cpu(smp_processor_id());
- local_unlock(&cpu_fbatches.lock);
+ local_qpw_unlock(&cpu_fbatches.lock);
mlock_drain_local();
}
@@ -745,30 +742,30 @@ void lru_add_drain(void)
* the same cpu. It shouldn't be a problem in !SMP case since
* the core is only one and the locks will disable preemption.
*/
-static void lru_add_mm_drain(void)
+static void lru_add_mm_drain(int cpu)
{
- local_lock(&cpu_fbatches.lock);
- lru_add_drain_cpu(smp_processor_id());
- local_unlock(&cpu_fbatches.lock);
- mlock_drain_local();
+ qpw_lock(&cpu_fbatches.lock, cpu);
+ lru_add_drain_cpu(cpu);
+ qpw_unlock(&cpu_fbatches.lock, cpu);
+ mlock_drain_cpu(cpu);
}
void lru_add_drain_cpu_zone(struct zone *zone)
{
- local_lock(&cpu_fbatches.lock);
+ local_qpw_lock(&cpu_fbatches.lock);
lru_add_drain_cpu(smp_processor_id());
drain_local_pages(zone);
- local_unlock(&cpu_fbatches.lock);
+ local_qpw_unlock(&cpu_fbatches.lock);
mlock_drain_local();
}
#ifdef CONFIG_SMP
-static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
+static DEFINE_PER_CPU(struct qpw_struct, lru_add_drain_qpw);
-static void lru_add_drain_per_cpu(struct work_struct *dummy)
+static void lru_add_drain_per_cpu(struct work_struct *w)
{
- lru_add_mm_drain();
+ lru_add_mm_drain(qpw_get_cpu(w));
}
static DEFINE_PER_CPU(struct work_struct, bh_add_drain_work);
@@ -883,12 +880,12 @@ static inline void __lru_add_drain_all(b
cpumask_clear(&has_mm_work);
cpumask_clear(&has_bh_work);
for_each_online_cpu(cpu) {
- struct work_struct *mm_work = &per_cpu(lru_add_drain_work, cpu);
+ struct qpw_struct *mm_qpw = &per_cpu(lru_add_drain_qpw, cpu);
struct work_struct *bh_work = &per_cpu(bh_add_drain_work, cpu);
if (cpu_needs_mm_drain(cpu)) {
- INIT_WORK(mm_work, lru_add_drain_per_cpu);
- queue_work_on(cpu, mm_percpu_wq, mm_work);
+ INIT_QPW(mm_qpw, lru_add_drain_per_cpu, cpu);
+ queue_percpu_work_on(cpu, mm_percpu_wq, mm_qpw);
__cpumask_set_cpu(cpu, &has_mm_work);
}
@@ -900,7 +897,7 @@ static inline void __lru_add_drain_all(b
}
for_each_cpu(cpu, &has_mm_work)
- flush_work(&per_cpu(lru_add_drain_work, cpu));
+ flush_percpu_work(&per_cpu(lru_add_drain_qpw, cpu));
for_each_cpu(cpu, &has_bh_work)
flush_work(&per_cpu(bh_add_drain_work, cpu));
@@ -950,7 +947,7 @@ void lru_cache_disable(void)
#ifdef CONFIG_SMP
__lru_add_drain_all(true);
#else
- lru_add_mm_drain();
+ lru_add_mm_drain(smp_processor_id());
invalidate_bh_lrus_cpu();
#endif
}
@@ -1124,6 +1121,7 @@ static const struct ctl_table swap_sysct
void __init swap_setup(void)
{
unsigned long megs = PAGES_TO_MB(totalram_pages());
+ unsigned int cpu;
/* Use a smaller cluster for small-memory machines */
if (megs < 16)
@@ -1136,4 +1134,11 @@ void __init swap_setup(void)
*/
register_sysctl_init("vm", swap_sysctl_table);
+
+ for_each_possible_cpu(cpu) {
+ struct cpu_fbatches *fbatches = &per_cpu(cpu_fbatches, cpu);
+
+ qpw_lock_init(&fbatches->lock);
+ qpw_lock_init(&fbatches->lock_irq);
+ }
}
Index: linux/mm/internal.h
===================================================================
--- linux.orig/mm/internal.h
+++ linux/mm/internal.h
@@ -1140,10 +1140,12 @@ static inline void munlock_vma_folio(str
munlock_folio(folio);
}
+int __init mlock_init(void);
void mlock_new_folio(struct folio *folio);
bool need_mlock_drain(int cpu);
void mlock_drain_local(void);
-void mlock_drain_remote(int cpu);
+void mlock_drain_cpu(int cpu);
+void mlock_drain_offline(int cpu);
extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
Index: linux/mm/page_alloc.c
===================================================================
--- linux.orig/mm/page_alloc.c
+++ linux/mm/page_alloc.c
@@ -6285,7 +6285,7 @@ static int page_alloc_cpu_dead(unsigned
struct zone *zone;
lru_add_drain_cpu(cpu);
- mlock_drain_remote(cpu);
+ mlock_drain_offline(cpu);
drain_pages(cpu);
/*
next prev parent reply other threads:[~2026-03-02 15:53 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-02 15:49 [PATCH v2 0/5] Introduce QPW for per-cpu operations (v2) Marcelo Tosatti
2026-03-02 15:49 ` [PATCH v2 1/5] slab: distinguish lock and trylock for sheaf_flush_main() Marcelo Tosatti
2026-03-02 15:49 ` [PATCH v2 2/5] Introducing qpw_lock() and per-cpu queue & flush work Marcelo Tosatti
2026-03-02 15:49 ` [PATCH v2 3/5] mm/swap: move bh draining into a separate workqueue Marcelo Tosatti
2026-03-02 15:49 ` Marcelo Tosatti [this message]
2026-03-02 15:49 ` [PATCH v2 5/5] slub: apply new queue_percpu_work_on() interface Marcelo Tosatti
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260302155105.275396307@redhat.com \
--to=mtosatti@redhat.com \
--cc=42.hyeyoo@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=boqun.feng@gmail.com \
--cc=cl@linux.com \
--cc=frederic@kernel.org \
--cc=hannes@cmpxchg.org \
--cc=iamjoonsoo.kim@lge.com \
--cc=leobras.c@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=longman@redhat.com \
--cc=mhocko@kernel.org \
--cc=muchun.song@linux.dev \
--cc=penberg@kernel.org \
--cc=rientjes@google.com \
--cc=roman.gushchin@linux.dev \
--cc=shakeel.butt@linux.dev \
--cc=tglx@linutronix.de \
--cc=vbabka@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox