* [PATCH] mm/page_alloc: simplify and cleanup pcp locking
@ 2025-10-15 9:36 Vlastimil Babka
2025-10-15 14:51 ` Joshua Hahn
0 siblings, 1 reply; 4+ messages in thread
From: Vlastimil Babka @ 2025-10-15 9:36 UTC (permalink / raw)
To: Andrew Morton, Suren Baghdasaryan, Michal Hocko, Brendan Jackman,
Johannes Weiner, Zi Yan
Cc: Mel Gorman, linux-mm, linux-kernel, Vlastimil Babka
The pcp locking relies on pcp_spin_trylock() which has to be used
together with pcp_trylock_prepare()/pcp_trylock_finish() to work
properly on !SMP !RT configs. This is tedious and error-prone.
We can remove pcp_spin_lock() and underlying pcpu_spin_lock() because we
don't use it. Afterwards pcpu_spin_unlock() is only used together with
pcp_spin_trylock(). Therefore we can add the UP_flags parameter to them
and handle pcp_trylock_prepare()/finish() within them.
Additionally for the configs where pcp_trylock_prepare() is a no-op (SMP
|| RT) make it pass &UP_flags to a no-op inline function. This ensures
typechecking and makes the local variable "used" so we can remove the
__maybe_unused attributes.
In my compile testing, bloat-o-meter reported no change on SMP config,
so the compiler is capable of optimizing away the no-ops same as before,
and we have simplified the code using pcp_spin_trylock().
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
based on mm-new
---
mm/page_alloc.c | 99 +++++++++++++++++++++++----------------------------------
1 file changed, 40 insertions(+), 59 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0155a66d7367..2bf707f92d83 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -99,9 +99,12 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
/*
* On SMP, spin_trylock is sufficient protection.
* On PREEMPT_RT, spin_trylock is equivalent on both SMP and UP.
+ * Pass flags to a no-op inline function to typecheck and silence the unused
+ * variable warning.
*/
-#define pcp_trylock_prepare(flags) do { } while (0)
-#define pcp_trylock_finish(flag) do { } while (0)
+static inline void __pcp_trylock_prepare(unsigned long *flags) { }
+#define pcp_trylock_prepare(flags) __pcp_trylock_prepare(&(flags))
+#define pcp_trylock_finish(flags) do { } while (0)
#else
/* UP spin_trylock always succeeds so disable IRQs to prevent re-entrancy. */
@@ -129,15 +132,6 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
* Generic helper to lookup and a per-cpu variable with an embedded spinlock.
* Return value should be used with equivalent unlock helper.
*/
-#define pcpu_spin_lock(type, member, ptr) \
-({ \
- type *_ret; \
- pcpu_task_pin(); \
- _ret = this_cpu_ptr(ptr); \
- spin_lock(&_ret->member); \
- _ret; \
-})
-
#define pcpu_spin_trylock(type, member, ptr) \
({ \
type *_ret; \
@@ -157,14 +151,21 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
})
/* struct per_cpu_pages specific helpers. */
-#define pcp_spin_lock(ptr) \
- pcpu_spin_lock(struct per_cpu_pages, lock, ptr)
-
-#define pcp_spin_trylock(ptr) \
- pcpu_spin_trylock(struct per_cpu_pages, lock, ptr)
+#define pcp_spin_trylock(ptr, UP_flags) \
+({ \
+ struct per_cpu_pages *__ret; \
+ pcp_trylock_prepare(UP_flags); \
+ __ret = pcpu_spin_trylock(struct per_cpu_pages, lock, ptr); \
+ if (!__ret) \
+ pcp_trylock_finish(UP_flags); \
+ __ret; \
+})
-#define pcp_spin_unlock(ptr) \
- pcpu_spin_unlock(lock, ptr)
+#define pcp_spin_unlock(ptr, UP_flags) \
+({ \
+ pcpu_spin_unlock(lock, ptr); \
+ pcp_trylock_finish(UP_flags); \
+})
#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
DEFINE_PER_CPU(int, numa_node);
@@ -2887,13 +2888,10 @@ static bool free_frozen_page_commit(struct zone *zone,
if (to_free == 0 || pcp->count == 0)
break;
- pcp_spin_unlock(pcp);
- pcp_trylock_finish(*UP_flags);
+ pcp_spin_unlock(pcp, *UP_flags);
- pcp_trylock_prepare(*UP_flags);
- pcp = pcp_spin_trylock(zone->per_cpu_pageset);
+ pcp = pcp_spin_trylock(zone->per_cpu_pageset, *UP_flags);
if (!pcp) {
- pcp_trylock_finish(*UP_flags);
ret = false;
break;
}
@@ -2904,8 +2902,7 @@ static bool free_frozen_page_commit(struct zone *zone,
* returned in an unlocked state.
*/
if (smp_processor_id() != cpu) {
- pcp_spin_unlock(pcp);
- pcp_trylock_finish(*UP_flags);
+ pcp_spin_unlock(pcp, *UP_flags);
ret = false;
break;
}
@@ -2937,7 +2934,7 @@ static bool free_frozen_page_commit(struct zone *zone,
static void __free_frozen_pages(struct page *page, unsigned int order,
fpi_t fpi_flags)
{
- unsigned long __maybe_unused UP_flags;
+ unsigned long UP_flags;
struct per_cpu_pages *pcp;
struct zone *zone;
unsigned long pfn = page_to_pfn(page);
@@ -2973,17 +2970,15 @@ static void __free_frozen_pages(struct page *page, unsigned int order,
add_page_to_zone_llist(zone, page, order);
return;
}
- pcp_trylock_prepare(UP_flags);
- pcp = pcp_spin_trylock(zone->per_cpu_pageset);
+ pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
if (pcp) {
if (!free_frozen_page_commit(zone, pcp, page, migratetype,
order, fpi_flags, &UP_flags))
return;
- pcp_spin_unlock(pcp);
+ pcp_spin_unlock(pcp, UP_flags);
} else {
free_one_page(zone, page, pfn, order, fpi_flags);
}
- pcp_trylock_finish(UP_flags);
}
void free_frozen_pages(struct page *page, unsigned int order)
@@ -2996,7 +2991,7 @@ void free_frozen_pages(struct page *page, unsigned int order)
*/
void free_unref_folios(struct folio_batch *folios)
{
- unsigned long __maybe_unused UP_flags;
+ unsigned long UP_flags;
struct per_cpu_pages *pcp = NULL;
struct zone *locked_zone = NULL;
int i, j;
@@ -3039,8 +3034,7 @@ void free_unref_folios(struct folio_batch *folios)
if (zone != locked_zone ||
is_migrate_isolate(migratetype)) {
if (pcp) {
- pcp_spin_unlock(pcp);
- pcp_trylock_finish(UP_flags);
+ pcp_spin_unlock(pcp, UP_flags);
locked_zone = NULL;
pcp = NULL;
}
@@ -3059,10 +3053,8 @@ void free_unref_folios(struct folio_batch *folios)
* trylock is necessary as folios may be getting freed
* from IRQ or SoftIRQ context after an IO completion.
*/
- pcp_trylock_prepare(UP_flags);
- pcp = pcp_spin_trylock(zone->per_cpu_pageset);
+ pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
if (unlikely(!pcp)) {
- pcp_trylock_finish(UP_flags);
free_one_page(zone, &folio->page, pfn,
order, FPI_NONE);
continue;
@@ -3085,10 +3077,8 @@ void free_unref_folios(struct folio_batch *folios)
}
}
- if (pcp) {
- pcp_spin_unlock(pcp);
- pcp_trylock_finish(UP_flags);
- }
+ if (pcp)
+ pcp_spin_unlock(pcp, UP_flags);
folio_batch_reinit(folios);
}
@@ -3339,15 +3329,12 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
struct per_cpu_pages *pcp;
struct list_head *list;
struct page *page;
- unsigned long __maybe_unused UP_flags;
+ unsigned long UP_flags;
/* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
- pcp_trylock_prepare(UP_flags);
- pcp = pcp_spin_trylock(zone->per_cpu_pageset);
- if (!pcp) {
- pcp_trylock_finish(UP_flags);
+ pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
+ if (!pcp)
return NULL;
- }
/*
* On allocation, reduce the number of pages that are batch freed.
@@ -3357,8 +3344,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
pcp->free_count >>= 1;
list = &pcp->lists[order_to_pindex(migratetype, order)];
page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list);
- pcp_spin_unlock(pcp);
- pcp_trylock_finish(UP_flags);
+ pcp_spin_unlock(pcp, UP_flags);
if (page) {
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone, 1);
@@ -5045,7 +5031,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
struct page **page_array)
{
struct page *page;
- unsigned long __maybe_unused UP_flags;
+ unsigned long UP_flags;
struct zone *zone;
struct zoneref *z;
struct per_cpu_pages *pcp;
@@ -5139,10 +5125,9 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
goto failed;
/* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
- pcp_trylock_prepare(UP_flags);
- pcp = pcp_spin_trylock(zone->per_cpu_pageset);
+ pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
if (!pcp)
- goto failed_irq;
+ goto failed;
/* Attempt the batch allocation */
pcp_list = &pcp->lists[order_to_pindex(ac.migratetype, 0)];
@@ -5159,8 +5144,8 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
if (unlikely(!page)) {
/* Try and allocate at least one page */
if (!nr_account) {
- pcp_spin_unlock(pcp);
- goto failed_irq;
+ pcp_spin_unlock(pcp, UP_flags);
+ goto failed;
}
break;
}
@@ -5171,8 +5156,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
page_array[nr_populated++] = page;
}
- pcp_spin_unlock(pcp);
- pcp_trylock_finish(UP_flags);
+ pcp_spin_unlock(pcp, UP_flags);
__count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
zone_statistics(zonelist_zone(ac.preferred_zoneref), zone, nr_account);
@@ -5180,9 +5164,6 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
out:
return nr_populated;
-failed_irq:
- pcp_trylock_finish(UP_flags);
-
failed:
page = __alloc_pages_noprof(gfp, 0, preferred_nid, nodemask);
if (page)
---
base-commit: 550b531346a7e4e7ad31813d0d1d6a6d8c10a06f
change-id: 20251015-b4-pcp-lock-cleanup-9b70b417a20e
Best regards,
--
Vlastimil Babka <vbabka@suse.cz>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] mm/page_alloc: simplify and cleanup pcp locking
2025-10-15 9:36 [PATCH] mm/page_alloc: simplify and cleanup pcp locking Vlastimil Babka
@ 2025-10-15 14:51 ` Joshua Hahn
2025-10-15 16:08 ` Vlastimil Babka
0 siblings, 1 reply; 4+ messages in thread
From: Joshua Hahn @ 2025-10-15 14:51 UTC (permalink / raw)
To: Vlastimil Babka
Cc: Andrew Morton, Suren Baghdasaryan, Michal Hocko, Brendan Jackman,
Johannes Weiner, Zi Yan, Mel Gorman, linux-mm, linux-kernel
On Wed, 15 Oct 2025 11:36:09 +0200 Vlastimil Babka <vbabka@suse.cz> wrote:
> The pcp locking relies on pcp_spin_trylock() which has to be used
> together with pcp_trylock_prepare()/pcp_trylock_finish() to work
> properly on !SMP !RT configs. This is tedious and error-prone.
>
> We can remove pcp_spin_lock() and underlying pcpu_spin_lock() because we
> don't use it. Afterwards pcpu_spin_unlock() is only used together with
> pcp_spin_trylock(). Therefore we can add the UP_flags parameter to them
> and handle pcp_trylock_prepare()/finish() within them.
>
> Additionally for the configs where pcp_trylock_prepare() is a no-op (SMP
> || RT) make it pass &UP_flags to a no-op inline function. This ensures
> typechecking and makes the local variable "used" so we can remove the
> __maybe_unused attributes.
>
> In my compile testing, bloat-o-meter reported no change on SMP config,
> so the compiler is capable of optimizing away the no-ops same as before,
> and we have simplified the code using pcp_spin_trylock().
>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Hello Vlastimil, I hope you are doing well!
Thank you for this patch. This is a pattern that I found quite cumbersome,
so this patch really makes the code so much easier to understand and read.
> ---
> based on mm-new
> ---
> mm/page_alloc.c | 99 +++++++++++++++++++++++----------------------------------
> 1 file changed, 40 insertions(+), 59 deletions(-)
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 0155a66d7367..2bf707f92d83 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -99,9 +99,12 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
> /*
> * On SMP, spin_trylock is sufficient protection.
> * On PREEMPT_RT, spin_trylock is equivalent on both SMP and UP.
> + * Pass flags to a no-op inline function to typecheck and silence the unused
> + * variable warning.
> */
> -#define pcp_trylock_prepare(flags) do { } while (0)
> -#define pcp_trylock_finish(flag) do { } while (0)
> +static inline void __pcp_trylock_prepare(unsigned long *flags) { }
> +#define pcp_trylock_prepare(flags) __pcp_trylock_prepare(&(flags))
> +#define pcp_trylock_finish(flags) do { } while (0)
> #else
I have one question here. I was a bit unsure why we do the typechecking and
silencing for the unused variable warning for only pcp_trylock_prepare, but
not for pcp_trylock_finish. Is it because pcp_trylock_finish will always
be called after pcp_trylock_prepare, so the flag will have been used at
that point?
I was concerned that there would have been some area where only
pcp_trylock_finish would have been used, but compiling with W=1 seems to show
no errors on my end : -) So it looks good to me! Feel free to add:
Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Thank you! I hope you have a great day!
Joshua
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] mm/page_alloc: simplify and cleanup pcp locking
2025-10-15 14:51 ` Joshua Hahn
@ 2025-10-15 16:08 ` Vlastimil Babka
2025-10-15 18:50 ` Suren Baghdasaryan
0 siblings, 1 reply; 4+ messages in thread
From: Vlastimil Babka @ 2025-10-15 16:08 UTC (permalink / raw)
To: Joshua Hahn
Cc: Andrew Morton, Suren Baghdasaryan, Michal Hocko, Brendan Jackman,
Johannes Weiner, Zi Yan, Mel Gorman, linux-mm, linux-kernel
On 10/15/25 16:51, Joshua Hahn wrote:
> On Wed, 15 Oct 2025 11:36:09 +0200 Vlastimil Babka <vbabka@suse.cz> wrote:
>
>> The pcp locking relies on pcp_spin_trylock() which has to be used
>> together with pcp_trylock_prepare()/pcp_trylock_finish() to work
>> properly on !SMP !RT configs. This is tedious and error-prone.
>>
>> We can remove pcp_spin_lock() and underlying pcpu_spin_lock() because we
>> don't use it. Afterwards pcpu_spin_unlock() is only used together with
>> pcp_spin_trylock(). Therefore we can add the UP_flags parameter to them
>> and handle pcp_trylock_prepare()/finish() within them.
>>
>> Additionally for the configs where pcp_trylock_prepare() is a no-op (SMP
>> || RT) make it pass &UP_flags to a no-op inline function. This ensures
>> typechecking and makes the local variable "used" so we can remove the
>> __maybe_unused attributes.
>>
>> In my compile testing, bloat-o-meter reported no change on SMP config,
>> so the compiler is capable of optimizing away the no-ops same as before,
>> and we have simplified the code using pcp_spin_trylock().
>>
>> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
>
> Hello Vlastimil, I hope you are doing well!
>
> Thank you for this patch. This is a pattern that I found quite cumbersome,
> so this patch really makes the code so much easier to understand and read.
Hi, that's good to hear!
>> ---
>> based on mm-new
>> ---
>> mm/page_alloc.c | 99 +++++++++++++++++++++++----------------------------------
>> 1 file changed, 40 insertions(+), 59 deletions(-)
>>
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 0155a66d7367..2bf707f92d83 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -99,9 +99,12 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
>> /*
>> * On SMP, spin_trylock is sufficient protection.
>> * On PREEMPT_RT, spin_trylock is equivalent on both SMP and UP.
>> + * Pass flags to a no-op inline function to typecheck and silence the unused
>> + * variable warning.
>> */
>> -#define pcp_trylock_prepare(flags) do { } while (0)
>> -#define pcp_trylock_finish(flag) do { } while (0)
>> +static inline void __pcp_trylock_prepare(unsigned long *flags) { }
>> +#define pcp_trylock_prepare(flags) __pcp_trylock_prepare(&(flags))
>> +#define pcp_trylock_finish(flags) do { } while (0)
>> #else
>
> I have one question here. I was a bit unsure why we do the typechecking and
> silencing for the unused variable warning for only pcp_trylock_prepare, but
> not for pcp_trylock_finish. Is it because pcp_trylock_finish will always
> be called after pcp_trylock_prepare, so the flag will have been used at
> that point?
Exactly.
> I was concerned that there would have been some area where only
> pcp_trylock_finish would have been used, but compiling with W=1 seems to show
> no errors on my end : -) So it looks good to me! Feel free to add:
Yeah we can change that if ever we end up with some code that needs it
(hopefully not).
> Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Thanks!
> Thank you! I hope you have a great day!
> Joshua
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] mm/page_alloc: simplify and cleanup pcp locking
2025-10-15 16:08 ` Vlastimil Babka
@ 2025-10-15 18:50 ` Suren Baghdasaryan
0 siblings, 0 replies; 4+ messages in thread
From: Suren Baghdasaryan @ 2025-10-15 18:50 UTC (permalink / raw)
To: Vlastimil Babka
Cc: Joshua Hahn, Andrew Morton, Michal Hocko, Brendan Jackman,
Johannes Weiner, Zi Yan, Mel Gorman, linux-mm, linux-kernel
On Wed, Oct 15, 2025 at 9:08 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 10/15/25 16:51, Joshua Hahn wrote:
> > On Wed, 15 Oct 2025 11:36:09 +0200 Vlastimil Babka <vbabka@suse.cz> wrote:
> >
> >> The pcp locking relies on pcp_spin_trylock() which has to be used
> >> together with pcp_trylock_prepare()/pcp_trylock_finish() to work
> >> properly on !SMP !RT configs. This is tedious and error-prone.
> >>
> >> We can remove pcp_spin_lock() and underlying pcpu_spin_lock() because we
> >> don't use it. Afterwards pcpu_spin_unlock() is only used together with
> >> pcp_spin_trylock(). Therefore we can add the UP_flags parameter to them
> >> and handle pcp_trylock_prepare()/finish() within them.
> >>
> >> Additionally for the configs where pcp_trylock_prepare() is a no-op (SMP
> >> || RT) make it pass &UP_flags to a no-op inline function. This ensures
> >> typechecking and makes the local variable "used" so we can remove the
> >> __maybe_unused attributes.
> >>
> >> In my compile testing, bloat-o-meter reported no change on SMP config,
> >> so the compiler is capable of optimizing away the no-ops same as before,
> >> and we have simplified the code using pcp_spin_trylock().
> >>
> >> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> >
> > Hello Vlastimil, I hope you are doing well!
> >
> > Thank you for this patch. This is a pattern that I found quite cumbersome,
> > so this patch really makes the code so much easier to understand and read.
>
> Hi, that's good to hear!
> >> ---
> >> based on mm-new
> >> ---
> >> mm/page_alloc.c | 99 +++++++++++++++++++++++----------------------------------
> >> 1 file changed, 40 insertions(+), 59 deletions(-)
> >>
> >> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> >> index 0155a66d7367..2bf707f92d83 100644
> >> --- a/mm/page_alloc.c
> >> +++ b/mm/page_alloc.c
> >> @@ -99,9 +99,12 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
> >> /*
> >> * On SMP, spin_trylock is sufficient protection.
> >> * On PREEMPT_RT, spin_trylock is equivalent on both SMP and UP.
> >> + * Pass flags to a no-op inline function to typecheck and silence the unused
> >> + * variable warning.
> >> */
> >> -#define pcp_trylock_prepare(flags) do { } while (0)
> >> -#define pcp_trylock_finish(flag) do { } while (0)
> >> +static inline void __pcp_trylock_prepare(unsigned long *flags) { }
> >> +#define pcp_trylock_prepare(flags) __pcp_trylock_prepare(&(flags))
> >> +#define pcp_trylock_finish(flags) do { } while (0)
> >> #else
> >
> > I have one question here. I was a bit unsure why we do the typechecking and
> > silencing for the unused variable warning for only pcp_trylock_prepare, but
> > not for pcp_trylock_finish. Is it because pcp_trylock_finish will always
> > be called after pcp_trylock_prepare, so the flag will have been used at
> > that point?
>
> Exactly.
>
> > I was concerned that there would have been some area where only
> > pcp_trylock_finish would have been used, but compiling with W=1 seems to show
> > no errors on my end : -) So it looks good to me! Feel free to add:
>
> Yeah we can change that if ever we end up with some code that needs it
> (hopefully not).
>
> > Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Very nice!
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
>
> Thanks!
>
> > Thank you! I hope you have a great day!
> > Joshua
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-10-15 18:50 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-10-15 9:36 [PATCH] mm/page_alloc: simplify and cleanup pcp locking Vlastimil Babka
2025-10-15 14:51 ` Joshua Hahn
2025-10-15 16:08 ` Vlastimil Babka
2025-10-15 18:50 ` Suren Baghdasaryan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox