On Fri, 2026-04-03 at 15:40 -0400, Johannes Weiner wrote:
@@ -755,6 +752,9 @@ struct per_cpu_pages {
#endif
short free_count; /* consecutive free count */
+ /* Pageblocks owned by this CPU, for fragment recovery */
+ struct list_head owned_blocks;
+
/* Lists of pages, one per migrate type stored on the pcp-lists */
struct list_head lists[NR_PCP_LISTS];
} ____cacheline_aligned_in_smp;
+ /*
+ * Phase 0: Recover fragments from owned blocks.
+ *
+ * The owned_blocks list tracks blocks that have fragments
+ * sitting in zone buddy (put there by drains). Pull matching
+ * fragments back to PCP with PagePCPBuddy so they participate
+ * in merging, instead of claiming fresh blocks and spreading
+ * fragmentation further.
+ *
+ * Only recover blocks matching the requested migratetype.
+ * After recovery, remove the block from the list -- the drain
+ * path re-adds it if new fragments arrive.
+ */
+ list_for_each_entry_safe(pbd, tmp, &pcp->owned_blocks, cpu_node) {
+ unsigned long base_pfn, pfn;
+ int block_mt;
+
+ base_pfn = pbd->block_pfn;
+ block_mt = pbd_migratetype(pbd);
+ if (block_mt != migratetype)
+ continue;
Given that you just skip over blocks of the wrong migratetype,
I wonder if it makes sense to have a different list head for each
migratetype in the per_cpu_pages struct.
Not that I should be saying anything that would slow down
the merging of these patches, since making the buddy allocator
more of a slow path is pretty much a prerequisite for the 1GB
allocation stuff I'm working on :)