Thank you Mike Rapoport for reviewing this patch series. Please find my responses below. > > > #endif /* _LINUX_VMSTAT_H */ > > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > > index ba6d39b71cb1..ca36751be50e 100644 > > --- a/mm/hugetlb.c > > +++ b/mm/hugetlb.c > > @@ -1758,6 +1758,10 @@ static void > __update_and_free_hugetlb_folio(struct hstate *h, > > destroy_compound_gigantic_folio(folio, huge_page_order(h)); > > free_gigantic_folio(folio, huge_page_order(h)); > > } else { > > +#ifndef CONFIG_SPARSEMEM_VMEMMAP > > + __mod_node_page_state(NODE_DATA(page_to_nid(&folio->page)), > > + NR_PAGE_METADATA, > -huge_page_order(h)); > > I don't think memory map will change here with classic SPARSEMEM > Thank you. Yes, I agree with your comment. > > > +#endif > > __free_pages(&folio->page, huge_page_order(h)); > > } > > } > > @@ -2143,7 +2147,9 @@ static struct folio > *alloc_buddy_hugetlb_folio(struct hstate *h, > > __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL); > > return NULL; > > } > > - > > +#ifndef CONFIG_SPARSEMEM_VMEMMAP > > + __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA, > huge_page_order(h)); > > +#endif > > __count_vm_event(HTLB_BUDDY_PGALLOC); > > return page_folio(page); > > } > > diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c > > index 4b9734777f69..7f920bfa8e79 100644 > > --- a/mm/hugetlb_vmemmap.c > > +++ b/mm/hugetlb_vmemmap.c > > @@ -214,6 +214,8 @@ static inline void free_vmemmap_page(struct page > *page) > > free_bootmem_page(page); > > else > > __free_page(page); > > + __mod_node_page_state(NODE_DATA(page_to_nid(page)), > > + NR_PAGE_METADATA, -1); > > } > > > > /* Free a list of the vmemmap pages */ > > @@ -336,6 +338,7 @@ static int vmemmap_remap_free(unsigned long start, > unsigned long end, > > (void *)walk.reuse_addr); > > list_add(&walk.reuse_page->lru, &vmemmap_pages); > > } > > + __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA, 1); > > > > /* > > * In order to make remapping routine most efficient for the huge > pages, > > 
@@ -387,8 +390,12 @@ static int alloc_vmemmap_page_list(unsigned long > start, unsigned long end, > > > > while (nr_pages--) { > > page = alloc_pages_node(nid, gfp_mask, 0); > > - if (!page) > > + if (!page) { > > goto out; > > + } else { > > + __mod_node_page_state(NODE_DATA(page_to_nid(page)), > > + NR_PAGE_METADATA, 1); > > We can update this once for nr_pages outside the loop, cannot we? > Thank you for the comment. I agree with you and shall incorporate it. > > > + } > > list_add_tail(&page->lru, list); > > } > > > > diff --git a/mm/mm_init.c b/mm/mm_init.c > > index 50f2f34745af..e02dce7e2e9a 100644 > > --- a/mm/mm_init.c > > +++ b/mm/mm_init.c > > @@ -26,6 +26,7 @@ > > #include > > #include > > #include > > +#include > > #include "internal.h" > > #include "slab.h" > > #include "shuffle.h" > > @@ -1656,6 +1657,8 @@ static void __init alloc_node_mem_map(struct > pglist_data *pgdat) > > panic("Failed to allocate %ld bytes for node %d > memory map\n", > > size, pgdat->node_id); > > pgdat->node_mem_map = map + offset; > > + mod_node_early_perpage_metadata(pgdat->node_id, > > + PAGE_ALIGN(size) >> > PAGE_SHIFT); > > } > > pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", > > __func__, pgdat->node_id, (unsigned > long)pgdat, > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > > index 0c5be12f9336..4e295d5087f4 100644 > > --- a/mm/page_alloc.c > > +++ b/mm/page_alloc.c > > @@ -5443,6 +5443,7 @@ void __init setup_per_cpu_pageset(void) > > for_each_online_pgdat(pgdat) > > pgdat->per_cpu_nodestats = > > alloc_percpu(struct per_cpu_nodestat); > > + writeout_early_perpage_metadata(); > > Why it's called here? > You can copy early stats to actual node stats as soon as the nodes and page > allocator are initialized. > Thank you for mentioning this. I agree with you and shall move it there. 
> > > } > > > > __meminit void zone_pcp_init(struct zone *zone) > > diff --git a/mm/page_ext.c b/mm/page_ext.c > > index 4548fcc66d74..b5b9d3079e20 100644 > > --- a/mm/page_ext.c > > +++ b/mm/page_ext.c > > @@ -201,6 +201,8 @@ static int __init alloc_node_page_ext(int nid) > > return -ENOMEM; > > NODE_DATA(nid)->node_page_ext = base; > > total_usage += table_size; > > + __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA, > > + PAGE_ALIGN(table_size) >> PAGE_SHIFT); > > return 0; > > } > > > > @@ -255,12 +257,15 @@ static void *__meminit alloc_page_ext(size_t size, > int nid) > > void *addr = NULL; > > > > addr = alloc_pages_exact_nid(nid, size, flags); > > - if (addr) { > > + if (addr) > > kmemleak_alloc(addr, size, 1, flags); > > - return addr; > > - } > > + else > > + addr = vzalloc_node(size, nid); > > > > - addr = vzalloc_node(size, nid); > > + if (addr) { > > + __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA, > > + PAGE_ALIGN(size) >> PAGE_SHIFT); > > + } > > > > return addr; > > } > > @@ -314,6 +319,10 @@ static void free_page_ext(void *addr) > > BUG_ON(PageReserved(page)); > > kmemleak_free(addr); > > free_pages_exact(addr, table_size); > > + > > + __mod_node_page_state(NODE_DATA(page_to_nid(page)), > NR_PAGE_METADATA, > > + (long)-1 * (PAGE_ALIGN(table_size) > >> PAGE_SHIFT)); > > + > > what happens with vmalloc()ed page_ext? > Thank you for pointing this out. I shall also make this change for vmalloc()ed page_ext. 
> > > } > > } > > > > diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c > > index a2cbe44c48e1..e33f302db7c6 100644 > > --- a/mm/sparse-vmemmap.c > > +++ b/mm/sparse-vmemmap.c > > @@ -469,5 +469,8 @@ struct page * __meminit > __populate_section_memmap(unsigned long pfn, > > if (r < 0) > > return NULL; > > > > + __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA, > > + PAGE_ALIGN(end - start) >> PAGE_SHIFT); > > + > > return pfn_to_page(pfn); > > } > > diff --git a/mm/sparse.c b/mm/sparse.c > > index 77d91e565045..db78233a85ef 100644 > > --- a/mm/sparse.c > > +++ b/mm/sparse.c > > @@ -14,7 +14,7 @@ > > #include > > #include > > #include > > - > > +#include > > #include "internal.h" > > #include > > > > @@ -465,6 +465,9 @@ static void __init sparse_buffer_init(unsigned long > size, int nid) > > */ > > sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, > true); > > sparsemap_buf_end = sparsemap_buf + size; > > +#ifndef CONFIG_SPARSEMEM_VMEMMAP > > + mod_node_early_perpage_metadata(nid, PAGE_ALIGN(size) >> > PAGE_SHIFT); > > All early struct pages are allocated in memmap_alloc(). It'd make sense to > update > the counter there. > Thanks for the comment. We did not do the accounting in memmap_alloc() because the number of struct pages can decrease as well as increase, so the counter also has to be updated outside of memmap_alloc(). 
> > > +#endif > > } > > > > static void __init sparse_buffer_fini(void) > > @@ -641,6 +644,8 @@ static void depopulate_section_memmap(unsigned long > pfn, unsigned long nr_pages, > > unsigned long start = (unsigned long) pfn_to_page(pfn); > > unsigned long end = start + nr_pages * sizeof(struct page); > > > > + __mod_node_page_state(NODE_DATA(page_to_nid(pfn_to_page(pfn))), > NR_PAGE_METADATA, > > + (long)-1 * (PAGE_ALIGN(end - start) >> > PAGE_SHIFT)); > > vmemmap_free(start, end, altmap); > > } > > static void free_map_bootmem(struct page *memmap) > > diff --git a/mm/vmstat.c b/mm/vmstat.c > > index 00e81e99c6ee..731eb5264b49 100644 > > --- a/mm/vmstat.c > > +++ b/mm/vmstat.c > > @@ -1245,6 +1245,7 @@ const char * const vmstat_text[] = { > > "pgpromote_success", > > "pgpromote_candidate", > > #endif > > + "nr_page_metadata", > > > > /* enum writeback_stat_item counters */ > > "nr_dirty_threshold", > > @@ -2274,4 +2275,24 @@ static int __init extfrag_debug_init(void) > > } > > > > module_init(extfrag_debug_init); > > + > > +// Page metadata size (struct page and page_ext) in pages > > +unsigned long early_perpage_metadata[MAX_NUMNODES] __initdata; > > static? > Thanks for pointing this out. I shall make the early_perpage_metadata[] __initdata array static in the next version of the patch.