From: Sourav Panda <souravpanda@google.com>
To: Mike Rapoport <rppt@kernel.org>
Cc: corbet@lwn.net, gregkh@linuxfoundation.org, rafael@kernel.org,
akpm@linux-foundation.org, mike.kravetz@oracle.com,
muchun.song@linux.dev, david@redhat.com, rdunlap@infradead.org,
chenlinxuan@uniontech.com, yang.yang29@zte.com.cn,
tomas.mudrunka@gmail.com, bhelgaas@google.com,
ivan@cloudflare.com, pasha.tatashin@soleen.com,
yosryahmed@google.com, hannes@cmpxchg.org, shakeelb@google.com,
kirill.shutemov@linux.intel.com, wangkefeng.wang@huawei.com,
adobriyan@gmail.com, vbabka@suse.cz, Liam.Howlett@oracle.com,
surenb@google.com, linux-kernel@vger.kernel.org,
linux-fsdevel@vger.kernel.org, linux-doc@vger.kernel.org,
linux-mm@kvack.org
Subject: Re: [PATCH v1 1/1] mm: report per-page metadata information
Date: Thu, 14 Sep 2023 15:41:00 -0700 [thread overview]
Message-ID: <CANruzcSCFHRgB7oCoZRPOerR=FH3PqvoW0ea+v-p=y+sb-c=kA@mail.gmail.com> (raw)
In-Reply-To: <20230913205125.GA3303@kernel.org>
[-- Attachment #1: Type: text/plain, Size: 9140 bytes --]
Thank you Mike Rapoport for reviewing this patch series. Please find my
responses below.
>
> > #endif /* _LINUX_VMSTAT_H */
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index ba6d39b71cb1..ca36751be50e 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -1758,6 +1758,10 @@ static void
> __update_and_free_hugetlb_folio(struct hstate *h,
> > destroy_compound_gigantic_folio(folio, huge_page_order(h));
> > free_gigantic_folio(folio, huge_page_order(h));
> > } else {
> > +#ifndef CONFIG_SPARSEMEM_VMEMMAP
> > + __mod_node_page_state(NODE_DATA(page_to_nid(&folio->page)),
> > + NR_PAGE_METADATA,
> -huge_page_order(h));
>
> I don't think memory map will change here with classic SPARSEMEM
>
Thank you. Yes, I agree with your comment.
>
> > +#endif
> > __free_pages(&folio->page, huge_page_order(h));
> > }
> > }
> > @@ -2143,7 +2147,9 @@ static struct folio
> *alloc_buddy_hugetlb_folio(struct hstate *h,
> > __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
> > return NULL;
> > }
> > -
> > +#ifndef CONFIG_SPARSEMEM_VMEMMAP
> > + __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
> huge_page_order(h));
> > +#endif
> > __count_vm_event(HTLB_BUDDY_PGALLOC);
> > return page_folio(page);
> > }
> > diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> > index 4b9734777f69..7f920bfa8e79 100644
> > --- a/mm/hugetlb_vmemmap.c
> > +++ b/mm/hugetlb_vmemmap.c
> > @@ -214,6 +214,8 @@ static inline void free_vmemmap_page(struct page
> *page)
> > free_bootmem_page(page);
> > else
> > __free_page(page);
> > + __mod_node_page_state(NODE_DATA(page_to_nid(page)),
> > + NR_PAGE_METADATA, -1);
> > }
> >
> > /* Free a list of the vmemmap pages */
> > @@ -336,6 +338,7 @@ static int vmemmap_remap_free(unsigned long start,
> unsigned long end,
> > (void *)walk.reuse_addr);
> > list_add(&walk.reuse_page->lru, &vmemmap_pages);
> > }
> > + __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA, 1);
> >
> > /*
> > * In order to make remapping routine most efficient for the huge
> pages,
> > @@ -387,8 +390,12 @@ static int alloc_vmemmap_page_list(unsigned long
> start, unsigned long end,
> >
> > while (nr_pages--) {
> > page = alloc_pages_node(nid, gfp_mask, 0);
> > - if (!page)
> > + if (!page) {
> > goto out;
> > + } else {
> > + __mod_node_page_state(NODE_DATA(page_to_nid(page)),
> > + NR_PAGE_METADATA, 1);
>
> We can update this once for nr_pages outside the loop, cannot we?
>
Thank you for the comment. I agree with you and shall incorporate it.
>
> > + }
> > list_add_tail(&page->lru, list);
> > }
> >
> > diff --git a/mm/mm_init.c b/mm/mm_init.c
> > index 50f2f34745af..e02dce7e2e9a 100644
> > --- a/mm/mm_init.c
> > +++ b/mm/mm_init.c
> > @@ -26,6 +26,7 @@
> > #include <linux/pgtable.h>
> > #include <linux/swap.h>
> > #include <linux/cma.h>
> > +#include <linux/vmstat.h>
> > #include "internal.h"
> > #include "slab.h"
> > #include "shuffle.h"
> > @@ -1656,6 +1657,8 @@ static void __init alloc_node_mem_map(struct
> pglist_data *pgdat)
> > panic("Failed to allocate %ld bytes for node %d
> memory map\n",
> > size, pgdat->node_id);
> > pgdat->node_mem_map = map + offset;
> > + mod_node_early_perpage_metadata(pgdat->node_id,
> > + PAGE_ALIGN(size) >>
> PAGE_SHIFT);
> > }
> > pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n",
> > __func__, pgdat->node_id, (unsigned
> long)pgdat,
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index 0c5be12f9336..4e295d5087f4 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -5443,6 +5443,7 @@ void __init setup_per_cpu_pageset(void)
> > for_each_online_pgdat(pgdat)
> > pgdat->per_cpu_nodestats =
> > alloc_percpu(struct per_cpu_nodestat);
> > + writeout_early_perpage_metadata();
>
> Why it's called here?
> You can copy early stats to actual node stats as soon as the nodes and page
> allocator are initialized.
>
Thank you for mentioning this. I agree with you and shall move it there.
>
> > }
> >
> > __meminit void zone_pcp_init(struct zone *zone)
> > diff --git a/mm/page_ext.c b/mm/page_ext.c
> > index 4548fcc66d74..b5b9d3079e20 100644
> > --- a/mm/page_ext.c
> > +++ b/mm/page_ext.c
> > @@ -201,6 +201,8 @@ static int __init alloc_node_page_ext(int nid)
> > return -ENOMEM;
> > NODE_DATA(nid)->node_page_ext = base;
> > total_usage += table_size;
> > + __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
> > + PAGE_ALIGN(table_size) >> PAGE_SHIFT);
> > return 0;
> > }
> >
> > @@ -255,12 +257,15 @@ static void *__meminit alloc_page_ext(size_t size,
> int nid)
> > void *addr = NULL;
> >
> > addr = alloc_pages_exact_nid(nid, size, flags);
> > - if (addr) {
> > + if (addr)
> > kmemleak_alloc(addr, size, 1, flags);
> > - return addr;
> > - }
> > + else
> > + addr = vzalloc_node(size, nid);
> >
> > - addr = vzalloc_node(size, nid);
> > + if (addr) {
> > + __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
> > + PAGE_ALIGN(size) >> PAGE_SHIFT);
> > + }
> >
> > return addr;
> > }
> > @@ -314,6 +319,10 @@ static void free_page_ext(void *addr)
> > BUG_ON(PageReserved(page));
> > kmemleak_free(addr);
> > free_pages_exact(addr, table_size);
> > +
> > + __mod_node_page_state(NODE_DATA(page_to_nid(page)),
> NR_PAGE_METADATA,
> > + (long)-1 * (PAGE_ALIGN(table_size)
> >> PAGE_SHIFT));
> > +
>
> what happens with vmalloc()ed page_ext?
>
Thank you for pointing this out. I shall also make this change for
vmalloc()ed page_ext.
>
> > }
> > }
> >
> > diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
> > index a2cbe44c48e1..e33f302db7c6 100644
> > --- a/mm/sparse-vmemmap.c
> > +++ b/mm/sparse-vmemmap.c
> > @@ -469,5 +469,8 @@ struct page * __meminit
> __populate_section_memmap(unsigned long pfn,
> > if (r < 0)
> > return NULL;
> >
> > + __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
> > + PAGE_ALIGN(end - start) >> PAGE_SHIFT);
> > +
> > return pfn_to_page(pfn);
> > }
> > diff --git a/mm/sparse.c b/mm/sparse.c
> > index 77d91e565045..db78233a85ef 100644
> > --- a/mm/sparse.c
> > +++ b/mm/sparse.c
> > @@ -14,7 +14,7 @@
> > #include <linux/swap.h>
> > #include <linux/swapops.h>
> > #include <linux/bootmem_info.h>
> > -
> > +#include <linux/vmstat.h>
> > #include "internal.h"
> > #include <asm/dma.h>
> >
> > @@ -465,6 +465,9 @@ static void __init sparse_buffer_init(unsigned long
> size, int nid)
> > */
> > sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid,
> true);
> > sparsemap_buf_end = sparsemap_buf + size;
> > +#ifndef CONFIG_SPARSEMEM_VMEMMAP
> > + mod_node_early_perpage_metadata(nid, PAGE_ALIGN(size) >>
> PAGE_SHIFT);
>
> All early struct pages are allocated in memmap_alloc(). It'd make sense to
> update
> the counter there.
>
Thanks for the comment. The reason why we did not do it in memmap_alloc()
is because the struct pages can decrease as well.
>
> > +#endif
> > }
> >
> > static void __init sparse_buffer_fini(void)
> > @@ -641,6 +644,8 @@ static void depopulate_section_memmap(unsigned long
> pfn, unsigned long nr_pages,
> > unsigned long start = (unsigned long) pfn_to_page(pfn);
> > unsigned long end = start + nr_pages * sizeof(struct page);
> >
> > + __mod_node_page_state(NODE_DATA(page_to_nid(pfn_to_page(pfn))),
> NR_PAGE_METADATA,
> > + (long)-1 * (PAGE_ALIGN(end - start) >>
> PAGE_SHIFT));
> > vmemmap_free(start, end, altmap);
> > }
> > static void free_map_bootmem(struct page *memmap)
> > diff --git a/mm/vmstat.c b/mm/vmstat.c
> > index 00e81e99c6ee..731eb5264b49 100644
> > --- a/mm/vmstat.c
> > +++ b/mm/vmstat.c
> > @@ -1245,6 +1245,7 @@ const char * const vmstat_text[] = {
> > "pgpromote_success",
> > "pgpromote_candidate",
> > #endif
> > + "nr_page_metadata",
> >
> > /* enum writeback_stat_item counters */
> > "nr_dirty_threshold",
> > @@ -2274,4 +2275,24 @@ static int __init extfrag_debug_init(void)
> > }
> >
> > module_init(extfrag_debug_init);
> > +
> > +// Page metadata size (struct page and page_ext) in pages
> > +unsigned long early_perpage_metadata[MAX_NUMNODES] __initdata;
>
> static?
>
Thanks for pointing this out. I shall make __initdata static in the next
version of the patch.
[-- Attachment #2: Type: text/html, Size: 12433 bytes --]
next prev parent reply other threads:[~2023-09-14 22:41 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-09-13 17:29 [PATCH v1 0/1] Report perpage " Sourav Panda
2023-09-13 17:30 ` [PATCH v1 1/1] mm: report per-page " Sourav Panda
2023-09-13 17:56 ` Matthew Wilcox
2023-09-14 21:04 ` Sourav Panda
2023-09-13 19:34 ` kernel test robot
2023-09-13 20:51 ` Mike Rapoport
2023-09-14 12:47 ` Matthew Wilcox
2023-09-14 22:45 ` Sourav Panda
2023-09-14 22:41 ` Sourav Panda [this message]
2023-09-13 21:53 ` kernel test robot
2023-09-14 13:00 ` David Hildenbrand
2023-09-14 22:47 ` Sourav Panda
2023-09-18 8:14 ` kernel test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CANruzcSCFHRgB7oCoZRPOerR=FH3PqvoW0ea+v-p=y+sb-c=kA@mail.gmail.com' \
--to=souravpanda@google.com \
--cc=Liam.Howlett@oracle.com \
--cc=adobriyan@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=bhelgaas@google.com \
--cc=chenlinxuan@uniontech.com \
--cc=corbet@lwn.net \
--cc=david@redhat.com \
--cc=gregkh@linuxfoundation.org \
--cc=hannes@cmpxchg.org \
--cc=ivan@cloudflare.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=linux-doc@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mike.kravetz@oracle.com \
--cc=muchun.song@linux.dev \
--cc=pasha.tatashin@soleen.com \
--cc=rafael@kernel.org \
--cc=rdunlap@infradead.org \
--cc=rppt@kernel.org \
--cc=shakeelb@google.com \
--cc=surenb@google.com \
--cc=tomas.mudrunka@gmail.com \
--cc=vbabka@suse.cz \
--cc=wangkefeng.wang@huawei.com \
--cc=yang.yang29@zte.com.cn \
--cc=yosryahmed@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox