From: Kefeng Wang <wangkefeng.wang@huawei.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Huang Ying <ying.huang@intel.com>,
Mel Gorman <mgorman@techsingularity.net>,
Ryan Roberts <ryan.roberts@arm.com>,
David Hildenbrand <david@redhat.com>,
Barry Song <v-songbaohua@oppo.com>,
Vlastimil Babka <vbabka@suse.cz>, Zi Yan <ziy@nvidia.com>,
"Matthew Wilcox (Oracle)" <willy@infradead.org>,
Jonathan Corbet <corbet@lwn.net>, Yang Shi <shy828301@gmail.com>,
Yu Zhao <yuzhao@google.com>, <linux-mm@kvack.org>,
Kefeng Wang <wangkefeng.wang@huawei.com>
Subject: [PATCH rfc 3/3] mm: pcp: show per-order pages count
Date: Mon, 15 Apr 2024 16:12:20 +0800 [thread overview]
Message-ID: <20240415081220.3246839-4-wangkefeng.wang@huawei.com> (raw)
In-Reply-To: <20240415081220.3246839-1-wangkefeng.wang@huawei.com>
THIS IS ONLY FOR DEBUG.
Show the per-order page counts of each CPU's PCP lists in /proc/zoneinfo,
and add a new pcp_order_stat file in sysfs that shows, for each hugepage
size, the total count of that order summed over all CPUs and zones.
#cat /proc/zoneinfo
....
cpu: 15
count: 275
high: 529
batch: 63
order0: 59
order1: 28
order2: 28
order3: 6
order4: 0
order5: 0
order6: 0
order7: 0
order8: 0
order9: 0
#cat /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/pcp_order_stat
10
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
include/linux/mmzone.h | 6 ++++++
include/linux/vmstat.h | 19 +++++++++++++++++++
mm/Kconfig.debug | 8 ++++++++
mm/huge_memory.c | 27 +++++++++++++++++++++++++++
mm/page_alloc.c | 4 ++++
mm/vmstat.c | 16 ++++++++++++++++
6 files changed, 80 insertions(+)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c745e2f1a0f2..c32c01468a77 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -665,6 +665,9 @@ enum zone_watermarks {
#define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1))
#define HIGHORDER_PCP_LIST_INDEX (NR_LOWORDER_PCP_LISTS - (PAGE_ALLOC_COSTLY_ORDER + 1))
#define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP)
+#ifdef CONFIG_PCP_ORDER_STATS
+#define NR_PCP_ORDER (PAGE_ALLOC_COSTLY_ORDER + NR_PCP_THP + 1) /* number of slots in per_cpu_pages.per_order_count[] */
+#endif
#define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
#define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
@@ -702,6 +705,9 @@ struct per_cpu_pages {
/* Lists of pages, one per migrate type stored on the pcp-lists */
struct list_head lists[NR_PCP_LISTS];
+#ifdef CONFIG_PCP_ORDER_STATS
+ int per_order_count[NR_PCP_ORDER]; /* per-order page counts */
+#endif
} ____cacheline_aligned_in_smp;
struct per_cpu_zonestat {
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 735eae6e272c..91843f2d327f 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -624,4 +624,23 @@ static inline void lruvec_stat_sub_folio(struct folio *folio,
{
lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
}
+
+/*
+ * pcp_order_stat_mod - adjust one per-order counter of a per-cpu pageset
+ * @pcp: per-cpu pageset whose counter is updated
+ * @order: index into @pcp->per_order_count[]
+ * @val: signed delta added to the counter
+ *
+ * Does nothing unless CONFIG_PCP_ORDER_STATS is enabled.
+ */
+#ifdef CONFIG_PCP_ORDER_STATS
+static inline void pcp_order_stat_mod(struct per_cpu_pages *pcp, int order,
+				      int val)
+{
+	pcp->per_order_count[order] += val;
+}
+#else
+static inline void pcp_order_stat_mod(struct per_cpu_pages *pcp, int order,
+				      int val)
+{
+}
+#endif
+
+/* Add 1 to the order-@order counter of @pcp via pcp_order_stat_mod(). */
+static inline void pcp_order_stat_inc(struct per_cpu_pages *pcp, int order)
+{
+pcp_order_stat_mod(pcp, order, 1);
+}
+
+/* Subtract 1 from the order-@order counter of @pcp via pcp_order_stat_mod(). */
+static inline void pcp_order_stat_dec(struct per_cpu_pages *pcp, int order)
+{
+pcp_order_stat_mod(pcp, order, -1);
+}
+
#endif /* _LINUX_VMSTAT_H */
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index afc72fde0f03..57eef0ce809b 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -276,3 +276,11 @@ config PER_VMA_LOCK_STATS
overhead in the page fault path.
If in doubt, say N.
+
+config PCP_ORDER_STATS
+ bool "Statistics for per-order of PCP (Per-CPU pageset)"
+ help
+ Say Y to show the per-order page counts of each per-CPU pageset
+ in /proc/zoneinfo and in the pcp_order_stat file of each THP size in sysfs.
+
+ If in doubt, say N.
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9b8a8aa36526..0c6262bb8fe4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -599,12 +599,39 @@ DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
DEFINE_MTHP_STAT_ATTR(anon_swpin_refault, MTHP_STAT_ANON_SWPIN_REFAULT);
+#ifdef CONFIG_PCP_ORDER_STATS
+/*
+ * pcp_order_stat_show - sysfs read handler for the pcp_order_stat attribute
+ * @kobj: kobject of the THP size; its order selects which counter is summed
+ * @attr: attribute being read (not used)
+ * @buf: sysfs output buffer
+ *
+ * Sums per_order_count[order] over every online CPU of every populated zone
+ * and emits the total as an unsigned decimal followed by a newline.  No lock
+ * is taken while reading, so the value is presumably only an approximate
+ * snapshot while other CPUs allocate and free pages.
+ *
+ * Return: the value returned by sysfs_emit().
+ */
+static ssize_t pcp_order_stat_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	int order = to_thpsize(kobj)->order;
+	unsigned int counts = 0;
+	struct zone *zone;
+
+	/*
+	 * per_order_count[] has only NR_PCP_ORDER slots.  A THP size whose
+	 * order falls outside that range has no counter, so report zero
+	 * instead of indexing past the end of the array.
+	 */
+	if (order < 0 || order >= NR_PCP_ORDER)
+		return sysfs_emit(buf, "%u\n", counts);
+
+	for_each_populated_zone(zone) {
+		struct per_cpu_pages *pcp;
+		int cpu;
+
+		for_each_online_cpu(cpu) {
+			pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+			counts += pcp->per_order_count[order];
+		}
+	}
+
+	return sysfs_emit(buf, "%u\n", counts);
+}
+
+static struct kobj_attribute pcp_order_stat_attr = __ATTR_RO(pcp_order_stat);
+#endif
+
static struct attribute *stats_attrs[] = {
&anon_alloc_attr.attr,
&anon_alloc_fallback_attr.attr,
&anon_swpout_attr.attr,
&anon_swpout_fallback_attr.attr,
&anon_swpin_refault_attr.attr,
+#ifdef CONFIG_PCP_ORDER_STATS
+ &pcp_order_stat_attr.attr, /* per-order PCP count, only built with CONFIG_PCP_ORDER_STATS */
+#endif
NULL,
};
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 25fd3fe30cb0..f44cdf8dec50 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1185,6 +1185,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
list_del(&page->pcp_list);
count -= nr_pages;
pcp->count -= nr_pages;
+ pcp_order_stat_dec(pcp, order);
__free_one_page(page, pfn, zone, order, mt, FPI_NONE);
trace_mm_page_pcpu_drain(page, order, mt);
@@ -2560,6 +2561,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
pindex = order_to_pindex(migratetype, order);
list_add(&page->pcp_list, &pcp->lists[pindex]);
pcp->count += 1 << order;
+ pcp_order_stat_inc(pcp, order);
batch = READ_ONCE(pcp->batch);
/*
@@ -2957,6 +2959,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
migratetype, alloc_flags);
pcp->count += alloced << order;
+ pcp_order_stat_mod(pcp, order, alloced);
if (unlikely(list_empty(list)))
return NULL;
}
@@ -2964,6 +2967,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
page = list_first_entry(list, struct page, pcp_list);
list_del(&page->pcp_list);
pcp->count -= 1 << order;
+ pcp_order_stat_dec(pcp, order);
} while (check_new_pages(page, order));
return page;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index db79935e4a54..632bb1ed6a53 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1674,6 +1674,19 @@ static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
return false;
}
+/*
+ * zoneinfo_show_pcp_order_stat - print the per-order counters of one pageset
+ * @m: seq_file the output is appended to
+ * @pcp: per-cpu pageset whose per_order_count[] is printed
+ *
+ * Emits one "orderN: count" entry, each preceded by a newline, for every
+ * index below NR_PCP_ORDER.  Prints nothing unless CONFIG_PCP_ORDER_STATS
+ * is enabled.
+ */
+static void zoneinfo_show_pcp_order_stat(struct seq_file *m,
+					 struct per_cpu_pages *pcp)
+{
+#ifdef CONFIG_PCP_ORDER_STATS
+	int order = 0;
+
+	while (order < NR_PCP_ORDER) {
+		seq_printf(m, "\n order%d: %i",
+			   order, pcp->per_order_count[order]);
+		order++;
+	}
+#endif
+}
+
static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
struct zone *zone)
{
@@ -1748,6 +1761,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
pcp->count,
pcp->high,
pcp->batch);
+
+ zoneinfo_show_pcp_order_stat(m, pcp);
+
#ifdef CONFIG_SMP
pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
seq_printf(m, "\n vm stats threshold: %d",
--
2.27.0
next prev parent reply other threads:[~2024-04-15 8:12 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-15 8:12 [PATCH rfc 0/3] mm: allow more high-order pages stored on PCP lists Kefeng Wang
2024-04-15 8:12 ` [PATCH rfc 1/3] mm: prepare more high-order pages to be stored on the per-cpu lists Kefeng Wang
2024-04-15 11:41 ` Baolin Wang
2024-04-15 12:25 ` Kefeng Wang
2024-04-15 8:12 ` [PATCH rfc 2/3] mm: add control to allow specified high-order pages stored on PCP list Kefeng Wang
2024-04-15 8:12 ` Kefeng Wang [this message]
2024-04-15 8:18 ` [PATCH rfc 0/3] mm: allow more high-order pages stored on PCP lists Barry Song
2024-04-15 8:59 ` Kefeng Wang
2024-04-15 10:52 ` David Hildenbrand
2024-04-15 11:14 ` Barry Song
2024-04-15 12:17 ` Kefeng Wang
2024-04-16 0:21 ` Barry Song
2024-04-16 4:50 ` Kefeng Wang
2024-04-16 4:58 ` Kefeng Wang
2024-04-16 5:26 ` Barry Song
2024-04-16 7:03 ` David Hildenbrand
2024-04-16 8:06 ` Kefeng Wang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240415081220.3246839-4-wangkefeng.wang@huawei.com \
--to=wangkefeng.wang@huawei.com \
--cc=akpm@linux-foundation.org \
--cc=corbet@lwn.net \
--cc=david@redhat.com \
--cc=linux-mm@kvack.org \
--cc=mgorman@techsingularity.net \
--cc=ryan.roberts@arm.com \
--cc=shy828301@gmail.com \
--cc=v-songbaohua@oppo.com \
--cc=vbabka@suse.cz \
--cc=willy@infradead.org \
--cc=ying.huang@intel.com \
--cc=yuzhao@google.com \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox