linux-mm.kvack.org archive mirror
From: Wupeng Ma <mawupeng1@huawei.com>
To: <akpm@linux-foundation.org>, <mgorman@techsingularity.net>,
	<ying.huang@intel.com>, <mhocko@suse.com>, <dmaluka@chromium.org>
Cc: <liushixin2@huawei.com>, <wangkefeng.wang@huawei.com>,
	<linux-mm@kvack.org>, <linux-kernel@vger.kernel.org>,
	Ma Wupeng <mawupeng1@huawei.com>
Subject: [PATCH] mm, proc: collect percpu free pages into the free pages
Date: Fri, 30 Aug 2024 09:44:53 +0800	[thread overview]
Message-ID: <20240830014453.3070909-1-mawupeng1@huawei.com> (raw)

From: Ma Wupeng <mawupeng1@huawei.com>

The introduction of per-CPU pagesets (PCP) per zone aims to enhance the
performance of the page allocator by enabling page allocation without
requiring the zone lock. These pages are free memory, yet they are not
included in MemFree or MemAvailable.
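
For reference, the pages sitting in the per-CPU pagesets are already
visible per zone in /proc/zoneinfo (the per-cpu "count:" fields), so the
gap described above can be estimated from userspace. A minimal sketch,
assuming the "count:" label only appears inside the per-cpu pageset
blocks and a 4 KiB page size (the layout may differ between kernel
versions):

  /* Illustrative only: sum the per-CPU pageset "count:" fields from
   * /proc/zoneinfo to approximate how many free pages sit in the pcp
   * lists and are therefore missing from MemFree.
   */
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  int main(void)
  {
  	FILE *f = fopen("/proc/zoneinfo", "r");
  	char line[256];
  	unsigned long pcp_pages = 0;

  	if (!f) {
  		perror("/proc/zoneinfo");
  		return 1;
  	}

  	while (fgets(line, sizeof(line), f)) {
  		char *p = line;

  		/* skip leading whitespace, then match the pcp field */
  		while (*p == ' ' || *p == '\t')
  			p++;
  		if (!strncmp(p, "count:", 6))
  			pcp_pages += strtoul(p + 6, NULL, 10);
  	}
  	fclose(f);

  	/* assumes 4 KiB pages */
  	printf("pcp pages: %lu (%lu kB)\n", pcp_pages, pcp_pages * 4);
  	return 0;
  }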

With the support of high-order PCP and PCP auto-tuning, the number of
pages held on these lists has become a matter of concern due to the
following patches:

  1. Introduction of Order 1~3 and PMD level PCP in commit 44042b449872
  ("mm/page_alloc: allow high-order pages to be stored on the per-cpu
  lists").
  2. Introduction of PCP auto-tuning in commit 90b41691b988 ("mm: add
  framework for PCP high auto-tuning").

As a result, the total amount of memory in the PCP lists can no longer
be ignored, even right after boot with no real workload running, as
shown below:

		   w/o patch	  with patch	      diff	diff/total
MemTotal:	525424652 kB	525424652 kB	      0 kB	        0%
MemFree:	517030396 kB	520134136 kB	3103740 kB	      0.6%
MemAvailable:	515837152 kB	518941080 kB	3103928 kB	      0.6%

On a machine with 16 zones and 600+ CPUs, the PCP lists held 274368
pages (1097M) immediately after booting prior to these commits. On
current mainline this has grown to 3003M, a 173% increase.

Available memory is used by numerous services to determine memory
pressure. A substantial volume of PCP memory therefore leads to an
inaccurate estimate of the available memory size and can significantly
distort that service logic.
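
To illustrate that logic, such services typically read MemAvailable
from /proc/meminfo and compare it against a threshold; with pcp pages
excluded, the check trips earlier than it should. A minimal
hypothetical sketch (the 4 GiB threshold is made up for the example):

  /* Hypothetical pressure check: read MemAvailable from /proc/meminfo
   * and compare it with an arbitrary threshold in kB.
   */
  #include <stdio.h>

  static long mem_available_kb(void)
  {
  	FILE *f = fopen("/proc/meminfo", "r");
  	char line[128];
  	long kb = -1;

  	if (!f)
  		return -1;
  	while (fgets(line, sizeof(line), f))
  		if (sscanf(line, "MemAvailable: %ld kB", &kb) == 1)
  			break;
  	fclose(f);
  	return kb;
  }

  int main(void)
  {
  	const long threshold_kb = 4L * 1024 * 1024;	/* example: 4 GiB */
  	long avail = mem_available_kb();

  	if (avail >= 0 && avail < threshold_kb)
  		printf("under pressure: MemAvailable %ld kB < %ld kB\n",
  		       avail, threshold_kb);
  	else
  		printf("MemAvailable: %ld kB\n", avail);
  	return 0;
  }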

Also remove the needless CONFIG_HIGHMEM #ifdef in si_meminfo_node()
since is_highmem_idx() always returns false when the config is not
enabled, so the unified loop handles both cases.
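
For context on why the #ifdef can go: without CONFIG_HIGHMEM,
is_highmem_idx() (and therefore is_highmem()) is a constant false, so
the highmem branch in the unified loop is simply dead code. Roughly,
paraphrased from include/linux/mmzone.h (the exact form may differ by
kernel version):

  static inline bool is_highmem_idx(enum zone_type idx)
  {
  #ifdef CONFIG_HIGHMEM
  	return (idx == ZONE_HIGHMEM ||
  		(idx == ZONE_MOVABLE && movable_zone == ZONE_HIGHMEM));
  #else
  	return false;	/* !CONFIG_HIGHMEM: a zone is never highmem */
  #endif
  }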

Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
 mm/show_mem.c | 46 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 32 insertions(+), 14 deletions(-)

diff --git a/mm/show_mem.c b/mm/show_mem.c
index bdb439551eef..08f566c30b3d 100644
--- a/mm/show_mem.c
+++ b/mm/show_mem.c
@@ -29,6 +29,26 @@ static inline void show_node(struct zone *zone)
 		printk("Node %d ", zone_to_nid(zone));
 }
 
+static unsigned long nr_free_zone_pcplist_pages(struct zone *zone)
+{
+	unsigned long sum = 0;
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		sum += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
+	return sum;
+}
+
+static unsigned long nr_free_pcplist_pages(void)
+{
+	unsigned long sum = 0;
+	struct zone *zone;
+
+	for_each_populated_zone(zone)
+		sum += nr_free_zone_pcplist_pages(zone);
+	return sum;
+}
+
 long si_mem_available(void)
 {
 	long available;
@@ -44,7 +64,8 @@ long si_mem_available(void)
 	 * Estimate the amount of memory available for userspace allocations,
 	 * without causing swapping or OOM.
 	 */
-	available = global_zone_page_state(NR_FREE_PAGES) - totalreserve_pages;
+	available = global_zone_page_state(NR_FREE_PAGES) +
+		    nr_free_pcplist_pages() - totalreserve_pages;
 
 	/*
 	 * Not all the page cache can be freed, otherwise the system will
@@ -76,7 +97,8 @@ void si_meminfo(struct sysinfo *val)
 {
 	val->totalram = totalram_pages();
 	val->sharedram = global_node_page_state(NR_SHMEM);
-	val->freeram = global_zone_page_state(NR_FREE_PAGES);
+	val->freeram =
+		global_zone_page_state(NR_FREE_PAGES) + nr_free_pcplist_pages();
 	val->bufferram = nr_blockdev_pages();
 	val->totalhigh = totalhigh_pages();
 	val->freehigh = nr_free_highpages();
@@ -90,30 +112,27 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 {
 	int zone_type;		/* needs to be signed */
 	unsigned long managed_pages = 0;
+	unsigned long free_pages = sum_zone_node_page_state(nid, NR_FREE_PAGES);
 	unsigned long managed_highpages = 0;
 	unsigned long free_highpages = 0;
 	pg_data_t *pgdat = NODE_DATA(nid);
 
-	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
-		managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]);
-	val->totalram = managed_pages;
-	val->sharedram = node_page_state(pgdat, NR_SHMEM);
-	val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES);
-#ifdef CONFIG_HIGHMEM
 	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
 		struct zone *zone = &pgdat->node_zones[zone_type];
 
+		managed_pages += zone_managed_pages(zone);
+		free_pages += nr_free_zone_pcplist_pages(zone);
 		if (is_highmem(zone)) {
 			managed_highpages += zone_managed_pages(zone);
 			free_highpages += zone_page_state(zone, NR_FREE_PAGES);
 		}
 	}
+
+	val->totalram = managed_pages;
+	val->sharedram = node_page_state(pgdat, NR_SHMEM);
+	val->freeram = free_pages;
 	val->totalhigh = managed_highpages;
 	val->freehigh = free_highpages;
-#else
-	val->totalhigh = managed_highpages;
-	val->freehigh = free_highpages;
-#endif
 	val->mem_unit = PAGE_SIZE;
 }
 #endif
@@ -196,8 +215,7 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z
 		if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
 			continue;
 
-		for_each_online_cpu(cpu)
-			free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
+		free_pcp += nr_free_zone_pcplist_pages(zone);
 	}
 
 	printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
-- 
2.25.1



Thread overview: 11+ messages
2024-08-30  1:44 Wupeng Ma [this message]
2024-08-30  7:53 ` Huang, Ying
2024-09-02  1:11   ` mawupeng
2024-09-02  1:29     ` Huang, Ying
2024-09-03  1:50       ` mawupeng
2024-09-03  8:09         ` Michal Hocko
2024-09-04  6:49           ` mawupeng
2024-09-04  7:28             ` Michal Hocko
2024-09-10 12:11               ` mawupeng
2024-09-10 13:11                 ` Michal Hocko
2024-09-11  5:37                 ` Huang, Ying
