* [PATCH v2 01/13] mm: page_alloc: move mirrored_kernelcore into mm_init.c
2023-05-16 6:38 [PATCH -next v2 00/13] mm: page_alloc: misc cleanup and refactor Kefeng Wang
@ 2023-05-16 6:38 ` Kefeng Wang
2023-05-16 6:38 ` [PATCH v2 02/13] mm: page_alloc: move init_on_alloc/free() " Kefeng Wang
` (11 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-05-16 6:38 UTC (permalink / raw)
To: Andrew Morton, Mike Rapoport, linux-mm
Cc: David Hildenbrand, Oscar Salvador, Rafael J. Wysocki,
Pavel Machek, Len Brown, Luis Chamberlain, Kees Cook,
Iurii Zaikin, linux-kernel, linux-pm, linux-fsdevel, ying.huang,
Kefeng Wang
Since commit 9420f89db2dd ("mm: move most of core MM initialization
to mm/mm_init.c"), most of the code related to mirrored_kernelcore
already lives in mm_init.c, so move the definition there as well.
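For context, this flag is flipped by the kernelcore= early parameter,
whose parser already sits in mm_init.c next to the hunk below; a
simplified sketch (based on cmdline_parse_kernelcore(); treat details
as illustrative):

static int __init cmdline_parse_kernelcore(char *p)
{
	/* kernelcore=mirror requests mirrored memory for kernel allocations */
	if (parse_option_str(p, "mirror")) {
		mirrored_kernelcore = true;
		return 0;
	}

	/* otherwise kernelcore=size/percent, via cmdline_parse_core() */
	return cmdline_parse_core(p, &required_kernelcore,
				  &required_kernelcore_percent);
}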
Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
mm/mm_init.c | 2 ++
mm/page_alloc.c | 3 ---
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 7f7f9c677854..da162b7a044c 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -259,6 +259,8 @@ static int __init cmdline_parse_core(char *p, unsigned long *core,
return 0;
}
+bool mirrored_kernelcore __initdata_memblock;
+
/*
* kernelcore=size sets the amount of memory for use for allocations that
* cannot be reclaimed or migrated.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1d6419cd3f37..4b4188cff820 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -23,7 +23,6 @@
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/jiffies.h>
-#include <linux/memblock.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/kasan.h>
@@ -374,8 +373,6 @@ int user_min_free_kbytes = -1;
int watermark_boost_factor __read_mostly = 15000;
int watermark_scale_factor = 10;
-bool mirrored_kernelcore __initdata_memblock;
-
/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
int movable_zone;
EXPORT_SYMBOL(movable_zone);
--
2.35.3
^ permalink raw reply [flat|nested] 17+ messages in thread

* [PATCH v2 02/13] mm: page_alloc: move init_on_alloc/free() into mm_init.c
2023-05-16 6:38 [PATCH -next v2 00/13] mm: page_alloc: misc cleanup and refactor Kefeng Wang
2023-05-16 6:38 ` [PATCH v2 01/13] mm: page_alloc: move mirrored_kernelcore into mm_init.c Kefeng Wang
@ 2023-05-16 6:38 ` Kefeng Wang
2023-05-16 6:38 ` [PATCH v2 03/13] mm: page_alloc: move set_zone_contiguous() " Kefeng Wang
` (10 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-05-16 6:38 UTC (permalink / raw)
To: Andrew Morton, Mike Rapoport, linux-mm
Cc: David Hildenbrand, Oscar Salvador, Rafael J. Wysocki,
Pavel Machek, Len Brown, Luis Chamberlain, Kees Cook,
Iurii Zaikin, linux-kernel, linux-pm, linux-fsdevel, ying.huang,
Kefeng Wang
Since commit f2fc4b44ec2b ("mm: move init_mem_debugging_and_hardening()
to mm/mm_init.c"), the init_on_alloc and init_on_free static-key
definitions are better moved there too.
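For readers following along, these keys are consumed through small
inline helpers in include/linux/mm.h; roughly (a sketch, not a
verbatim copy):

static inline bool want_init_on_alloc(gfp_t flags)
{
	if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
				&init_on_alloc))
		return true;
	return flags & __GFP_ZERO;
}

static inline bool want_init_on_free(void)
{
	return static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
				   &init_on_free);
}

so moving the DEFINE_STATIC_KEY_MAYBE() sites changes nothing for
callers.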
Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
mm/mm_init.c | 6 ++++++
mm/page_alloc.c | 5 -----
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index da162b7a044c..15201887f8e0 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2543,6 +2543,12 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
__free_pages_core(page, order);
}
+DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc);
+EXPORT_SYMBOL(init_on_alloc);
+
+DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
+EXPORT_SYMBOL(init_on_free);
+
static bool _init_on_alloc_enabled_early __read_mostly
= IS_ENABLED(CONFIG_INIT_ON_ALLOC_DEFAULT_ON);
static int __init early_init_on_alloc(char *buf)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4b4188cff820..bc69a0474069 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -233,11 +233,6 @@ unsigned long totalcma_pages __read_mostly;
int percpu_pagelist_high_fraction;
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
-DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc);
-EXPORT_SYMBOL(init_on_alloc);
-
-DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
-EXPORT_SYMBOL(init_on_free);
/*
* A cached value of the page's pageblock's migratetype, used when the page is
--
2.35.3
^ permalink raw reply [flat|nested] 17+ messages in thread

* [PATCH v2 03/13] mm: page_alloc: move set_zone_contiguous() into mm_init.c
2023-05-16 6:38 [PATCH -next v2 00/13] mm: page_alloc: misc cleanup and refactor Kefeng Wang
2023-05-16 6:38 ` [PATCH v2 01/13] mm: page_alloc: move mirrored_kernelcore into mm_init.c Kefeng Wang
2023-05-16 6:38 ` [PATCH v2 02/13] mm: page_alloc: move init_on_alloc/free() " Kefeng Wang
@ 2023-05-16 6:38 ` Kefeng Wang
2023-05-16 6:38 ` [PATCH v2 04/13] mm: page_alloc: collect mem statistic into show_mem.c Kefeng Wang
` (9 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-05-16 6:38 UTC (permalink / raw)
To: Andrew Morton, Mike Rapoport, linux-mm
Cc: David Hildenbrand, Oscar Salvador, Rafael J. Wysocki,
Pavel Machek, Len Brown, Luis Chamberlain, Kees Cook,
Iurii Zaikin, linux-kernel, linux-pm, linux-fsdevel, ying.huang,
Kefeng Wang
set_zone_contiguous() is only used during mm init and memory hotplug,
and clear_zone_contiguous() is only used by hotplug; move them out of
page_alloc.c into more appropriate files (mm_init.c and mm/internal.h).
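The hotplug usage pattern, for reference (a simplified sketch of
move_pfn_range_to_zone() in mm/memory_hotplug.c, with locking and
initialization details elided):

void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
				  unsigned long nr_pages,
				  struct vmem_altmap *altmap, int migratetype)
{
	/* zone->contiguous may stop holding while the zone is resized */
	clear_zone_contiguous(zone);

	/* ... grow zone/node spans and initialize the new pages ... */

	/* re-walk the pageblocks and recompute zone->contiguous */
	set_zone_contiguous(zone);
}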
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
include/linux/memory_hotplug.h | 3 ---
mm/internal.h | 7 +++++++
mm/mm_init.c | 22 ++++++++++++++++++++++
mm/page_alloc.c | 27 ---------------------------
4 files changed, 29 insertions(+), 30 deletions(-)
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 9fcbf5706595..04bc286eed42 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -326,9 +326,6 @@ static inline int remove_memory(u64 start, u64 size)
static inline void __remove_memory(u64 start, u64 size) {}
#endif /* CONFIG_MEMORY_HOTREMOVE */
-extern void set_zone_contiguous(struct zone *zone);
-extern void clear_zone_contiguous(struct zone *zone);
-
#ifdef CONFIG_MEMORY_HOTPLUG
extern void __ref free_area_init_core_hotplug(struct pglist_data *pgdat);
extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags);
diff --git a/mm/internal.h b/mm/internal.h
index 644fa8b761f5..79324b7f2bc8 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -371,6 +371,13 @@ static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
}
+void set_zone_contiguous(struct zone *zone);
+
+static inline void clear_zone_contiguous(struct zone *zone)
+{
+ zone->contiguous = false;
+}
+
extern int __isolate_free_page(struct page *page, unsigned int order);
extern void __putback_isolated_page(struct page *page, unsigned int order,
int mt);
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 15201887f8e0..0fd4ddfdfb2e 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2330,6 +2330,28 @@ void __init init_cma_reserved_pageblock(struct page *page)
}
#endif
+void set_zone_contiguous(struct zone *zone)
+{
+ unsigned long block_start_pfn = zone->zone_start_pfn;
+ unsigned long block_end_pfn;
+
+ block_end_pfn = pageblock_end_pfn(block_start_pfn);
+ for (; block_start_pfn < zone_end_pfn(zone);
+ block_start_pfn = block_end_pfn,
+ block_end_pfn += pageblock_nr_pages) {
+
+ block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
+
+ if (!__pageblock_pfn_to_page(block_start_pfn,
+ block_end_pfn, zone))
+ return;
+ cond_resched();
+ }
+
+ /* We confirm that there is no hole */
+ zone->contiguous = true;
+}
+
void __init page_alloc_init_late(void)
{
struct zone *zone;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bc69a0474069..1b84b86fd33d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1532,33 +1532,6 @@ struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
return start_page;
}
-void set_zone_contiguous(struct zone *zone)
-{
- unsigned long block_start_pfn = zone->zone_start_pfn;
- unsigned long block_end_pfn;
-
- block_end_pfn = pageblock_end_pfn(block_start_pfn);
- for (; block_start_pfn < zone_end_pfn(zone);
- block_start_pfn = block_end_pfn,
- block_end_pfn += pageblock_nr_pages) {
-
- block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
-
- if (!__pageblock_pfn_to_page(block_start_pfn,
- block_end_pfn, zone))
- return;
- cond_resched();
- }
-
- /* We confirm that there is no hole */
- zone->contiguous = true;
-}
-
-void clear_zone_contiguous(struct zone *zone)
-{
- zone->contiguous = false;
-}
-
/*
* The order of subdivision here is critical for the IO subsystem.
* Please do not alter this order without good reasons and regression
--
2.35.3
^ permalink raw reply [flat|nested] 17+ messages in thread

* [PATCH v2 04/13] mm: page_alloc: collect mem statistic into show_mem.c
2023-05-16 6:38 [PATCH -next v2 00/13] mm: page_alloc: misc cleanup and refactor Kefeng Wang
` (2 preceding siblings ...)
2023-05-16 6:38 ` [PATCH v2 03/13] mm: page_alloc: move set_zone_contiguous() " Kefeng Wang
@ 2023-05-16 6:38 ` Kefeng Wang
2023-05-16 6:38 ` [PATCH v2 05/13] mm: page_alloc: squash page_is_consistent() Kefeng Wang
` (8 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-05-16 6:38 UTC (permalink / raw)
To: Andrew Morton, Mike Rapoport, linux-mm
Cc: David Hildenbrand, Oscar Salvador, Rafael J. Wysocki,
Pavel Machek, Len Brown, Luis Chamberlain, Kees Cook,
Iurii Zaikin, linux-kernel, linux-pm, linux-fsdevel, ying.huang,
Kefeng Wang
Let's move show_mem.c from lib to mm, as it belongs to the memory
subsystem. Also split the memory-statistics-related functions out of
page_alloc.c into show_mem.c, and clean up some unneeded includes.
There is no functional change.
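For orientation, the entry point callers use is unchanged; show_mem()
stays a thin wrapper around the moved code (sketch of the
include/linux/mm.h helper):

static inline void show_mem(unsigned int flags, nodemask_t *nodemask)
{
	__show_mem(flags, nodemask, MAX_NR_ZONES - 1);
}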
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
lib/Makefile | 2 +-
lib/show_mem.c | 37 -----
mm/Makefile | 2 +-
mm/page_alloc.c | 402 ---------------------------------------------
mm/show_mem.c | 429 ++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 431 insertions(+), 441 deletions(-)
delete mode 100644 lib/show_mem.c
create mode 100644 mm/show_mem.c
diff --git a/lib/Makefile b/lib/Makefile
index 876fcdeae34e..38f23f352736 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -30,7 +30,7 @@ endif
lib-y := ctype.o string.o vsprintf.o cmdline.o \
rbtree.o radix-tree.o timerqueue.o xarray.o \
maple_tree.o idr.o extable.o irq_regs.o argv_split.o \
- flex_proportions.o ratelimit.o show_mem.o \
+ flex_proportions.o ratelimit.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
nmi_backtrace.o win_minmax.o memcat_p.o \
diff --git a/lib/show_mem.c b/lib/show_mem.c
deleted file mode 100644
index 1485c87be935..000000000000
--- a/lib/show_mem.c
+++ /dev/null
@@ -1,37 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Generic show_mem() implementation
- *
- * Copyright (C) 2008 Johannes Weiner <hannes@saeurebad.de>
- */
-
-#include <linux/mm.h>
-#include <linux/cma.h>
-
-void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
-{
- unsigned long total = 0, reserved = 0, highmem = 0;
- struct zone *zone;
-
- printk("Mem-Info:\n");
- __show_free_areas(filter, nodemask, max_zone_idx);
-
- for_each_populated_zone(zone) {
-
- total += zone->present_pages;
- reserved += zone->present_pages - zone_managed_pages(zone);
-
- if (is_highmem(zone))
- highmem += zone->present_pages;
- }
-
- printk("%lu pages RAM\n", total);
- printk("%lu pages HighMem/MovableOnly\n", highmem);
- printk("%lu pages reserved\n", reserved);
-#ifdef CONFIG_CMA
- printk("%lu pages cma reserved\n", totalcma_pages);
-#endif
-#ifdef CONFIG_MEMORY_FAILURE
- printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages));
-#endif
-}
diff --git a/mm/Makefile b/mm/Makefile
index e29afc890cde..5262ce5baa28 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -51,7 +51,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
readahead.o swap.o truncate.o vmscan.o shmem.o \
util.o mmzone.o vmstat.o backing-dev.o \
mm_init.o percpu.o slab_common.o \
- compaction.o \
+ compaction.o show_mem.o\
interval_tree.o list_lru.o workingset.o \
debug.o gup.o mmap_lock.o $(mmu-y)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1b84b86fd33d..84ba6cca3b3a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -18,10 +18,7 @@
#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/highmem.h>
-#include <linux/swap.h>
-#include <linux/swapops.h>
#include <linux/interrupt.h>
-#include <linux/pagemap.h>
#include <linux/jiffies.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
@@ -30,8 +27,6 @@
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/pagevec.h>
-#include <linux/blkdev.h>
-#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/oom.h>
#include <linux/topology.h>
@@ -40,19 +35,10 @@
#include <linux/cpuset.h>
#include <linux/memory_hotplug.h>
#include <linux/nodemask.h>
-#include <linux/vmalloc.h>
#include <linux/vmstat.h>
-#include <linux/mempolicy.h>
-#include <linux/memremap.h>
-#include <linux/stop_machine.h>
-#include <linux/random.h>
#include <linux/sort.h>
#include <linux/pfn.h>
-#include <linux/backing-dev.h>
#include <linux/fault-inject.h>
-#include <linux/page-isolation.h>
-#include <linux/debugobjects.h>
-#include <linux/kmemleak.h>
#include <linux/compaction.h>
#include <trace/events/kmem.h>
#include <trace/events/oom.h>
@@ -60,12 +46,9 @@
#include <linux/mm_inline.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
-#include <linux/hugetlb.h>
-#include <linux/sched/rt.h>
#include <linux/sched/mm.h>
#include <linux/page_owner.h>
#include <linux/page_table_check.h>
-#include <linux/kthread.h>
#include <linux/memcontrol.h>
#include <linux/ftrace.h>
#include <linux/lockdep.h>
@@ -73,13 +56,10 @@
#include <linux/psi.h>
#include <linux/khugepaged.h>
#include <linux/delayacct.h>
-#include <asm/sections.h>
-#include <asm/tlbflush.h>
#include <asm/div64.h>
#include "internal.h"
#include "shuffle.h"
#include "page_reporting.h"
-#include "swap.h"
/* Free Page Internal flags: for internal, non-pcp variants of free_pages(). */
typedef int __bitwise fpi_t;
@@ -226,11 +206,6 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
};
EXPORT_SYMBOL(node_states);
-atomic_long_t _totalram_pages __read_mostly;
-EXPORT_SYMBOL(_totalram_pages);
-unsigned long totalreserve_pages __read_mostly;
-unsigned long totalcma_pages __read_mostly;
-
int percpu_pagelist_high_fraction;
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
@@ -5139,383 +5114,6 @@ unsigned long nr_free_buffer_pages(void)
}
EXPORT_SYMBOL_GPL(nr_free_buffer_pages);
-static inline void show_node(struct zone *zone)
-{
- if (IS_ENABLED(CONFIG_NUMA))
- printk("Node %d ", zone_to_nid(zone));
-}
-
-long si_mem_available(void)
-{
- long available;
- unsigned long pagecache;
- unsigned long wmark_low = 0;
- unsigned long pages[NR_LRU_LISTS];
- unsigned long reclaimable;
- struct zone *zone;
- int lru;
-
- for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
- pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
-
- for_each_zone(zone)
- wmark_low += low_wmark_pages(zone);
-
- /*
- * Estimate the amount of memory available for userspace allocations,
- * without causing swapping or OOM.
- */
- available = global_zone_page_state(NR_FREE_PAGES) - totalreserve_pages;
-
- /*
- * Not all the page cache can be freed, otherwise the system will
- * start swapping or thrashing. Assume at least half of the page
- * cache, or the low watermark worth of cache, needs to stay.
- */
- pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
- pagecache -= min(pagecache / 2, wmark_low);
- available += pagecache;
-
- /*
- * Part of the reclaimable slab and other kernel memory consists of
- * items that are in use, and cannot be freed. Cap this estimate at the
- * low watermark.
- */
- reclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) +
- global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
- available += reclaimable - min(reclaimable / 2, wmark_low);
-
- if (available < 0)
- available = 0;
- return available;
-}
-EXPORT_SYMBOL_GPL(si_mem_available);
-
-void si_meminfo(struct sysinfo *val)
-{
- val->totalram = totalram_pages();
- val->sharedram = global_node_page_state(NR_SHMEM);
- val->freeram = global_zone_page_state(NR_FREE_PAGES);
- val->bufferram = nr_blockdev_pages();
- val->totalhigh = totalhigh_pages();
- val->freehigh = nr_free_highpages();
- val->mem_unit = PAGE_SIZE;
-}
-
-EXPORT_SYMBOL(si_meminfo);
-
-#ifdef CONFIG_NUMA
-void si_meminfo_node(struct sysinfo *val, int nid)
-{
- int zone_type; /* needs to be signed */
- unsigned long managed_pages = 0;
- unsigned long managed_highpages = 0;
- unsigned long free_highpages = 0;
- pg_data_t *pgdat = NODE_DATA(nid);
-
- for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
- managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]);
- val->totalram = managed_pages;
- val->sharedram = node_page_state(pgdat, NR_SHMEM);
- val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES);
-#ifdef CONFIG_HIGHMEM
- for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
- struct zone *zone = &pgdat->node_zones[zone_type];
-
- if (is_highmem(zone)) {
- managed_highpages += zone_managed_pages(zone);
- free_highpages += zone_page_state(zone, NR_FREE_PAGES);
- }
- }
- val->totalhigh = managed_highpages;
- val->freehigh = free_highpages;
-#else
- val->totalhigh = managed_highpages;
- val->freehigh = free_highpages;
-#endif
- val->mem_unit = PAGE_SIZE;
-}
-#endif
-
-/*
- * Determine whether the node should be displayed or not, depending on whether
- * SHOW_MEM_FILTER_NODES was passed to show_free_areas().
- */
-static bool show_mem_node_skip(unsigned int flags, int nid, nodemask_t *nodemask)
-{
- if (!(flags & SHOW_MEM_FILTER_NODES))
- return false;
-
- /*
- * no node mask - aka implicit memory numa policy. Do not bother with
- * the synchronization - read_mems_allowed_begin - because we do not
- * have to be precise here.
- */
- if (!nodemask)
- nodemask = &cpuset_current_mems_allowed;
-
- return !node_isset(nid, *nodemask);
-}
-
-static void show_migration_types(unsigned char type)
-{
- static const char types[MIGRATE_TYPES] = {
- [MIGRATE_UNMOVABLE] = 'U',
- [MIGRATE_MOVABLE] = 'M',
- [MIGRATE_RECLAIMABLE] = 'E',
- [MIGRATE_HIGHATOMIC] = 'H',
-#ifdef CONFIG_CMA
- [MIGRATE_CMA] = 'C',
-#endif
-#ifdef CONFIG_MEMORY_ISOLATION
- [MIGRATE_ISOLATE] = 'I',
-#endif
- };
- char tmp[MIGRATE_TYPES + 1];
- char *p = tmp;
- int i;
-
- for (i = 0; i < MIGRATE_TYPES; i++) {
- if (type & (1 << i))
- *p++ = types[i];
- }
-
- *p = '\0';
- printk(KERN_CONT "(%s) ", tmp);
-}
-
-static bool node_has_managed_zones(pg_data_t *pgdat, int max_zone_idx)
-{
- int zone_idx;
- for (zone_idx = 0; zone_idx <= max_zone_idx; zone_idx++)
- if (zone_managed_pages(pgdat->node_zones + zone_idx))
- return true;
- return false;
-}
-
-/*
- * Show free area list (used inside shift_scroll-lock stuff)
- * We also calculate the percentage fragmentation. We do this by counting the
- * memory on each free list with the exception of the first item on the list.
- *
- * Bits in @filter:
- * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's
- * cpuset.
- */
-void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
-{
- unsigned long free_pcp = 0;
- int cpu, nid;
- struct zone *zone;
- pg_data_t *pgdat;
-
- for_each_populated_zone(zone) {
- if (zone_idx(zone) > max_zone_idx)
- continue;
- if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
- continue;
-
- for_each_online_cpu(cpu)
- free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
- }
-
- printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
- " active_file:%lu inactive_file:%lu isolated_file:%lu\n"
- " unevictable:%lu dirty:%lu writeback:%lu\n"
- " slab_reclaimable:%lu slab_unreclaimable:%lu\n"
- " mapped:%lu shmem:%lu pagetables:%lu\n"
- " sec_pagetables:%lu bounce:%lu\n"
- " kernel_misc_reclaimable:%lu\n"
- " free:%lu free_pcp:%lu free_cma:%lu\n",
- global_node_page_state(NR_ACTIVE_ANON),
- global_node_page_state(NR_INACTIVE_ANON),
- global_node_page_state(NR_ISOLATED_ANON),
- global_node_page_state(NR_ACTIVE_FILE),
- global_node_page_state(NR_INACTIVE_FILE),
- global_node_page_state(NR_ISOLATED_FILE),
- global_node_page_state(NR_UNEVICTABLE),
- global_node_page_state(NR_FILE_DIRTY),
- global_node_page_state(NR_WRITEBACK),
- global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B),
- global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B),
- global_node_page_state(NR_FILE_MAPPED),
- global_node_page_state(NR_SHMEM),
- global_node_page_state(NR_PAGETABLE),
- global_node_page_state(NR_SECONDARY_PAGETABLE),
- global_zone_page_state(NR_BOUNCE),
- global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE),
- global_zone_page_state(NR_FREE_PAGES),
- free_pcp,
- global_zone_page_state(NR_FREE_CMA_PAGES));
-
- for_each_online_pgdat(pgdat) {
- if (show_mem_node_skip(filter, pgdat->node_id, nodemask))
- continue;
- if (!node_has_managed_zones(pgdat, max_zone_idx))
- continue;
-
- printk("Node %d"
- " active_anon:%lukB"
- " inactive_anon:%lukB"
- " active_file:%lukB"
- " inactive_file:%lukB"
- " unevictable:%lukB"
- " isolated(anon):%lukB"
- " isolated(file):%lukB"
- " mapped:%lukB"
- " dirty:%lukB"
- " writeback:%lukB"
- " shmem:%lukB"
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- " shmem_thp: %lukB"
- " shmem_pmdmapped: %lukB"
- " anon_thp: %lukB"
-#endif
- " writeback_tmp:%lukB"
- " kernel_stack:%lukB"
-#ifdef CONFIG_SHADOW_CALL_STACK
- " shadow_call_stack:%lukB"
-#endif
- " pagetables:%lukB"
- " sec_pagetables:%lukB"
- " all_unreclaimable? %s"
- "\n",
- pgdat->node_id,
- K(node_page_state(pgdat, NR_ACTIVE_ANON)),
- K(node_page_state(pgdat, NR_INACTIVE_ANON)),
- K(node_page_state(pgdat, NR_ACTIVE_FILE)),
- K(node_page_state(pgdat, NR_INACTIVE_FILE)),
- K(node_page_state(pgdat, NR_UNEVICTABLE)),
- K(node_page_state(pgdat, NR_ISOLATED_ANON)),
- K(node_page_state(pgdat, NR_ISOLATED_FILE)),
- K(node_page_state(pgdat, NR_FILE_MAPPED)),
- K(node_page_state(pgdat, NR_FILE_DIRTY)),
- K(node_page_state(pgdat, NR_WRITEBACK)),
- K(node_page_state(pgdat, NR_SHMEM)),
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- K(node_page_state(pgdat, NR_SHMEM_THPS)),
- K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)),
- K(node_page_state(pgdat, NR_ANON_THPS)),
-#endif
- K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
- node_page_state(pgdat, NR_KERNEL_STACK_KB),
-#ifdef CONFIG_SHADOW_CALL_STACK
- node_page_state(pgdat, NR_KERNEL_SCS_KB),
-#endif
- K(node_page_state(pgdat, NR_PAGETABLE)),
- K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)),
- pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
- "yes" : "no");
- }
-
- for_each_populated_zone(zone) {
- int i;
-
- if (zone_idx(zone) > max_zone_idx)
- continue;
- if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
- continue;
-
- free_pcp = 0;
- for_each_online_cpu(cpu)
- free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
-
- show_node(zone);
- printk(KERN_CONT
- "%s"
- " free:%lukB"
- " boost:%lukB"
- " min:%lukB"
- " low:%lukB"
- " high:%lukB"
- " reserved_highatomic:%luKB"
- " active_anon:%lukB"
- " inactive_anon:%lukB"
- " active_file:%lukB"
- " inactive_file:%lukB"
- " unevictable:%lukB"
- " writepending:%lukB"
- " present:%lukB"
- " managed:%lukB"
- " mlocked:%lukB"
- " bounce:%lukB"
- " free_pcp:%lukB"
- " local_pcp:%ukB"
- " free_cma:%lukB"
- "\n",
- zone->name,
- K(zone_page_state(zone, NR_FREE_PAGES)),
- K(zone->watermark_boost),
- K(min_wmark_pages(zone)),
- K(low_wmark_pages(zone)),
- K(high_wmark_pages(zone)),
- K(zone->nr_reserved_highatomic),
- K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)),
- K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)),
- K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)),
- K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE)),
- K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)),
- K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)),
- K(zone->present_pages),
- K(zone_managed_pages(zone)),
- K(zone_page_state(zone, NR_MLOCK)),
- K(zone_page_state(zone, NR_BOUNCE)),
- K(free_pcp),
- K(this_cpu_read(zone->per_cpu_pageset->count)),
- K(zone_page_state(zone, NR_FREE_CMA_PAGES)));
- printk("lowmem_reserve[]:");
- for (i = 0; i < MAX_NR_ZONES; i++)
- printk(KERN_CONT " %ld", zone->lowmem_reserve[i]);
- printk(KERN_CONT "\n");
- }
-
- for_each_populated_zone(zone) {
- unsigned int order;
- unsigned long nr[MAX_ORDER + 1], flags, total = 0;
- unsigned char types[MAX_ORDER + 1];
-
- if (zone_idx(zone) > max_zone_idx)
- continue;
- if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
- continue;
- show_node(zone);
- printk(KERN_CONT "%s: ", zone->name);
-
- spin_lock_irqsave(&zone->lock, flags);
- for (order = 0; order <= MAX_ORDER; order++) {
- struct free_area *area = &zone->free_area[order];
- int type;
-
- nr[order] = area->nr_free;
- total += nr[order] << order;
-
- types[order] = 0;
- for (type = 0; type < MIGRATE_TYPES; type++) {
- if (!free_area_empty(area, type))
- types[order] |= 1 << type;
- }
- }
- spin_unlock_irqrestore(&zone->lock, flags);
- for (order = 0; order <= MAX_ORDER; order++) {
- printk(KERN_CONT "%lu*%lukB ",
- nr[order], K(1UL) << order);
- if (nr[order])
- show_migration_types(types[order]);
- }
- printk(KERN_CONT "= %lukB\n", K(total));
- }
-
- for_each_online_node(nid) {
- if (show_mem_node_skip(filter, nid, nodemask))
- continue;
- hugetlb_show_meminfo_node(nid);
- }
-
- printk("%ld total pagecache pages\n", global_node_page_state(NR_FILE_PAGES));
-
- show_swap_cache_info();
-}
-
static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
{
zoneref->zone = zone;
diff --git a/mm/show_mem.c b/mm/show_mem.c
new file mode 100644
index 000000000000..01f8e9905817
--- /dev/null
+++ b/mm/show_mem.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Generic show_mem() implementation
+ *
+ * Copyright (C) 2008 Johannes Weiner <hannes@saeurebad.de>
+ */
+
+#include <linux/blkdev.h>
+#include <linux/cma.h>
+#include <linux/cpuset.h>
+#include <linux/highmem.h>
+#include <linux/hugetlb.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/swap.h>
+#include <linux/vmstat.h>
+
+#include "internal.h"
+#include "swap.h"
+
+atomic_long_t _totalram_pages __read_mostly;
+EXPORT_SYMBOL(_totalram_pages);
+unsigned long totalreserve_pages __read_mostly;
+unsigned long totalcma_pages __read_mostly;
+
+static inline void show_node(struct zone *zone)
+{
+ if (IS_ENABLED(CONFIG_NUMA))
+ printk("Node %d ", zone_to_nid(zone));
+}
+
+long si_mem_available(void)
+{
+ long available;
+ unsigned long pagecache;
+ unsigned long wmark_low = 0;
+ unsigned long pages[NR_LRU_LISTS];
+ unsigned long reclaimable;
+ struct zone *zone;
+ int lru;
+
+ for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
+ pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
+
+ for_each_zone(zone)
+ wmark_low += low_wmark_pages(zone);
+
+ /*
+ * Estimate the amount of memory available for userspace allocations,
+ * without causing swapping or OOM.
+ */
+ available = global_zone_page_state(NR_FREE_PAGES) - totalreserve_pages;
+
+ /*
+ * Not all the page cache can be freed, otherwise the system will
+ * start swapping or thrashing. Assume at least half of the page
+ * cache, or the low watermark worth of cache, needs to stay.
+ */
+ pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
+ pagecache -= min(pagecache / 2, wmark_low);
+ available += pagecache;
+
+ /*
+ * Part of the reclaimable slab and other kernel memory consists of
+ * items that are in use, and cannot be freed. Cap this estimate at the
+ * low watermark.
+ */
+ reclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) +
+ global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
+ available += reclaimable - min(reclaimable / 2, wmark_low);
+
+ if (available < 0)
+ available = 0;
+ return available;
+}
+EXPORT_SYMBOL_GPL(si_mem_available);
+
+void si_meminfo(struct sysinfo *val)
+{
+ val->totalram = totalram_pages();
+ val->sharedram = global_node_page_state(NR_SHMEM);
+ val->freeram = global_zone_page_state(NR_FREE_PAGES);
+ val->bufferram = nr_blockdev_pages();
+ val->totalhigh = totalhigh_pages();
+ val->freehigh = nr_free_highpages();
+ val->mem_unit = PAGE_SIZE;
+}
+
+EXPORT_SYMBOL(si_meminfo);
+
+#ifdef CONFIG_NUMA
+void si_meminfo_node(struct sysinfo *val, int nid)
+{
+ int zone_type; /* needs to be signed */
+ unsigned long managed_pages = 0;
+ unsigned long managed_highpages = 0;
+ unsigned long free_highpages = 0;
+ pg_data_t *pgdat = NODE_DATA(nid);
+
+ for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+ managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]);
+ val->totalram = managed_pages;
+ val->sharedram = node_page_state(pgdat, NR_SHMEM);
+ val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES);
+#ifdef CONFIG_HIGHMEM
+ for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
+ struct zone *zone = &pgdat->node_zones[zone_type];
+
+ if (is_highmem(zone)) {
+ managed_highpages += zone_managed_pages(zone);
+ free_highpages += zone_page_state(zone, NR_FREE_PAGES);
+ }
+ }
+ val->totalhigh = managed_highpages;
+ val->freehigh = free_highpages;
+#else
+ val->totalhigh = managed_highpages;
+ val->freehigh = free_highpages;
+#endif
+ val->mem_unit = PAGE_SIZE;
+}
+#endif
+
+/*
+ * Determine whether the node should be displayed or not, depending on whether
+ * SHOW_MEM_FILTER_NODES was passed to show_free_areas().
+ */
+static bool show_mem_node_skip(unsigned int flags, int nid, nodemask_t *nodemask)
+{
+ if (!(flags & SHOW_MEM_FILTER_NODES))
+ return false;
+
+ /*
+ * no node mask - aka implicit memory numa policy. Do not bother with
+ * the synchronization - read_mems_allowed_begin - because we do not
+ * have to be precise here.
+ */
+ if (!nodemask)
+ nodemask = &cpuset_current_mems_allowed;
+
+ return !node_isset(nid, *nodemask);
+}
+
+static void show_migration_types(unsigned char type)
+{
+ static const char types[MIGRATE_TYPES] = {
+ [MIGRATE_UNMOVABLE] = 'U',
+ [MIGRATE_MOVABLE] = 'M',
+ [MIGRATE_RECLAIMABLE] = 'E',
+ [MIGRATE_HIGHATOMIC] = 'H',
+#ifdef CONFIG_CMA
+ [MIGRATE_CMA] = 'C',
+#endif
+#ifdef CONFIG_MEMORY_ISOLATION
+ [MIGRATE_ISOLATE] = 'I',
+#endif
+ };
+ char tmp[MIGRATE_TYPES + 1];
+ char *p = tmp;
+ int i;
+
+ for (i = 0; i < MIGRATE_TYPES; i++) {
+ if (type & (1 << i))
+ *p++ = types[i];
+ }
+
+ *p = '\0';
+ printk(KERN_CONT "(%s) ", tmp);
+}
+
+static bool node_has_managed_zones(pg_data_t *pgdat, int max_zone_idx)
+{
+ int zone_idx;
+ for (zone_idx = 0; zone_idx <= max_zone_idx; zone_idx++)
+ if (zone_managed_pages(pgdat->node_zones + zone_idx))
+ return true;
+ return false;
+}
+
+/*
+ * Show free area list (used inside shift_scroll-lock stuff)
+ * We also calculate the percentage fragmentation. We do this by counting the
+ * memory on each free list with the exception of the first item on the list.
+ *
+ * Bits in @filter:
+ * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's
+ * cpuset.
+ */
+void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
+{
+ unsigned long free_pcp = 0;
+ int cpu, nid;
+ struct zone *zone;
+ pg_data_t *pgdat;
+
+ for_each_populated_zone(zone) {
+ if (zone_idx(zone) > max_zone_idx)
+ continue;
+ if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
+ continue;
+
+ for_each_online_cpu(cpu)
+ free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
+ }
+
+ printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
+ " active_file:%lu inactive_file:%lu isolated_file:%lu\n"
+ " unevictable:%lu dirty:%lu writeback:%lu\n"
+ " slab_reclaimable:%lu slab_unreclaimable:%lu\n"
+ " mapped:%lu shmem:%lu pagetables:%lu\n"
+ " sec_pagetables:%lu bounce:%lu\n"
+ " kernel_misc_reclaimable:%lu\n"
+ " free:%lu free_pcp:%lu free_cma:%lu\n",
+ global_node_page_state(NR_ACTIVE_ANON),
+ global_node_page_state(NR_INACTIVE_ANON),
+ global_node_page_state(NR_ISOLATED_ANON),
+ global_node_page_state(NR_ACTIVE_FILE),
+ global_node_page_state(NR_INACTIVE_FILE),
+ global_node_page_state(NR_ISOLATED_FILE),
+ global_node_page_state(NR_UNEVICTABLE),
+ global_node_page_state(NR_FILE_DIRTY),
+ global_node_page_state(NR_WRITEBACK),
+ global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B),
+ global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B),
+ global_node_page_state(NR_FILE_MAPPED),
+ global_node_page_state(NR_SHMEM),
+ global_node_page_state(NR_PAGETABLE),
+ global_node_page_state(NR_SECONDARY_PAGETABLE),
+ global_zone_page_state(NR_BOUNCE),
+ global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE),
+ global_zone_page_state(NR_FREE_PAGES),
+ free_pcp,
+ global_zone_page_state(NR_FREE_CMA_PAGES));
+
+ for_each_online_pgdat(pgdat) {
+ if (show_mem_node_skip(filter, pgdat->node_id, nodemask))
+ continue;
+ if (!node_has_managed_zones(pgdat, max_zone_idx))
+ continue;
+
+ printk("Node %d"
+ " active_anon:%lukB"
+ " inactive_anon:%lukB"
+ " active_file:%lukB"
+ " inactive_file:%lukB"
+ " unevictable:%lukB"
+ " isolated(anon):%lukB"
+ " isolated(file):%lukB"
+ " mapped:%lukB"
+ " dirty:%lukB"
+ " writeback:%lukB"
+ " shmem:%lukB"
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ " shmem_thp: %lukB"
+ " shmem_pmdmapped: %lukB"
+ " anon_thp: %lukB"
+#endif
+ " writeback_tmp:%lukB"
+ " kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ " shadow_call_stack:%lukB"
+#endif
+ " pagetables:%lukB"
+ " sec_pagetables:%lukB"
+ " all_unreclaimable? %s"
+ "\n",
+ pgdat->node_id,
+ K(node_page_state(pgdat, NR_ACTIVE_ANON)),
+ K(node_page_state(pgdat, NR_INACTIVE_ANON)),
+ K(node_page_state(pgdat, NR_ACTIVE_FILE)),
+ K(node_page_state(pgdat, NR_INACTIVE_FILE)),
+ K(node_page_state(pgdat, NR_UNEVICTABLE)),
+ K(node_page_state(pgdat, NR_ISOLATED_ANON)),
+ K(node_page_state(pgdat, NR_ISOLATED_FILE)),
+ K(node_page_state(pgdat, NR_FILE_MAPPED)),
+ K(node_page_state(pgdat, NR_FILE_DIRTY)),
+ K(node_page_state(pgdat, NR_WRITEBACK)),
+ K(node_page_state(pgdat, NR_SHMEM)),
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ K(node_page_state(pgdat, NR_SHMEM_THPS)),
+ K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)),
+ K(node_page_state(pgdat, NR_ANON_THPS)),
+#endif
+ K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
+ node_page_state(pgdat, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ node_page_state(pgdat, NR_KERNEL_SCS_KB),
+#endif
+ K(node_page_state(pgdat, NR_PAGETABLE)),
+ K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)),
+ pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
+ "yes" : "no");
+ }
+
+ for_each_populated_zone(zone) {
+ int i;
+
+ if (zone_idx(zone) > max_zone_idx)
+ continue;
+ if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
+ continue;
+
+ free_pcp = 0;
+ for_each_online_cpu(cpu)
+ free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
+
+ show_node(zone);
+ printk(KERN_CONT
+ "%s"
+ " free:%lukB"
+ " boost:%lukB"
+ " min:%lukB"
+ " low:%lukB"
+ " high:%lukB"
+ " reserved_highatomic:%luKB"
+ " active_anon:%lukB"
+ " inactive_anon:%lukB"
+ " active_file:%lukB"
+ " inactive_file:%lukB"
+ " unevictable:%lukB"
+ " writepending:%lukB"
+ " present:%lukB"
+ " managed:%lukB"
+ " mlocked:%lukB"
+ " bounce:%lukB"
+ " free_pcp:%lukB"
+ " local_pcp:%ukB"
+ " free_cma:%lukB"
+ "\n",
+ zone->name,
+ K(zone_page_state(zone, NR_FREE_PAGES)),
+ K(zone->watermark_boost),
+ K(min_wmark_pages(zone)),
+ K(low_wmark_pages(zone)),
+ K(high_wmark_pages(zone)),
+ K(zone->nr_reserved_highatomic),
+ K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)),
+ K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)),
+ K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)),
+ K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE)),
+ K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)),
+ K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)),
+ K(zone->present_pages),
+ K(zone_managed_pages(zone)),
+ K(zone_page_state(zone, NR_MLOCK)),
+ K(zone_page_state(zone, NR_BOUNCE)),
+ K(free_pcp),
+ K(this_cpu_read(zone->per_cpu_pageset->count)),
+ K(zone_page_state(zone, NR_FREE_CMA_PAGES)));
+ printk("lowmem_reserve[]:");
+ for (i = 0; i < MAX_NR_ZONES; i++)
+ printk(KERN_CONT " %ld", zone->lowmem_reserve[i]);
+ printk(KERN_CONT "\n");
+ }
+
+ for_each_populated_zone(zone) {
+ unsigned int order;
+ unsigned long nr[MAX_ORDER + 1], flags, total = 0;
+ unsigned char types[MAX_ORDER + 1];
+
+ if (zone_idx(zone) > max_zone_idx)
+ continue;
+ if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
+ continue;
+ show_node(zone);
+ printk(KERN_CONT "%s: ", zone->name);
+
+ spin_lock_irqsave(&zone->lock, flags);
+ for (order = 0; order <= MAX_ORDER; order++) {
+ struct free_area *area = &zone->free_area[order];
+ int type;
+
+ nr[order] = area->nr_free;
+ total += nr[order] << order;
+
+ types[order] = 0;
+ for (type = 0; type < MIGRATE_TYPES; type++) {
+ if (!free_area_empty(area, type))
+ types[order] |= 1 << type;
+ }
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+ for (order = 0; order <= MAX_ORDER; order++) {
+ printk(KERN_CONT "%lu*%lukB ",
+ nr[order], K(1UL) << order);
+ if (nr[order])
+ show_migration_types(types[order]);
+ }
+ printk(KERN_CONT "= %lukB\n", K(total));
+ }
+
+ for_each_online_node(nid) {
+ if (show_mem_node_skip(filter, nid, nodemask))
+ continue;
+ hugetlb_show_meminfo_node(nid);
+ }
+
+ printk("%ld total pagecache pages\n", global_node_page_state(NR_FILE_PAGES));
+
+ show_swap_cache_info();
+}
+
+void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
+{
+ unsigned long total = 0, reserved = 0, highmem = 0;
+ struct zone *zone;
+
+ printk("Mem-Info:\n");
+ __show_free_areas(filter, nodemask, max_zone_idx);
+
+ for_each_populated_zone(zone) {
+
+ total += zone->present_pages;
+ reserved += zone->present_pages - zone_managed_pages(zone);
+
+ if (is_highmem(zone))
+ highmem += zone->present_pages;
+ }
+
+ printk("%lu pages RAM\n", total);
+ printk("%lu pages HighMem/MovableOnly\n", highmem);
+ printk("%lu pages reserved\n", reserved);
+#ifdef CONFIG_CMA
+ printk("%lu pages cma reserved\n", totalcma_pages);
+#endif
+#ifdef CONFIG_MEMORY_FAILURE
+ printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages));
+#endif
+}
--
2.35.3
^ permalink raw reply [flat|nested] 17+ messages in thread

* [PATCH v2 05/13] mm: page_alloc: squash page_is_consistent()
2023-05-16 6:38 [PATCH -next v2 00/13] mm: page_alloc: misc cleanup and refactor Kefeng Wang
` (3 preceding siblings ...)
2023-05-16 6:38 ` [PATCH v2 04/13] mm: page_alloc: collect mem statistic into show_mem.c Kefeng Wang
@ 2023-05-16 6:38 ` Kefeng Wang
2023-05-16 6:38 ` [PATCH v2 06/13] mm: page_alloc: remove alloc_contig_dump_pages() stub Kefeng Wang
` (7 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-05-16 6:38 UTC (permalink / raw)
To: Andrew Morton, Mike Rapoport, linux-mm
Cc: David Hildenbrand, Oscar Salvador, Rafael J. Wysocki,
Pavel Machek, Len Brown, Luis Chamberlain, Kees Cook,
Iurii Zaikin, linux-kernel, linux-pm, linux-fsdevel, ying.huang,
Kefeng Wang
Squash page_is_consistent() into bad_range(), as there is only one
caller.
Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
mm/page_alloc.c | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 84ba6cca3b3a..1bd8b7832d40 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -517,13 +517,6 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
return ret;
}
-static int page_is_consistent(struct zone *zone, struct page *page)
-{
- if (zone != page_zone(page))
- return 0;
-
- return 1;
-}
/*
* Temporary debugging check for pages not lying within a given zone.
*/
@@ -531,7 +524,7 @@ static int __maybe_unused bad_range(struct zone *zone, struct page *page)
{
if (page_outside_zone_boundaries(zone, page))
return 1;
- if (!page_is_consistent(zone, page))
+ if (zone != page_zone(page))
return 1;
return 0;
--
2.35.3
^ permalink raw reply [flat|nested] 17+ messages in thread

* [PATCH v2 06/13] mm: page_alloc: remove alloc_contig_dump_pages() stub
2023-05-16 6:38 [PATCH -next v2 00/13] mm: page_alloc: misc cleanup and refactor Kefeng Wang
` (4 preceding siblings ...)
2023-05-16 6:38 ` [PATCH v2 05/13] mm: page_alloc: squash page_is_consistent() Kefeng Wang
@ 2023-05-16 6:38 ` Kefeng Wang
2023-05-16 6:38 ` [PATCH v2 07/13] mm: page_alloc: split out FAIL_PAGE_ALLOC Kefeng Wang
` (6 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-05-16 6:38 UTC (permalink / raw)
To: Andrew Morton, Mike Rapoport, linux-mm
Cc: David Hildenbrand, Oscar Salvador, Rafael J. Wysocki,
Pavel Machek, Len Brown, Luis Chamberlain, Kees Cook,
Iurii Zaikin, linux-kernel, linux-pm, linux-fsdevel, ying.huang,
Kefeng Wang
DEFINE_DYNAMIC_DEBUG_METADATA and DYNAMIC_DEBUG_BRANCH already have
stub definitions when the dynamic debug feature is disabled, so remove
the now-unnecessary alloc_contig_dump_pages() stub.
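In other words, with CONFIG_DYNAMIC_DEBUG(_CORE) off the header
already degrades gracefully; roughly (a sketch of the
include/linux/dynamic_debug.h fallbacks, not a verbatim copy):

/* !CONFIG_DYNAMIC_DEBUG fallbacks (simplified) */
#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
#define DYNAMIC_DEBUG_BRANCH(descriptor)	false

so the dump loop in alloc_contig_dump_pages() compiles away to nothing
and the dedicated empty stub adds no value.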
Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
mm/page_alloc.c | 7 -------
1 file changed, 7 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1bd8b7832d40..aa3cdfd88393 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6250,8 +6250,6 @@ int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *table,
}
#ifdef CONFIG_CONTIG_ALLOC
-#if defined(CONFIG_DYNAMIC_DEBUG) || \
- (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
/* Usage: See admin-guide/dynamic-debug-howto.rst */
static void alloc_contig_dump_pages(struct list_head *page_list)
{
@@ -6265,11 +6263,6 @@ static void alloc_contig_dump_pages(struct list_head *page_list)
dump_page(page, "migration failure");
}
}
-#else
-static inline void alloc_contig_dump_pages(struct list_head *page_list)
-{
-}
-#endif
/* [start, end) must belong to a single zone. */
int __alloc_contig_migrate_range(struct compact_control *cc,
--
2.35.3
^ permalink raw reply [flat|nested] 17+ messages in thread

* [PATCH v2 07/13] mm: page_alloc: split out FAIL_PAGE_ALLOC
2023-05-16 6:38 [PATCH -next v2 00/13] mm: page_alloc: misc cleanup and refactor Kefeng Wang
` (5 preceding siblings ...)
2023-05-16 6:38 ` [PATCH v2 06/13] mm: page_alloc: remove alloc_contig_dump_pages() stub Kefeng Wang
@ 2023-05-16 6:38 ` Kefeng Wang
2023-05-16 6:38 ` [PATCH v2 08/13] mm: page_alloc: split out DEBUG_PAGEALLOC Kefeng Wang
` (5 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-05-16 6:38 UTC (permalink / raw)
To: Andrew Morton, Mike Rapoport, linux-mm
Cc: David Hildenbrand, Oscar Salvador, Rafael J. Wysocki,
Pavel Machek, Len Brown, Luis Chamberlain, Kees Cook,
Iurii Zaikin, linux-kernel, linux-pm, linux-fsdevel, ying.huang,
Kefeng Wang
... to a single file, to reduce page_alloc.c a bit.
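For context, the hook is consulted once, early in the allocation path;
a simplified sketch of the call site (prepare_alloc_pages() in
mm/page_alloc.c, with most of the setup elided):

static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
		int preferred_nid, nodemask_t *nodemask,
		struct alloc_context *ac, gfp_t *alloc_gfp,
		unsigned int *alloc_flags)
{
	/* fault injection: pretend the allocation failed */
	if (should_fail_alloc_page(gfp_mask, order))
		return false;

	/* ... normal zonelist/cpuset/alloc_flags setup ... */
	return true;
}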
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
include/linux/fault-inject.h | 9 +++++
mm/Makefile | 1 +
mm/fail_page_alloc.c | 66 ++++++++++++++++++++++++++++++++
mm/page_alloc.c | 74 ------------------------------------
4 files changed, 76 insertions(+), 74 deletions(-)
create mode 100644 mm/fail_page_alloc.c
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 481abf530b3c..6d5edef09d45 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -93,6 +93,15 @@ struct kmem_cache;
bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order);
+#ifdef CONFIG_FAIL_PAGE_ALLOC
+bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order);
+#else
+static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
+{
+ return false;
+}
+#endif /* CONFIG_FAIL_PAGE_ALLOC */
+
int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
#ifdef CONFIG_FAILSLAB
extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
diff --git a/mm/Makefile b/mm/Makefile
index 5262ce5baa28..0eec4bc72d3f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -89,6 +89,7 @@ obj-$(CONFIG_KASAN) += kasan/
obj-$(CONFIG_KFENCE) += kfence/
obj-$(CONFIG_KMSAN) += kmsan/
obj-$(CONFIG_FAILSLAB) += failslab.o
+obj-$(CONFIG_FAIL_PAGE_ALLOC) += fail_page_alloc.o
obj-$(CONFIG_MEMTEST) += memtest.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_NUMA) += memory-tiers.o
diff --git a/mm/fail_page_alloc.c b/mm/fail_page_alloc.c
new file mode 100644
index 000000000000..b1b09cce9394
--- /dev/null
+++ b/mm/fail_page_alloc.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/fault-inject.h>
+#include <linux/mm.h>
+
+static struct {
+ struct fault_attr attr;
+
+ bool ignore_gfp_highmem;
+ bool ignore_gfp_reclaim;
+ u32 min_order;
+} fail_page_alloc = {
+ .attr = FAULT_ATTR_INITIALIZER,
+ .ignore_gfp_reclaim = true,
+ .ignore_gfp_highmem = true,
+ .min_order = 1,
+};
+
+static int __init setup_fail_page_alloc(char *str)
+{
+ return setup_fault_attr(&fail_page_alloc.attr, str);
+}
+__setup("fail_page_alloc=", setup_fail_page_alloc);
+
+bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
+{
+ int flags = 0;
+
+ if (order < fail_page_alloc.min_order)
+ return false;
+ if (gfp_mask & __GFP_NOFAIL)
+ return false;
+ if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM))
+ return false;
+ if (fail_page_alloc.ignore_gfp_reclaim &&
+ (gfp_mask & __GFP_DIRECT_RECLAIM))
+ return false;
+
+ /* See comment in __should_failslab() */
+ if (gfp_mask & __GFP_NOWARN)
+ flags |= FAULT_NOWARN;
+
+ return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
+}
+
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+
+static int __init fail_page_alloc_debugfs(void)
+{
+ umode_t mode = S_IFREG | 0600;
+ struct dentry *dir;
+
+ dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
+ &fail_page_alloc.attr);
+
+ debugfs_create_bool("ignore-gfp-wait", mode, dir,
+ &fail_page_alloc.ignore_gfp_reclaim);
+ debugfs_create_bool("ignore-gfp-highmem", mode, dir,
+ &fail_page_alloc.ignore_gfp_highmem);
+ debugfs_create_u32("min-order", mode, dir, &fail_page_alloc.min_order);
+
+ return 0;
+}
+
+late_initcall(fail_page_alloc_debugfs);
+
+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index aa3cdfd88393..8d4e803cec44 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3031,80 +3031,6 @@ struct page *rmqueue(struct zone *preferred_zone,
return page;
}
-#ifdef CONFIG_FAIL_PAGE_ALLOC
-
-static struct {
- struct fault_attr attr;
-
- bool ignore_gfp_highmem;
- bool ignore_gfp_reclaim;
- u32 min_order;
-} fail_page_alloc = {
- .attr = FAULT_ATTR_INITIALIZER,
- .ignore_gfp_reclaim = true,
- .ignore_gfp_highmem = true,
- .min_order = 1,
-};
-
-static int __init setup_fail_page_alloc(char *str)
-{
- return setup_fault_attr(&fail_page_alloc.attr, str);
-}
-__setup("fail_page_alloc=", setup_fail_page_alloc);
-
-static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
-{
- int flags = 0;
-
- if (order < fail_page_alloc.min_order)
- return false;
- if (gfp_mask & __GFP_NOFAIL)
- return false;
- if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM))
- return false;
- if (fail_page_alloc.ignore_gfp_reclaim &&
- (gfp_mask & __GFP_DIRECT_RECLAIM))
- return false;
-
- /* See comment in __should_failslab() */
- if (gfp_mask & __GFP_NOWARN)
- flags |= FAULT_NOWARN;
-
- return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
-}
-
-#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-
-static int __init fail_page_alloc_debugfs(void)
-{
- umode_t mode = S_IFREG | 0600;
- struct dentry *dir;
-
- dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
- &fail_page_alloc.attr);
-
- debugfs_create_bool("ignore-gfp-wait", mode, dir,
- &fail_page_alloc.ignore_gfp_reclaim);
- debugfs_create_bool("ignore-gfp-highmem", mode, dir,
- &fail_page_alloc.ignore_gfp_highmem);
- debugfs_create_u32("min-order", mode, dir, &fail_page_alloc.min_order);
-
- return 0;
-}
-
-late_initcall(fail_page_alloc_debugfs);
-
-#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
-
-#else /* CONFIG_FAIL_PAGE_ALLOC */
-
-static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
-{
- return false;
-}
-
-#endif /* CONFIG_FAIL_PAGE_ALLOC */
-
noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
{
return __should_fail_alloc_page(gfp_mask, order);
--
2.35.3
^ permalink raw reply [flat|nested] 17+ messages in thread

* [PATCH v2 08/13] mm: page_alloc: split out DEBUG_PAGEALLOC
2023-05-16 6:38 [PATCH -next v2 00/13] mm: page_alloc: misc cleanup and refactor Kefeng Wang
` (6 preceding siblings ...)
2023-05-16 6:38 ` [PATCH v2 07/13] mm: page_alloc: split out FAIL_PAGE_ALLOC Kefeng Wang
@ 2023-05-16 6:38 ` Kefeng Wang
2023-05-16 22:22 ` Andrew Morton
2023-05-16 6:38 ` [PATCH v2 09/13] mm: page_alloc: move mark_free_page() into snapshot.c Kefeng Wang
` (4 subsequent siblings)
12 siblings, 1 reply; 17+ messages in thread
From: Kefeng Wang @ 2023-05-16 6:38 UTC (permalink / raw)
To: Andrew Morton, Mike Rapoport, linux-mm
Cc: David Hildenbrand, Oscar Salvador, Rafael J. Wysocki,
Pavel Machek, Len Brown, Luis Chamberlain, Kees Cook,
Iurii Zaikin, linux-kernel, linux-pm, linux-fsdevel, ying.huang,
Kefeng Wang
Move the DEBUG_PAGEALLOC related functions into a single file to
reduce page_alloc.c a bit.
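For reference, the guard-page hooks are exercised from the buddy
splitting path; a simplified sketch of expand() in mm/page_alloc.c
(VM_BUG_ON checks elided):

static inline void expand(struct zone *zone, struct page *page,
	int low, int high, int migratetype)
{
	unsigned long size = 1 << high;

	while (high > low) {
		high--;
		size >>= 1;

		/*
		 * Turn the tail half into a guard page when the
		 * debug_guardpage machinery asks for it; guard pages
		 * merge back into the allocator when their buddy is freed.
		 */
		if (set_page_guard(zone, &page[size], high, migratetype))
			continue;

		add_to_free_list(&page[size], zone, high, migratetype);
		set_buddy_order(&page[size], high);
	}
}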
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
include/linux/mm.h | 76 ++++++++++++++++++++++++++++---------------
mm/Makefile | 1 +
mm/debug_page_alloc.c | 59 +++++++++++++++++++++++++++++++++
mm/page_alloc.c | 69 ---------------------------------------
4 files changed, 109 insertions(+), 96 deletions(-)
create mode 100644 mm/debug_page_alloc.c
diff --git a/include/linux/mm.h b/include/linux/mm.h
index db3f66ed2f32..d3241f4ac903 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3485,9 +3485,58 @@ static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages)
if (debug_pagealloc_enabled_static())
__kernel_map_pages(page, numpages, 0);
}
+
+extern unsigned int _debug_guardpage_minorder;
+DECLARE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
+
+static inline unsigned int debug_guardpage_minorder(void)
+{
+ return _debug_guardpage_minorder;
+}
+
+static inline bool debug_guardpage_enabled(void)
+{
+ return static_branch_unlikely(&_debug_guardpage_enabled);
+}
+
+static inline bool page_is_guard(struct page *page)
+{
+ if (!debug_guardpage_enabled())
+ return false;
+
+ return PageGuard(page);
+}
+
+bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
+ int migratetype);
+static inline bool set_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype)
+{
+ if (!debug_guardpage_enabled())
+ return false;
+ return __set_page_guard(zone, page, order, migratetype);
+}
+
+void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order,
+ int migratetype);
+static inline void clear_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype)
+{
+ if (!debug_guardpage_enabled())
+ return;
+ __clear_page_guard(zone, page, order, migratetype);
+}
+
#else /* CONFIG_DEBUG_PAGEALLOC */
static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {}
static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {}
+static inline unsigned int debug_guardpage_minorder(void) { return 0; }
+static inline bool debug_guardpage_enabled(void) { return false; }
+static inline bool page_is_guard(struct page *page) { return false; }
+static inline bool set_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype) { return false; }
+static inline void clear_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype) {}
#endif /* CONFIG_DEBUG_PAGEALLOC */
#ifdef __HAVE_ARCH_GATE_AREA
@@ -3725,33 +3774,6 @@ static inline bool vma_is_special_huge(const struct vm_area_struct *vma)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
-#ifdef CONFIG_DEBUG_PAGEALLOC
-extern unsigned int _debug_guardpage_minorder;
-DECLARE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
-
-static inline unsigned int debug_guardpage_minorder(void)
-{
- return _debug_guardpage_minorder;
-}
-
-static inline bool debug_guardpage_enabled(void)
-{
- return static_branch_unlikely(&_debug_guardpage_enabled);
-}
-
-static inline bool page_is_guard(struct page *page)
-{
- if (!debug_guardpage_enabled())
- return false;
-
- return PageGuard(page);
-}
-#else
-static inline unsigned int debug_guardpage_minorder(void) { return 0; }
-static inline bool debug_guardpage_enabled(void) { return false; }
-static inline bool page_is_guard(struct page *page) { return false; }
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
#if MAX_NUMNODES > 1
void __init setup_nr_node_ids(void);
#else
diff --git a/mm/Makefile b/mm/Makefile
index 0eec4bc72d3f..678530a07326 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -124,6 +124,7 @@ obj-$(CONFIG_SECRETMEM) += secretmem.o
obj-$(CONFIG_CMA_SYSFS) += cma_sysfs.o
obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
+obj-$(CONFIG_DEBUG_PAGEALLOC) += debug_page_alloc.o
obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
obj-$(CONFIG_DAMON) += damon/
obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
diff --git a/mm/debug_page_alloc.c b/mm/debug_page_alloc.c
new file mode 100644
index 000000000000..f9d145730fd1
--- /dev/null
+++ b/mm/debug_page_alloc.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/mm.h>
+#include <linux/page-isolation.h>
+
+unsigned int _debug_guardpage_minorder;
+
+bool _debug_pagealloc_enabled_early __read_mostly
+ = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT);
+EXPORT_SYMBOL(_debug_pagealloc_enabled_early);
+DEFINE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);
+EXPORT_SYMBOL(_debug_pagealloc_enabled);
+
+DEFINE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
+
+static int __init early_debug_pagealloc(char *buf)
+{
+ return kstrtobool(buf, &_debug_pagealloc_enabled_early);
+}
+early_param("debug_pagealloc", early_debug_pagealloc);
+
+static int __init debug_guardpage_minorder_setup(char *buf)
+{
+ unsigned long res;
+
+ if (kstrtoul(buf, 10, &res) < 0 || res > MAX_ORDER / 2) {
+ pr_err("Bad debug_guardpage_minorder value\n");
+ return 0;
+ }
+ _debug_guardpage_minorder = res;
+ pr_info("Setting debug_guardpage_minorder to %lu\n", res);
+ return 0;
+}
+early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup);
+
+bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
+ int migratetype)
+{
+ if (order >= debug_guardpage_minorder())
+ return false;
+
+ __SetPageGuard(page);
+ INIT_LIST_HEAD(&page->buddy_list);
+ set_page_private(page, order);
+ /* Guard pages are not available for any usage */
+ if (!is_migrate_isolate(migratetype))
+ __mod_zone_freepage_state(zone, -(1 << order), migratetype);
+
+ return true;
+}
+
+void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order,
+ int migratetype)
+{
+ __ClearPageGuard(page);
+
+ set_page_private(page, 0);
+ if (!is_migrate_isolate(migratetype))
+ __mod_zone_freepage_state(zone, (1 << order), migratetype);
+}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8d4e803cec44..dc9820466377 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -664,75 +664,6 @@ void destroy_large_folio(struct folio *folio)
compound_page_dtors[dtor](&folio->page);
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
-unsigned int _debug_guardpage_minorder;
-
-bool _debug_pagealloc_enabled_early __read_mostly
- = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT);
-EXPORT_SYMBOL(_debug_pagealloc_enabled_early);
-DEFINE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);
-EXPORT_SYMBOL(_debug_pagealloc_enabled);
-
-DEFINE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
-
-static int __init early_debug_pagealloc(char *buf)
-{
- return kstrtobool(buf, &_debug_pagealloc_enabled_early);
-}
-early_param("debug_pagealloc", early_debug_pagealloc);
-
-static int __init debug_guardpage_minorder_setup(char *buf)
-{
- unsigned long res;
-
- if (kstrtoul(buf, 10, &res) < 0 || res > MAX_ORDER / 2) {
- pr_err("Bad debug_guardpage_minorder value\n");
- return 0;
- }
- _debug_guardpage_minorder = res;
- pr_info("Setting debug_guardpage_minorder to %lu\n", res);
- return 0;
-}
-early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup);
-
-static inline bool set_page_guard(struct zone *zone, struct page *page,
- unsigned int order, int migratetype)
-{
- if (!debug_guardpage_enabled())
- return false;
-
- if (order >= debug_guardpage_minorder())
- return false;
-
- __SetPageGuard(page);
- INIT_LIST_HEAD(&page->buddy_list);
- set_page_private(page, order);
- /* Guard pages are not available for any usage */
- if (!is_migrate_isolate(migratetype))
- __mod_zone_freepage_state(zone, -(1 << order), migratetype);
-
- return true;
-}
-
-static inline void clear_page_guard(struct zone *zone, struct page *page,
- unsigned int order, int migratetype)
-{
- if (!debug_guardpage_enabled())
- return;
-
- __ClearPageGuard(page);
-
- set_page_private(page, 0);
- if (!is_migrate_isolate(migratetype))
- __mod_zone_freepage_state(zone, (1 << order), migratetype);
-}
-#else
-static inline bool set_page_guard(struct zone *zone, struct page *page,
- unsigned int order, int migratetype) { return false; }
-static inline void clear_page_guard(struct zone *zone, struct page *page,
- unsigned int order, int migratetype) {}
-#endif
-
static inline void set_buddy_order(struct page *page, unsigned int order)
{
set_page_private(page, order);
--
2.35.3
^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 08/13] mm: page_alloc: split out DEBUG_PAGEALLOC
2023-05-16 6:38 ` [PATCH v2 08/13] mm: page_alloc: split out DEBUG_PAGEALLOC Kefeng Wang
@ 2023-05-16 22:22 ` Andrew Morton
2023-05-18 1:35 ` Kefeng Wang
0 siblings, 1 reply; 17+ messages in thread
From: Andrew Morton @ 2023-05-16 22:22 UTC (permalink / raw)
To: Kefeng Wang
Cc: Mike Rapoport, linux-mm, David Hildenbrand, Oscar Salvador,
Rafael J. Wysocki, Pavel Machek, Len Brown, Luis Chamberlain,
Kees Cook, Iurii Zaikin, linux-kernel, linux-pm, linux-fsdevel,
ying.huang
On Tue, 16 May 2023 14:38:16 +0800 Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> DEBUG_PAGEALLOC
>
> mm/debug_page_alloc.c | 59 +++++++++++++++++++++++++++++++++
> mm/page_alloc.c | 69 ---------------------------------------
and
FAIL_PAGE_ALLOC
We're irritatingly inconsistent about whether there's an underscore.
akpm:/usr/src/25> grep page_alloc mm/*c|wc -l
49
akpm:/usr/src/25> grep pagealloc mm/*c|wc -l
28
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2 08/13] mm: page_alloc: split out DEBUG_PAGEALLOC
2023-05-16 22:22 ` Andrew Morton
@ 2023-05-18 1:35 ` Kefeng Wang
2023-05-18 2:10 ` Andrew Morton
0 siblings, 1 reply; 17+ messages in thread
From: Kefeng Wang @ 2023-05-18 1:35 UTC (permalink / raw)
To: Andrew Morton
Cc: Mike Rapoport, linux-mm, David Hildenbrand, Oscar Salvador,
Rafael J. Wysocki, Pavel Machek, Len Brown, Luis Chamberlain,
Kees Cook, Iurii Zaikin, linux-kernel, linux-pm, linux-fsdevel,
ying.huang
On 2023/5/17 6:22, Andrew Morton wrote:
> On Tue, 16 May 2023 14:38:16 +0800 Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>
>> DEBUG_PAGEALLOC
>>
>> mm/debug_page_alloc.c | 59 +++++++++++++++++++++++++++++++++
>> mm/page_alloc.c | 69 ---------------------------------------
>
> and
>
> FAIL_PAGE_ALLOC
>
> We're irritatingly inconsistent about whether there's an underscore.
>
> akpm:/usr/src/25> grep page_alloc mm/*c|wc -l
> 49
> akpm:/usr/src/25> grep pagealloc mm/*c|wc -l
> 28
All 28 of the pagealloc names come from the DEBUG_PAGEALLOC feature. They
could be changed to page_alloc, except for the cmdline parameter, but that
would lead to long function names without gaining much, so keep them
unchanged?
$ grep pagealloc mm/*c
mm/debug_page_alloc.c:bool _debug_pagealloc_enabled_early __read_mostly
mm/debug_page_alloc.c:EXPORT_SYMBOL(_debug_pagealloc_enabled_early);
mm/debug_page_alloc.c:DEFINE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);
mm/debug_page_alloc.c:EXPORT_SYMBOL(_debug_pagealloc_enabled);
mm/debug_page_alloc.c:static int __init early_debug_pagealloc(char *buf)
mm/debug_page_alloc.c: return kstrtobool(buf, &_debug_pagealloc_enabled_early);
mm/debug_page_alloc.c:early_param("debug_pagealloc", early_debug_pagealloc);
mm/memory_hotplug.c: * Freeing the page with debug_pagealloc enabled will try to unmap it,
mm/memory_hotplug.c: debug_pagealloc_map_pages(page, 1 << order);
mm/mm_init.c: debug_pagealloc_enabled())) {
mm/mm_init.c: if (debug_pagealloc_enabled()) {
mm/mm_init.c: static_branch_enable(&_debug_pagealloc_enabled);
mm/page_alloc.c: * page becomes unavailable via debug_pagealloc or arch_free_page.
mm/page_alloc.c: debug_pagealloc_unmap_pages(page, 1 << order);
mm/page_alloc.c: debug_pagealloc_map_pages(page, 1 << order);
mm/page_poison.c: pr_err("pagealloc: single bit error\n");
mm/page_poison.c: pr_err("pagealloc: memory corruption\n");
mm/page_poison.c: dump_page(page, "pagealloc: corrupted page details");
mm/slab.c:static inline bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
mm/slab.c: return debug_pagealloc_enabled_static() && OFF_SLAB(cachep) &&
mm/slab.c: if (!is_debug_pagealloc_cache(cachep))
mm/slab.c: if (is_debug_pagealloc_cache(cachep))
mm/slab.c: * To activate debug pagealloc, off-slab management is necessary
mm/slab.c: if (debug_pagealloc_enabled_static() && (flags & SLAB_POISON) &&
mm/slab.c: is_debug_pagealloc_cache(cachep))
mm/slub.c: if (!debug_pagealloc_enabled_static())
mm/vmalloc.c: if (debug_pagealloc_enabled_static())
mm/vmalloc.c: if (debug_pagealloc_enabled_static())
>
^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 08/13] mm: page_alloc: split out DEBUG_PAGEALLOC
2023-05-18 1:35 ` Kefeng Wang
@ 2023-05-18 2:10 ` Andrew Morton
0 siblings, 0 replies; 17+ messages in thread
From: Andrew Morton @ 2023-05-18 2:10 UTC (permalink / raw)
To: Kefeng Wang
Cc: Mike Rapoport, linux-mm, David Hildenbrand, Oscar Salvador,
Rafael J. Wysocki, Pavel Machek, Len Brown, Luis Chamberlain,
Kees Cook, Iurii Zaikin, linux-kernel, linux-pm, linux-fsdevel,
ying.huang
On Thu, 18 May 2023 09:35:29 +0800 Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> > We're irritatingly inconsistent about whether there's an underscore.
> >
> > akpm:/usr/src/25> grep page_alloc mm/*c|wc -l
> > 49
> > akpm:/usr/src/25> grep pagealloc mm/*c|wc -l
> > 28
>
> All 28 of the pagealloc names come from the DEBUG_PAGEALLOC feature. They
> could be changed to page_alloc, except for the cmdline parameter, but that
> would lead to long function names without gaining much, so keep them
> unchanged?
Sure, it's probably not the worst thing in there. I was just having
a moan.
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v2 09/13] mm: page_alloc: move mark_free_pages() into snapshot.c
2023-05-16 6:38 [PATCH -next v2 00/13] mm: page_alloc: misc cleanup and refactor Kefeng Wang
` (7 preceding siblings ...)
2023-05-16 6:38 ` [PATCH v2 08/13] mm: page_alloc: split out DEBUG_PAGEALLOC Kefeng Wang
@ 2023-05-16 6:38 ` Kefeng Wang
2023-05-16 6:38 ` [PATCH v2 10/13] mm: page_alloc: move pm_* functions into power Kefeng Wang
` (3 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-05-16 6:38 UTC (permalink / raw)
To: Andrew Morton, Mike Rapoport, linux-mm
Cc: David Hildenbrand, Oscar Salvador, Rafael J. Wysocki,
Pavel Machek, Len Brown, Luis Chamberlain, Kees Cook,
Iurii Zaikin, linux-kernel, linux-pm, linux-fsdevel, ying.huang,
Kefeng Wang
mark_free_pages() is only used in kernel/power/snapshot.c; move it there
to shrink page_alloc.c a bit.
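For context, the function's single caller sits in the snapshot image-copy
path; a sketch of the call site as an illustration (the enclosing
function, assumed here to be copy_data_pages(), is not part of this
patch):

	struct zone *zone;

	/* Clear the swsusp "free page" bits zone by zone before copying */
	for_each_populated_zone(zone)
		mark_free_pages(zone);

With its only user in the same file, the function can also become static,
as the hunk below does.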
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
include/linux/suspend.h | 3 ---
kernel/power/snapshot.c | 52 ++++++++++++++++++++++++++++++++++++++
mm/page_alloc.c | 55 -----------------------------------------
3 files changed, 52 insertions(+), 58 deletions(-)
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index d0d4598a7b3f..3950a7bf33ae 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -364,9 +364,6 @@ struct pbe {
struct pbe *next;
};
-/* mm/page_alloc.c */
-extern void mark_free_pages(struct zone *zone);
-
/**
* struct platform_hibernation_ops - hibernation platform support
*
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index cd8b7b35f1e8..45ef0bf81c85 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1228,6 +1228,58 @@ unsigned int snapshot_additional_pages(struct zone *zone)
return 2 * rtree;
}
+/*
+ * Touch the watchdog for every WD_PAGE_COUNT pages.
+ */
+#define WD_PAGE_COUNT (128*1024)
+
+static void mark_free_pages(struct zone *zone)
+{
+ unsigned long pfn, max_zone_pfn, page_count = WD_PAGE_COUNT;
+ unsigned long flags;
+ unsigned int order, t;
+ struct page *page;
+
+ if (zone_is_empty(zone))
+ return;
+
+ spin_lock_irqsave(&zone->lock, flags);
+
+ max_zone_pfn = zone_end_pfn(zone);
+ for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+ if (pfn_valid(pfn)) {
+ page = pfn_to_page(pfn);
+
+ if (!--page_count) {
+ touch_nmi_watchdog();
+ page_count = WD_PAGE_COUNT;
+ }
+
+ if (page_zone(page) != zone)
+ continue;
+
+ if (!swsusp_page_is_forbidden(page))
+ swsusp_unset_page_free(page);
+ }
+
+ for_each_migratetype_order(order, t) {
+ list_for_each_entry(page,
+ &zone->free_area[order].free_list[t], buddy_list) {
+ unsigned long i;
+
+ pfn = page_to_pfn(page);
+ for (i = 0; i < (1UL << order); i++) {
+ if (!--page_count) {
+ touch_nmi_watchdog();
+ page_count = WD_PAGE_COUNT;
+ }
+ swsusp_set_page_free(pfn_to_page(pfn + i));
+ }
+ }
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+}
+
#ifdef CONFIG_HIGHMEM
/**
* count_free_highmem_pages - Compute the total number of free highmem pages.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dc9820466377..71bfe72be045 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2401,61 +2401,6 @@ void drain_all_pages(struct zone *zone)
__drain_all_pages(zone, false);
}
-#ifdef CONFIG_HIBERNATION
-
-/*
- * Touch the watchdog for every WD_PAGE_COUNT pages.
- */
-#define WD_PAGE_COUNT (128*1024)
-
-void mark_free_pages(struct zone *zone)
-{
- unsigned long pfn, max_zone_pfn, page_count = WD_PAGE_COUNT;
- unsigned long flags;
- unsigned int order, t;
- struct page *page;
-
- if (zone_is_empty(zone))
- return;
-
- spin_lock_irqsave(&zone->lock, flags);
-
- max_zone_pfn = zone_end_pfn(zone);
- for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
- if (pfn_valid(pfn)) {
- page = pfn_to_page(pfn);
-
- if (!--page_count) {
- touch_nmi_watchdog();
- page_count = WD_PAGE_COUNT;
- }
-
- if (page_zone(page) != zone)
- continue;
-
- if (!swsusp_page_is_forbidden(page))
- swsusp_unset_page_free(page);
- }
-
- for_each_migratetype_order(order, t) {
- list_for_each_entry(page,
- &zone->free_area[order].free_list[t], buddy_list) {
- unsigned long i;
-
- pfn = page_to_pfn(page);
- for (i = 0; i < (1UL << order); i++) {
- if (!--page_count) {
- touch_nmi_watchdog();
- page_count = WD_PAGE_COUNT;
- }
- swsusp_set_page_free(pfn_to_page(pfn + i));
- }
- }
- }
- spin_unlock_irqrestore(&zone->lock, flags);
-}
-#endif /* CONFIG_PM */
-
static bool free_unref_page_prepare(struct page *page, unsigned long pfn,
unsigned int order)
{
--
2.35.3
^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2 10/13] mm: page_alloc: move pm_* functions into power
2023-05-16 6:38 [PATCH -next v2 00/13] mm: page_alloc: misc cleanup and refactor Kefeng Wang
` (8 preceding siblings ...)
2023-05-16 6:38 ` [PATCH v2 09/13] mm: page_alloc: move mark_free_pages() into snapshot.c Kefeng Wang
@ 2023-05-16 6:38 ` Kefeng Wang
2023-05-16 6:38 ` [PATCH v2 11/13] mm: vmscan: use gfp_has_io_fs() Kefeng Wang
` (2 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-05-16 6:38 UTC (permalink / raw)
To: Andrew Morton, Mike Rapoport, linux-mm
Cc: David Hildenbrand, Oscar Salvador, Rafael J. Wysocki,
Pavel Machek, Len Brown, Luis Chamberlain, Kees Cook,
Iurii Zaikin, linux-kernel, linux-pm, linux-fsdevel, ying.huang,
Kefeng Wang
pm_restrict_gfp_mask()/pm_restore_gfp_mask() are only used by the power
code, so move them out of page_alloc.c.

Add a general gfp_has_io_fs() helper which returns true if a gfp mask has
both __GFP_IO and __GFP_FS set, use it inside pm_suspended_storage(), and
move pm_suspended_storage() into suspend.h.
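To make the new helper's semantics concrete, a brief sketch (not part of
the patch) of its results on the standard gfp masks:

#include <linux/gfp.h>

static void __init gfp_has_io_fs_examples(void)
{
	/* GFP_KERNEL sets both __GFP_IO and __GFP_FS */
	WARN_ON(!gfp_has_io_fs(GFP_KERNEL));

	/* GFP_NOFS clears __GFP_FS; GFP_NOIO clears both flags */
	WARN_ON(gfp_has_io_fs(GFP_NOFS));
	WARN_ON(gfp_has_io_fs(GFP_NOIO));
}

pm_suspended_storage() then reads as "the currently allowed mask forbids
starting I/O or filesystem activity", i.e. !gfp_has_io_fs(gfp_allowed_mask).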
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
include/linux/gfp.h | 15 ++++-----------
include/linux/suspend.h | 6 ++++++
kernel/power/main.c | 27 +++++++++++++++++++++++++++
kernel/power/power.h | 5 +++++
mm/page_alloc.c | 38 --------------------------------------
mm/swapfile.c | 1 +
6 files changed, 43 insertions(+), 49 deletions(-)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index ed8cb537c6a7..665f06675c83 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -338,19 +338,12 @@ extern gfp_t gfp_allowed_mask;
/* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */
bool gfp_pfmemalloc_allowed(gfp_t gfp_mask);
-extern void pm_restrict_gfp_mask(void);
-extern void pm_restore_gfp_mask(void);
-
-extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);
-
-#ifdef CONFIG_PM_SLEEP
-extern bool pm_suspended_storage(void);
-#else
-static inline bool pm_suspended_storage(void)
+static inline bool gfp_has_io_fs(gfp_t gfp)
{
- return false;
+ return (gfp & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS);
}
-#endif /* CONFIG_PM_SLEEP */
+
+extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);
#ifdef CONFIG_CONTIG_ALLOC
/* The below functions must be run on a range from a single zone. */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 3950a7bf33ae..76923051c03d 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -502,6 +502,11 @@ extern void pm_report_max_hw_sleep(u64 t);
extern bool events_check_enabled;
extern suspend_state_t pm_suspend_target_state;
+static inline bool pm_suspended_storage(void)
+{
+ return !gfp_has_io_fs(gfp_allowed_mask);
+}
+
extern bool pm_wakeup_pending(void);
extern void pm_system_wakeup(void);
extern void pm_system_cancel_wakeup(void);
@@ -535,6 +540,7 @@ static inline void ksys_sync_helper(void) {}
#define pm_notifier(fn, pri) do { (void)(fn); } while (0)
+static inline bool pm_suspended_storage(void) { return false; }
static inline bool pm_wakeup_pending(void) { return false; }
static inline void pm_system_wakeup(void) {}
static inline void pm_wakeup_clear(bool reset) {}
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 3113ec2f1db4..34fc8359145b 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -21,6 +21,33 @@
#include "power.h"
#ifdef CONFIG_PM_SLEEP
+/*
+ * The following functions are used by the suspend/hibernate code to temporarily
+ * change gfp_allowed_mask in order to avoid using I/O during memory allocations
+ * while devices are suspended. To avoid races with the suspend/hibernate code,
+ * they should always be called with system_transition_mutex held
+ * (gfp_allowed_mask also should only be modified with system_transition_mutex
+ * held, unless the suspend/hibernate code is guaranteed not to run in parallel
+ * with that modification).
+ */
+static gfp_t saved_gfp_mask;
+
+void pm_restore_gfp_mask(void)
+{
+ WARN_ON(!mutex_is_locked(&system_transition_mutex));
+ if (saved_gfp_mask) {
+ gfp_allowed_mask = saved_gfp_mask;
+ saved_gfp_mask = 0;
+ }
+}
+
+void pm_restrict_gfp_mask(void)
+{
+ WARN_ON(!mutex_is_locked(&system_transition_mutex));
+ WARN_ON(saved_gfp_mask);
+ saved_gfp_mask = gfp_allowed_mask;
+ gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS);
+}
unsigned int lock_system_sleep(void)
{
diff --git a/kernel/power/power.h b/kernel/power/power.h
index b83c8d5e188d..ac14d1b463d1 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -216,6 +216,11 @@ static inline void suspend_test_finish(const char *label) {}
/* kernel/power/main.c */
extern int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down);
extern int pm_notifier_call_chain(unsigned long val);
+void pm_restrict_gfp_mask(void);
+void pm_restore_gfp_mask(void);
+#else
+static inline void pm_restrict_gfp_mask(void) {}
+static inline void pm_restore_gfp_mask(void) {}
#endif
#ifdef CONFIG_HIGHMEM
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 71bfe72be045..2a95e095bb2a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -227,44 +227,6 @@ static inline void set_pcppage_migratetype(struct page *page, int migratetype)
page->index = migratetype;
}
-#ifdef CONFIG_PM_SLEEP
-/*
- * The following functions are used by the suspend/hibernate code to temporarily
- * change gfp_allowed_mask in order to avoid using I/O during memory allocations
- * while devices are suspended. To avoid races with the suspend/hibernate code,
- * they should always be called with system_transition_mutex held
- * (gfp_allowed_mask also should only be modified with system_transition_mutex
- * held, unless the suspend/hibernate code is guaranteed not to run in parallel
- * with that modification).
- */
-
-static gfp_t saved_gfp_mask;
-
-void pm_restore_gfp_mask(void)
-{
- WARN_ON(!mutex_is_locked(&system_transition_mutex));
- if (saved_gfp_mask) {
- gfp_allowed_mask = saved_gfp_mask;
- saved_gfp_mask = 0;
- }
-}
-
-void pm_restrict_gfp_mask(void)
-{
- WARN_ON(!mutex_is_locked(&system_transition_mutex));
- WARN_ON(saved_gfp_mask);
- saved_gfp_mask = gfp_allowed_mask;
- gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS);
-}
-
-bool pm_suspended_storage(void)
-{
- if ((gfp_allowed_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
- return false;
- return true;
-}
-#endif /* CONFIG_PM_SLEEP */
-
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
unsigned int pageblock_order __read_mostly;
#endif
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 274bbf797480..c74259001d5e 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -41,6 +41,7 @@
#include <linux/swap_slots.h>
#include <linux/sort.h>
#include <linux/completion.h>
+#include <linux/suspend.h>
#include <asm/tlbflush.h>
#include <linux/swapops.h>
--
2.35.3
^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2 11/13] mm: vmscan: use gfp_has_io_fs()
2023-05-16 6:38 [PATCH -next v2 00/13] mm: page_alloc: misc cleanup and refactor Kefeng Wang
` (9 preceding siblings ...)
2023-05-16 6:38 ` [PATCH v2 10/13] mm: page_alloc: move pm_* functions into power Kefeng Wang
@ 2023-05-16 6:38 ` Kefeng Wang
2023-05-16 6:38 ` [PATCH v2 12/13] mm: page_alloc: move sysctls into their own file Kefeng Wang
2023-05-16 6:38 ` [PATCH v2 13/13] mm: page_alloc: move is_check_pages_enabled() into page_alloc.c Kefeng Wang
12 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-05-16 6:38 UTC (permalink / raw)
To: Andrew Morton, Mike Rapoport, linux-mm
Cc: David Hildenbrand, Oscar Salvador, Rafael J. Wysocki,
Pavel Machek, Len Brown, Luis Chamberlain, Kees Cook,
Iurii Zaikin, linux-kernel, linux-pm, linux-fsdevel, ying.huang,
Kefeng Wang
Use gfp_has_io_fs() instead of open-coding the flag test.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
mm/vmscan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6d0cd2840cf0..15efbfbb1963 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2458,7 +2458,7 @@ static int too_many_isolated(struct pglist_data *pgdat, int file,
* won't get blocked by normal direct-reclaimers, forming a circular
* deadlock.
*/
- if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
+ if (gfp_has_io_fs(sc->gfp_mask))
inactive >>= 3;
too_many = isolated > inactive;
--
2.35.3
^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2 12/13] mm: page_alloc: move sysctls into their own file
2023-05-16 6:38 [PATCH -next v2 00/13] mm: page_alloc: misc cleanup and refactor Kefeng Wang
` (10 preceding siblings ...)
2023-05-16 6:38 ` [PATCH v2 11/13] mm: vmscan: use gfp_has_io_fs() Kefeng Wang
@ 2023-05-16 6:38 ` Kefeng Wang
2023-05-16 6:38 ` [PATCH v2 13/13] mm: page_alloc: move is_check_pages_enabled() into page_alloc.c Kefeng Wang
12 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-05-16 6:38 UTC (permalink / raw)
To: Andrew Morton, Mike Rapoport, linux-mm
Cc: David Hildenbrand, Oscar Salvador, Rafael J. Wysocki,
Pavel Machek, Len Brown, Luis Chamberlain, Kees Cook,
Iurii Zaikin, linux-kernel, linux-pm, linux-fsdevel, ying.huang,
Kefeng Wang
Move all page-allocator-related sysctls into their own file, as part of
the kernel/sysctl.c spring cleaning, and also move some function
declarations from mm.h into internal.h.
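The mechanics are the same for every knob being moved: a subsystem-local
ctl_table registered under "vm" at init time, so the /proc/sys/vm/* paths
userspace sees do not change. A condensed sketch with a single
hypothetical knob (example_knob is made up for illustration):

#include <linux/sysctl.h>

static int example_knob;

static struct ctl_table example_sysctl_table[] = {
	{
		.procname	= "example_knob",	/* /proc/sys/vm/example_knob */
		.data		= &example_knob,
		.maxlen		= sizeof(example_knob),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
	{}	/* empty entry terminates the table */
};

static void __init example_sysctl_init(void)
{
	register_sysctl_init("vm", example_sysctl_table);
}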
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
include/linux/mm.h | 11 -----
include/linux/mmzone.h | 21 ---------
kernel/sysctl.c | 67 ---------------------------
mm/internal.h | 11 +++++
mm/mm_init.c | 2 +
mm/page_alloc.c | 103 +++++++++++++++++++++++++++++++++++------
6 files changed, 102 insertions(+), 113 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d3241f4ac903..eabe520139ef 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3008,12 +3008,6 @@ extern int __meminit early_pfn_to_nid(unsigned long pfn);
#endif
extern void set_dma_reserve(unsigned long new_dma_reserve);
-extern void memmap_init_range(unsigned long, int, unsigned long,
- unsigned long, unsigned long, enum meminit_context,
- struct vmem_altmap *, int migratetype);
-extern void setup_per_zone_wmarks(void);
-extern void calculate_min_free_kbytes(void);
-extern int __meminit init_per_zone_wmark_min(void);
extern void mem_init(void);
extern void __init mmap_init(void);
@@ -3034,11 +3028,6 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...);
extern void setup_per_cpu_pageset(void);
-/* page_alloc.c */
-extern int min_free_kbytes;
-extern int watermark_boost_factor;
-extern int watermark_scale_factor;
-
/* nommu.c */
extern atomic_long_t mmap_pages_allocated;
extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a4889c9d4055..3a68326c9989 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1512,27 +1512,6 @@ static inline bool has_managed_dma(void)
}
#endif
-/* These two functions are used to setup the per zone pages min values */
-struct ctl_table;
-
-int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void *, size_t *,
- loff_t *);
-int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *,
- size_t *, loff_t *);
-extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
-int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *,
- size_t *, loff_t *);
-int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *, int,
- void *, size_t *, loff_t *);
-int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
- void *, size_t *, loff_t *);
-int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
- void *, size_t *, loff_t *);
-int numa_zonelist_order_handler(struct ctl_table *, int,
- void *, size_t *, loff_t *);
-extern int percpu_pagelist_high_fraction;
-extern char numa_zonelist_order[];
-#define NUMA_ZONELIST_ORDER_LEN 16
#ifndef CONFIG_NUMA
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index bfe53e835524..a57de67f032f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2119,13 +2119,6 @@ static struct ctl_table vm_table[] = {
.extra2 = SYSCTL_ONE,
},
#endif
- {
- .procname = "lowmem_reserve_ratio",
- .data = &sysctl_lowmem_reserve_ratio,
- .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
- .mode = 0644,
- .proc_handler = lowmem_reserve_ratio_sysctl_handler,
- },
{
.procname = "drop_caches",
.data = &sysctl_drop_caches,
@@ -2135,39 +2128,6 @@ static struct ctl_table vm_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = SYSCTL_FOUR,
},
- {
- .procname = "min_free_kbytes",
- .data = &min_free_kbytes,
- .maxlen = sizeof(min_free_kbytes),
- .mode = 0644,
- .proc_handler = min_free_kbytes_sysctl_handler,
- .extra1 = SYSCTL_ZERO,
- },
- {
- .procname = "watermark_boost_factor",
- .data = &watermark_boost_factor,
- .maxlen = sizeof(watermark_boost_factor),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
- {
- .procname = "watermark_scale_factor",
- .data = &watermark_scale_factor,
- .maxlen = sizeof(watermark_scale_factor),
- .mode = 0644,
- .proc_handler = watermark_scale_factor_sysctl_handler,
- .extra1 = SYSCTL_ONE,
- .extra2 = SYSCTL_THREE_THOUSAND,
- },
- {
- .procname = "percpu_pagelist_high_fraction",
- .data = &percpu_pagelist_high_fraction,
- .maxlen = sizeof(percpu_pagelist_high_fraction),
- .mode = 0644,
- .proc_handler = percpu_pagelist_high_fraction_sysctl_handler,
- .extra1 = SYSCTL_ZERO,
- },
{
.procname = "page_lock_unfairness",
.data = &sysctl_page_lock_unfairness,
@@ -2223,24 +2183,6 @@ static struct ctl_table vm_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
- {
- .procname = "min_unmapped_ratio",
- .data = &sysctl_min_unmapped_ratio,
- .maxlen = sizeof(sysctl_min_unmapped_ratio),
- .mode = 0644,
- .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE_HUNDRED,
- },
- {
- .procname = "min_slab_ratio",
- .data = &sysctl_min_slab_ratio,
- .maxlen = sizeof(sysctl_min_slab_ratio),
- .mode = 0644,
- .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE_HUNDRED,
- },
#endif
#ifdef CONFIG_SMP
{
@@ -2267,15 +2209,6 @@ static struct ctl_table vm_table[] = {
.proc_handler = mmap_min_addr_handler,
},
#endif
-#ifdef CONFIG_NUMA
- {
- .procname = "numa_zonelist_order",
- .data = &numa_zonelist_order,
- .maxlen = NUMA_ZONELIST_ORDER_LEN,
- .mode = 0644,
- .proc_handler = numa_zonelist_order_handler,
- },
-#endif
#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
(defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
{
diff --git a/mm/internal.h b/mm/internal.h
index 79324b7f2bc8..5fdf930a87b5 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -213,6 +213,13 @@ static inline bool is_check_pages_enabled(void)
return static_branch_unlikely(&check_pages_enabled);
}
+extern int min_free_kbytes;
+
+void setup_per_zone_wmarks(void);
+void calculate_min_free_kbytes(void);
+int __meminit init_per_zone_wmark_min(void);
+void page_alloc_sysctl_init(void);
+
/*
* Structure for holding the mostly immutable allocation parameters passed
* between functions involved in allocations, including the alloc_pages*
@@ -423,6 +430,10 @@ extern void *memmap_alloc(phys_addr_t size, phys_addr_t align,
phys_addr_t min_addr,
int nid, bool exact_nid);
+void memmap_init_range(unsigned long, int, unsigned long, unsigned long,
+ unsigned long, enum meminit_context, struct vmem_altmap *, int);
+
+
int split_free_page(struct page *free_page,
unsigned int order, unsigned long split_pfn_offset);
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 0fd4ddfdfb2e..10bf560302c4 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2392,6 +2392,8 @@ void __init page_alloc_init_late(void)
/* Initialize page ext after all struct pages are initialized. */
if (deferred_struct_pages)
page_ext_init();
+
+ page_alloc_sysctl_init();
}
#ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2a95e095bb2a..5e8680669388 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -206,7 +206,6 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
};
EXPORT_SYMBOL(node_states);
-int percpu_pagelist_high_fraction;
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
/*
@@ -302,8 +301,8 @@ compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS] = {
int min_free_kbytes = 1024;
int user_min_free_kbytes = -1;
-int watermark_boost_factor __read_mostly = 15000;
-int watermark_scale_factor = 10;
+static int watermark_boost_factor __read_mostly = 15000;
+static int watermark_scale_factor = 10;
/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
int movable_zone;
@@ -4917,12 +4916,12 @@ static int __parse_numa_zonelist_order(char *s)
return 0;
}
-char numa_zonelist_order[] = "Node";
-
+static char numa_zonelist_order[] = "Node";
+#define NUMA_ZONELIST_ORDER_LEN 16
/*
* sysctl handler for numa_zonelist_order
*/
-int numa_zonelist_order_handler(struct ctl_table *table, int write,
+static int numa_zonelist_order_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
if (write)
@@ -4930,7 +4929,6 @@ int numa_zonelist_order_handler(struct ctl_table *table, int write,
return proc_dostring(table, write, buffer, length, ppos);
}
-
static int node_load[MAX_NUMNODES];
/**
@@ -5333,6 +5331,7 @@ static int zone_batchsize(struct zone *zone)
#endif
}
+static int percpu_pagelist_high_fraction;
static int zone_highsize(struct zone *zone, int batch, int cpu_online)
{
#ifdef CONFIG_MMU
@@ -5862,7 +5861,7 @@ postcore_initcall(init_per_zone_wmark_min)
* that we can call two helper functions whenever min_free_kbytes
* changes.
*/
-int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
+static int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int rc;
@@ -5878,7 +5877,7 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
return 0;
}
-int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
+static int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int rc;
@@ -5908,7 +5907,7 @@ static void setup_min_unmapped_ratio(void)
}
-int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
+static int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int rc;
@@ -5935,7 +5934,7 @@ static void setup_min_slab_ratio(void)
sysctl_min_slab_ratio) / 100;
}
-int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
+static int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int rc;
@@ -5959,8 +5958,8 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
* minimum watermarks. The lowmem reserve ratio can only make sense
* if in function of the boot time zone sizes.
*/
-int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table, int write,
- void *buffer, size_t *length, loff_t *ppos)
+static int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table,
+ int write, void *buffer, size_t *length, loff_t *ppos)
{
int i;
@@ -5980,7 +5979,7 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table, int write,
* cpu. It is the fraction of total pages in each zone that a hot per cpu
* pagelist can have before it gets flushed back to buddy allocator.
*/
-int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *table,
+static int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *table,
int write, void *buffer, size_t *length, loff_t *ppos)
{
struct zone *zone;
@@ -6013,6 +6012,82 @@ int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *table,
return ret;
}
+static struct ctl_table page_alloc_sysctl_table[] = {
+ {
+ .procname = "min_free_kbytes",
+ .data = &min_free_kbytes,
+ .maxlen = sizeof(min_free_kbytes),
+ .mode = 0644,
+ .proc_handler = min_free_kbytes_sysctl_handler,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "watermark_boost_factor",
+ .data = &watermark_boost_factor,
+ .maxlen = sizeof(watermark_boost_factor),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "watermark_scale_factor",
+ .data = &watermark_scale_factor,
+ .maxlen = sizeof(watermark_scale_factor),
+ .mode = 0644,
+ .proc_handler = watermark_scale_factor_sysctl_handler,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_THREE_THOUSAND,
+ },
+ {
+ .procname = "percpu_pagelist_high_fraction",
+ .data = &percpu_pagelist_high_fraction,
+ .maxlen = sizeof(percpu_pagelist_high_fraction),
+ .mode = 0644,
+ .proc_handler = percpu_pagelist_high_fraction_sysctl_handler,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "lowmem_reserve_ratio",
+ .data = &sysctl_lowmem_reserve_ratio,
+ .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
+ .mode = 0644,
+ .proc_handler = lowmem_reserve_ratio_sysctl_handler,
+ },
+#ifdef CONFIG_NUMA
+ {
+ .procname = "numa_zonelist_order",
+ .data = &numa_zonelist_order,
+ .maxlen = NUMA_ZONELIST_ORDER_LEN,
+ .mode = 0644,
+ .proc_handler = numa_zonelist_order_handler,
+ },
+ {
+ .procname = "min_unmapped_ratio",
+ .data = &sysctl_min_unmapped_ratio,
+ .maxlen = sizeof(sysctl_min_unmapped_ratio),
+ .mode = 0644,
+ .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
+ {
+ .procname = "min_slab_ratio",
+ .data = &sysctl_min_slab_ratio,
+ .maxlen = sizeof(sysctl_min_slab_ratio),
+ .mode = 0644,
+ .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
+#endif
+ {}
+};
+
+void __init page_alloc_sysctl_init(void)
+{
+ register_sysctl_init("vm", page_alloc_sysctl_table);
+}
+
#ifdef CONFIG_CONTIG_ALLOC
/* Usage: See admin-guide/dynamic-debug-howto.rst */
static void alloc_contig_dump_pages(struct list_head *page_list)
--
2.35.3
^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2 13/13] mm: page_alloc: move is_check_pages_enabled() into page_alloc.c
2023-05-16 6:38 [PATCH -next v2 00/13] mm: page_alloc: misc cleanup and refactor Kefeng Wang
` (11 preceding siblings ...)
2023-05-16 6:38 ` [PATCH v2 12/13] mm: page_alloc: move sysctls into their own file Kefeng Wang
@ 2023-05-16 6:38 ` Kefeng Wang
12 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-05-16 6:38 UTC (permalink / raw)
To: Andrew Morton, Mike Rapoport, linux-mm
Cc: David Hildenbrand, Oscar Salvador, Rafael J. Wysocki,
Pavel Machek, Len Brown, Luis Chamberlain, Kees Cook,
Iurii Zaikin, linux-kernel, linux-pm, linux-fsdevel, ying.huang,
Kefeng Wang
is_check_pages_enabled() is only used in page_alloc.c, so move it there;
also use it in free_tail_page_prepare().
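The helper wraps a static key, so with checking disabled the test
compiles down to a patched no-op jump rather than a load and branch. A
generic sketch of the idiom with hypothetical names (the patch itself
only relocates the existing helper):

DEFINE_STATIC_KEY_FALSE(example_checks_enabled);

static inline bool example_checks(void)
{
	/* Near-zero cost while the key stays off */
	return static_branch_unlikely(&example_checks_enabled);
}

static int __init example_checks_setup(char *unused)
{
	static_branch_enable(&example_checks_enabled);
	return 0;
}
early_param("example_checks", example_checks_setup);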
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
mm/internal.h | 5 -----
mm/page_alloc.c | 7 ++++++-
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index 5fdf930a87b5..bb6542279599 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -208,11 +208,6 @@ extern char * const zone_names[MAX_NR_ZONES];
/* perform sanity checks on struct pages being allocated or freed */
DECLARE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled);
-static inline bool is_check_pages_enabled(void)
-{
- return static_branch_unlikely(&check_pages_enabled);
-}
-
extern int min_free_kbytes;
void setup_per_zone_wmarks(void);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5e8680669388..1023f41de2fb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -983,6 +983,11 @@ static inline bool free_page_is_bad(struct page *page)
return true;
}
+static inline bool is_check_pages_enabled(void)
+{
+ return static_branch_unlikely(&check_pages_enabled);
+}
+
static int free_tail_page_prepare(struct page *head_page, struct page *page)
{
struct folio *folio = (struct folio *)head_page;
@@ -994,7 +999,7 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
*/
BUILD_BUG_ON((unsigned long)LIST_POISON1 & 1);
- if (!static_branch_unlikely(&check_pages_enabled)) {
+ if (!is_check_pages_enabled()) {
ret = 0;
goto out;
}
--
2.35.3
^ permalink raw reply [flat|nested] 17+ messages in thread