* [RESEND PATCH V3] NUMA: Improve the efficiency of calculating pages loss
@ 2023-08-04 15:32 Liam Ni
2023-08-14 15:59 ` Mike Rapoport
0 siblings, 1 reply; 4+ messages in thread
From: Liam Ni @ 2023-08-04 15:32 UTC (permalink / raw)
To: linux-mm, linux-kernel, loongarch, Mike Rapoport
Cc: zhoubinbin, chenfeiyang, jiaxun.yang, Andrew Morton,
H. Peter Anvin, x86, Borislav Petkov, Ingo Molnar,
Thomas Gleixner, peterz, luto, Dave Hansen, kernel, chenhuacai
Optimize the way of calculating missing pages.

In the previous implementation, we calculated missing pages as follows:

1. Calculate numaram by traversing all the numa_meminfo entries and, for
each of them, traversing all the regions in memblock.memory to prepare
for counting missing pages.

2. Traverse all the regions in memblock.memory again to get e820ram.

3. The number of missing pages is (e820ram - numaram).

However, it is enough to count the memory in memblock.memory that does
not have a node assigned.
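
For illustration, the new check essentially reduces to the following loop
(a simplified sketch, not the exact code in the patch below):

	/* count pages in memblock.memory that have no node assigned */
	unsigned long nr_pages = 0;
	unsigned long start_pfn, end_pfn;
	int i, nid;

	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
		if (nid == NUMA_NO_NODE)
			nr_pages += end_pfn - start_pfn;
	}
	/* nr_pages is the memory not covered by any node; compare to the slack */
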
V2:https://lore.kernel.org/all/20230619075315.49114-1-zhiguangni01@gmail.com/
V1:https://lore.kernel.org/all/20230615142016.419570-1-zhiguangni01@gmail.com/
Signed-off-by: Liam Ni <zhiguangni01@gmail.com>
---
arch/loongarch/kernel/numa.c | 23 ++++++++---------------
arch/x86/mm/numa.c | 26 +++++++-------------------
include/linux/mm.h | 1 +
mm/mm_init.c | 20 ++++++++++++++++++++
4 files changed, 36 insertions(+), 34 deletions(-)
diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
index 708665895b47..0239891e4d19 100644
--- a/arch/loongarch/kernel/numa.c
+++ b/arch/loongarch/kernel/numa.c
@@ -262,25 +262,18 @@ static void __init node_mem_init(unsigned int node)
* Sanity check to catch more bad NUMA configurations (they are amazingly
* common). Make sure the nodes cover all memory.
*/
-static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
+static bool __init memblock_validate_numa_coverage(const u64 limit)
{
- int i;
- u64 numaram, biosram;
+ u64 lo_pg;
- numaram = 0;
- for (i = 0; i < mi->nr_blks; i++) {
- u64 s = mi->blk[i].start >> PAGE_SHIFT;
- u64 e = mi->blk[i].end >> PAGE_SHIFT;
+ lo_pg = max_pfn - calculate_without_node_pages_in_range();
- numaram += e - s;
- numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
- if ((s64)numaram < 0)
- numaram = 0;
+ /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
+ if (lo_pg >= limit) {
+ pr_err("NUMA: We lost 1m size page.\n");
+ return false;
}
- max_pfn = max_low_pfn;
- biosram = max_pfn - absent_pages_in_range(0, max_pfn);
- BUG_ON((s64)(biosram - numaram) >= (1 << (20 - PAGE_SHIFT)));
return true;
}
@@ -428,7 +421,7 @@ int __init init_numa_memory(void)
return -EINVAL;
init_node_memblock();
- if (numa_meminfo_cover_memory(&numa_meminfo) == false)
+ if (memblock_validate_numa_coverage(SZ_1M) == false)
return -EINVAL;
for_each_node_mask(node, node_possible_map) {
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 2aadb2019b4f..14feec144675 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -451,30 +451,18 @@ EXPORT_SYMBOL(__node_distance);
* Sanity check to catch more bad NUMA configurations (they are amazingly
* common). Make sure the nodes cover all memory.
*/
-static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
+static bool __init memblock_validate_numa_coverage(const u64 limit)
{
- u64 numaram, e820ram;
- int i;
+ u64 lo_pg;
- numaram = 0;
- for (i = 0; i < mi->nr_blks; i++) {
- u64 s = mi->blk[i].start >> PAGE_SHIFT;
- u64 e = mi->blk[i].end >> PAGE_SHIFT;
- numaram += e - s;
- numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
- if ((s64)numaram < 0)
- numaram = 0;
- }
-
- e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
+ lo_pg = max_pfn - calculate_without_node_pages_in_range();
/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
- if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
- printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
- (numaram << PAGE_SHIFT) >> 20,
- (e820ram << PAGE_SHIFT) >> 20);
+ if (lo_pg >= limit) {
+ pr_err("NUMA: We lost 1m size page.\n");
return false;
}
+
return true;
}
@@ -583,7 +571,7 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
return -EINVAL;
}
}
- if (!numa_meminfo_cover_memory(mi))
+ if (!memblock_validate_numa_coverage(SZ_1M))
return -EINVAL;
/* Finally register nodes. */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0daef3f2f029..b32457ad1ae3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3043,6 +3043,7 @@ unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
unsigned long end_pfn);
extern unsigned long absent_pages_in_range(unsigned long start_pfn,
unsigned long end_pfn);
+extern unsigned long calculate_without_node_pages_in_range(void);
extern void get_pfn_range_for_nid(unsigned int nid,
unsigned long *start_pfn, unsigned long *end_pfn);
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3ddd18a89b66..13a4883787e3 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1132,6 +1132,26 @@ static void __init adjust_zone_range_for_zone_movable(int nid,
}
}
+/**
+ * @start_pfn: The start PFN to start searching for holes
+ * @end_pfn: The end PFN to stop searching for holes
+ *
+ * Return: Return the number of page frames without node assigned within a range.
+ */
+unsigned long __init calculate_without_node_pages_in_range(void)
+{
+ unsigned long num_pages;
+ unsigned long start_pfn, end_pfn;
+ int nid, i;
+
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
+ if (nid == NUMA_NO_NODE)
+ num_pages += end_pfn - start_pfn;
+ }
+
+ return num_pages;
+}
+
/*
* Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
* then all holes in the requested range will be accounted for.
--
2.25.1
* Re: [RESEND PATCH V3] NUMA: Improve the efficiency of calculating pages loss
2023-08-04 15:32 [RESEND PATCH V3] NUMA: Improve the efficiency of calculating pages loss Liam Ni
@ 2023-08-14 15:59 ` Mike Rapoport
2023-08-22 11:49 ` Liam Ni
0 siblings, 1 reply; 4+ messages in thread
From: Mike Rapoport @ 2023-08-14 15:59 UTC (permalink / raw)
To: Liam Ni
Cc: linux-mm, linux-kernel, loongarch, zhoubinbin, chenfeiyang,
jiaxun.yang, Andrew Morton, H. Peter Anvin, x86, Borislav Petkov,
Ingo Molnar, Thomas Gleixner, peterz, luto, Dave Hansen, kernel,
chenhuacai
On Fri, Aug 04, 2023 at 11:32:51PM +0800, Liam Ni wrote:
> Optimize the way of calculating missing pages.
>
> In the previous implementation, we calculated missing pages as follows:
>
> 1. Calculate numaram by traversing all the numa_meminfo entries and, for
> each of them, traversing all the regions in memblock.memory to prepare
> for counting missing pages.
>
> 2. Traverse all the regions in memblock.memory again to get e820ram.
>
> 3. The number of missing pages is (e820ram - numaram).
>
> However, it is enough to count the memory in memblock.memory that does
> not have a node assigned.
>
> V2:https://lore.kernel.org/all/20230619075315.49114-1-zhiguangni01@gmail.com/
> V1:https://lore.kernel.org/all/20230615142016.419570-1-zhiguangni01@gmail.com/
>
> Signed-off-by: Liam Ni <zhiguangni01@gmail.com>
> ---
> arch/loongarch/kernel/numa.c | 23 ++++++++---------------
> arch/x86/mm/numa.c | 26 +++++++-------------------
> include/linux/mm.h | 1 +
> mm/mm_init.c | 20 ++++++++++++++++++++
> 4 files changed, 36 insertions(+), 34 deletions(-)
>
> diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
> index 708665895b47..0239891e4d19 100644
> --- a/arch/loongarch/kernel/numa.c
> +++ b/arch/loongarch/kernel/numa.c
> @@ -262,25 +262,18 @@ static void __init node_mem_init(unsigned int node)
> * Sanity check to catch more bad NUMA configurations (they are amazingly
> * common). Make sure the nodes cover all memory.
> */
> -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
> +static bool __init memblock_validate_numa_coverage(const u64 limit)
There is no need to have arch specific memblock_validate_numa_coverage().
You can add this function to memblock and call it from NUMA initialization
instead of numa_meminfo_cover_memory().
The memblock_validate_numa_coverage() will count all the pages without node
ID set and compare to the threshold provided by the architectures.
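
Something along these lines, just as a sketch to illustrate the idea (the
exact name, signature and threshold semantics are of course up to you):

	/* mm/memblock.c (sketch only) */
	bool __init memblock_validate_numa_coverage(unsigned long threshold_bytes)
	{
		unsigned long nr_pages = 0;
		unsigned long start_pfn, end_pfn;
		int nid, i;

		/* count memory in memblock.memory that has no node id assigned */
		for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
			if (nid == NUMA_NO_NODE)
				nr_pages += end_pfn - start_pfn;
		}

		if ((nr_pages << PAGE_SHIFT) >= threshold_bytes) {
			pr_err("NUMA: nodes do not cover %luMB of RAM\n",
			       (nr_pages << PAGE_SHIFT) >> 20);
			return false;
		}

		return true;
	}

Then the architectures can simply call it with the slack they are willing
to tolerate, e.g. memblock_validate_numa_coverage(SZ_1M), instead of each
keeping a private copy of the check.
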
> {
> - int i;
> - u64 numaram, biosram;
> + u64 lo_pg;
>
> - numaram = 0;
> - for (i = 0; i < mi->nr_blks; i++) {
> - u64 s = mi->blk[i].start >> PAGE_SHIFT;
> - u64 e = mi->blk[i].end >> PAGE_SHIFT;
> + lo_pg = max_pfn - calculate_without_node_pages_in_range();
>
> - numaram += e - s;
> - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
> - if ((s64)numaram < 0)
> - numaram = 0;
> + /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
> + if (lo_pg >= limit) {
> + pr_err("NUMA: We lost 1m size page.\n");
> + return false;
> }
> - max_pfn = max_low_pfn;
> - biosram = max_pfn - absent_pages_in_range(0, max_pfn);
>
> - BUG_ON((s64)(biosram - numaram) >= (1 << (20 - PAGE_SHIFT)));
> return true;
> }
>
> @@ -428,7 +421,7 @@ int __init init_numa_memory(void)
> return -EINVAL;
>
> init_node_memblock();
> - if (numa_meminfo_cover_memory(&numa_meminfo) == false)
> + if (memblock_validate_numa_coverage(SZ_1M) == false)
> return -EINVAL;
>
> for_each_node_mask(node, node_possible_map) {
> diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
> index 2aadb2019b4f..14feec144675 100644
> --- a/arch/x86/mm/numa.c
> +++ b/arch/x86/mm/numa.c
> @@ -451,30 +451,18 @@ EXPORT_SYMBOL(__node_distance);
> * Sanity check to catch more bad NUMA configurations (they are amazingly
> * common). Make sure the nodes cover all memory.
> */
> -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
> +static bool __init memblock_validate_numa_coverage(const u64 limit)
> {
> - u64 numaram, e820ram;
> - int i;
> + u64 lo_pg;
>
> - numaram = 0;
> - for (i = 0; i < mi->nr_blks; i++) {
> - u64 s = mi->blk[i].start >> PAGE_SHIFT;
> - u64 e = mi->blk[i].end >> PAGE_SHIFT;
> - numaram += e - s;
> - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
> - if ((s64)numaram < 0)
> - numaram = 0;
> - }
> -
> - e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
> + lo_pg = max_pfn - calculate_without_node_pages_in_range();
>
> /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
> - if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
> - printk(KERN_ERR "NUMA: nodes only cover %LuMB of your
> %LuMB e820 RAM. Not used.\n",
> - (numaram << PAGE_SHIFT) >> 20,
> - (e820ram << PAGE_SHIFT) >> 20);
> + if (lo_pg >= limit) {
> + pr_err("NUMA: We lost 1m size page.\n");
> return false;
> }
> +
> return true;
> }
>
> @@ -583,7 +571,7 @@ static int __init numa_register_memblks(struct
> numa_meminfo *mi)
> return -EINVAL;
> }
> }
> - if (!numa_meminfo_cover_memory(mi))
> + if (!memblock_validate_numa_coverage(SZ_1M))
> return -EINVAL;
>
> /* Finally register nodes. */
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0daef3f2f029..b32457ad1ae3 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3043,6 +3043,7 @@ unsigned long __absent_pages_in_range(int nid,
> unsigned long start_pfn,
> unsigned long end_pfn);
> extern unsigned long absent_pages_in_range(unsigned long start_pfn,
> unsigned long end_pfn);
> +extern unsigned long calculate_without_node_pages_in_range(void);
> extern void get_pfn_range_for_nid(unsigned int nid,
> unsigned long *start_pfn, unsigned long *end_pfn);
>
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 3ddd18a89b66..13a4883787e3 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -1132,6 +1132,26 @@ static void __init
> adjust_zone_range_for_zone_movable(int nid,
> }
> }
>
> +/**
> + * @start_pfn: The start PFN to start searching for holes
> + * @end_pfn: The end PFN to stop searching for holes
> + *
> + * Return: Return the number of page frames without node assigned
> within a range.
> + */
> +unsigned long __init calculate_without_node_pages_in_range(void)
> +{
> + unsigned long num_pages;
> + unsigned long start_pfn, end_pfn;
> + int nid, i;
> +
> + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
> + if (nid == NUMA_NO_NODE)
> + num_pages += end_pfn - start_pfn;
> + }
> +
> + return num_pages;
> +}
> +
> /*
> * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
> * then all holes in the requested range will be accounted for.
> --
> 2.25.1
--
Sincerely yours,
Mike.
* Re: [RESEND PATCH V3] NUMA: Improve the efficiency of calculating pages loss
2023-08-14 15:59 ` Mike Rapoport
@ 2023-08-22 11:49 ` Liam Ni
2023-08-23 14:35 ` Mike Rapoport
0 siblings, 1 reply; 4+ messages in thread
From: Liam Ni @ 2023-08-22 11:49 UTC (permalink / raw)
To: Mike Rapoport
Cc: linux-mm, linux-kernel, loongarch, zhoubinbin, chenfeiyang,
jiaxun.yang, Andrew Morton, H. Peter Anvin, x86, Borislav Petkov,
Ingo Molnar, Thomas Gleixner, peterz, luto, Dave Hansen, kernel,
chenhuacai
On Tue, 15 Aug 2023 at 00:00, Mike Rapoport <rppt@kernel.org> wrote:
>
> On Fri, Aug 04, 2023 at 11:32:51PM +0800, Liam Ni wrote:
> > Optimize the way of calculating missing pages.
> >
> > In the previous implementation, we calculated missing pages as follows:
> >
> > 1. Calculate numaram by traversing all the numa_meminfo entries and, for
> > each of them, traversing all the regions in memblock.memory to prepare
> > for counting missing pages.
> >
> > 2. Traverse all the regions in memblock.memory again to get e820ram.
> >
> > 3. The number of missing pages is (e820ram - numaram).
> >
> > However, it is enough to count the memory in memblock.memory that does
> > not have a node assigned.
> >
> > V2:https://lore.kernel.org/all/20230619075315.49114-1-zhiguangni01@gmail.com/
> > V1:https://lore.kernel.org/all/20230615142016.419570-1-zhiguangni01@gmail.com/
> >
> > Signed-off-by: Liam Ni <zhiguangni01@gmail.com>
> > ---
> > arch/loongarch/kernel/numa.c | 23 ++++++++---------------
> > arch/x86/mm/numa.c | 26 +++++++-------------------
> > include/linux/mm.h | 1 +
> > mm/mm_init.c | 20 ++++++++++++++++++++
> > 4 files changed, 36 insertions(+), 34 deletions(-)
> >
> > diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
> > index 708665895b47..0239891e4d19 100644
> > --- a/arch/loongarch/kernel/numa.c
> > +++ b/arch/loongarch/kernel/numa.c
> > @@ -262,25 +262,18 @@ static void __init node_mem_init(unsigned int node)
> > * Sanity check to catch more bad NUMA configurations (they are amazingly
> > * common). Make sure the nodes cover all memory.
> > */
> > -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
> > +static bool __init memblock_validate_numa_coverage(const u64 limit)
>
> There is no need to have arch specific memblock_validate_numa_coverage().
> You can add this function to memblock and call it from NUMA initialization
> instead of numa_meminfo_cover_memory().
Do you mean I should remove the numa_meminfo_cover_memory() implementation entirely?
>
> The memblock_validate_numa_coverage() will count all the pages without node
> ID set and compare to the threshold provided by the architectures.
>
> > {
> > - int i;
> > - u64 numaram, biosram;
> > + u64 lo_pg;
> >
> > - numaram = 0;
> > - for (i = 0; i < mi->nr_blks; i++) {
> > - u64 s = mi->blk[i].start >> PAGE_SHIFT;
> > - u64 e = mi->blk[i].end >> PAGE_SHIFT;
> > + lo_pg = max_pfn - calculate_without_node_pages_in_range();
> >
> > - numaram += e - s;
> > - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
> > - if ((s64)numaram < 0)
> > - numaram = 0;
> > + /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
> > + if (lo_pg >= limit) {
> > + pr_err("NUMA: We lost 1m size page.\n");
> > + return false;
> > }
> > - max_pfn = max_low_pfn;
> > - biosram = max_pfn - absent_pages_in_range(0, max_pfn);
> >
> > - BUG_ON((s64)(biosram - numaram) >= (1 << (20 - PAGE_SHIFT)));
> > return true;
> > }
> >
> > @@ -428,7 +421,7 @@ int __init init_numa_memory(void)
> > return -EINVAL;
> >
> > init_node_memblock();
> > - if (numa_meminfo_cover_memory(&numa_meminfo) == false)
> > + if (memblock_validate_numa_coverage(SZ_1M) == false)
> > return -EINVAL;
> >
> > for_each_node_mask(node, node_possible_map) {
> > diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
> > index 2aadb2019b4f..14feec144675 100644
> > --- a/arch/x86/mm/numa.c
> > +++ b/arch/x86/mm/numa.c
> > @@ -451,30 +451,18 @@ EXPORT_SYMBOL(__node_distance);
> > * Sanity check to catch more bad NUMA configurations (they are amazingly
> > * common). Make sure the nodes cover all memory.
> > */
> > -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
> > +static bool __init memblock_validate_numa_coverage(const u64 limit)
> > {
> > - u64 numaram, e820ram;
> > - int i;
> > + u64 lo_pg;
> >
> > - numaram = 0;
> > - for (i = 0; i < mi->nr_blks; i++) {
> > - u64 s = mi->blk[i].start >> PAGE_SHIFT;
> > - u64 e = mi->blk[i].end >> PAGE_SHIFT;
> > - numaram += e - s;
> > - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
> > - if ((s64)numaram < 0)
> > - numaram = 0;
> > - }
> > -
> > - e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
> > + lo_pg = max_pfn - calculate_without_node_pages_in_range();
> >
> > /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
> > - if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
> > - printk(KERN_ERR "NUMA: nodes only cover %LuMB of your
> > %LuMB e820 RAM. Not used.\n",
> > - (numaram << PAGE_SHIFT) >> 20,
> > - (e820ram << PAGE_SHIFT) >> 20);
> > + if (lo_pg >= limit) {
> > + pr_err("NUMA: We lost 1m size page.\n");
> > return false;
> > }
> > +
> > return true;
> > }
> >
> > @@ -583,7 +571,7 @@ static int __init numa_register_memblks(struct
> > numa_meminfo *mi)
> > return -EINVAL;
> > }
> > }
> > - if (!numa_meminfo_cover_memory(mi))
> > + if (!memblock_validate_numa_coverage(SZ_1M))
> > return -EINVAL;
> >
> > /* Finally register nodes. */
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index 0daef3f2f029..b32457ad1ae3 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -3043,6 +3043,7 @@ unsigned long __absent_pages_in_range(int nid,
> > unsigned long start_pfn,
> > unsigned long end_pfn);
> > extern unsigned long absent_pages_in_range(unsigned long start_pfn,
> > unsigned long end_pfn);
> > +extern unsigned long calculate_without_node_pages_in_range(void);
> > extern void get_pfn_range_for_nid(unsigned int nid,
> > unsigned long *start_pfn, unsigned long *end_pfn);
> >
> > diff --git a/mm/mm_init.c b/mm/mm_init.c
> > index 3ddd18a89b66..13a4883787e3 100644
> > --- a/mm/mm_init.c
> > +++ b/mm/mm_init.c
> > @@ -1132,6 +1132,26 @@ static void __init
> > adjust_zone_range_for_zone_movable(int nid,
> > }
> > }
> >
> > +/**
> > + * @start_pfn: The start PFN to start searching for holes
> > + * @end_pfn: The end PFN to stop searching for holes
> > + *
> > + * Return: Return the number of page frames without node assigned
> > within a range.
> > + */
> > +unsigned long __init calculate_without_node_pages_in_range(void)
> > +{
> > + unsigned long num_pages;
> > + unsigned long start_pfn, end_pfn;
> > + int nid, i;
> > +
> > + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
> > + if (nid == NUMA_NO_NODE)
> > + num_pages += end_pfn - start_pfn;
> > + }
> > +
> > + return num_pages;
> > +}
> > +
> > /*
> > * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
> > * then all holes in the requested range will be accounted for.
> > --
> > 2.25.1
>
> --
> Sincerely yours,
> Mike.
* Re: [RESEND PATCH V3] NUMA: Improve the efficiency of calculating pages loss
2023-08-22 11:49 ` Liam Ni
@ 2023-08-23 14:35 ` Mike Rapoport
0 siblings, 0 replies; 4+ messages in thread
From: Mike Rapoport @ 2023-08-23 14:35 UTC (permalink / raw)
To: Liam Ni
Cc: linux-mm, linux-kernel, loongarch, zhoubinbin, chenfeiyang,
jiaxun.yang, Andrew Morton, H. Peter Anvin, x86, Borislav Petkov,
Ingo Molnar, Thomas Gleixner, peterz, luto, Dave Hansen, kernel,
chenhuacai
On Tue, Aug 22, 2023 at 07:49:05PM +0800, Liam Ni wrote:
> On Tue, 15 Aug 2023 at 00:00, Mike Rapoport <rppt@kernel.org> wrote:
> >
> > On Fri, Aug 04, 2023 at 11:32:51PM +0800, Liam Ni wrote:
> > > Optimize the way of calculating missing pages.
> > >
> > > In the previous implementation, we calculated missing pages as follows:
> > >
> > > 1. Calculate numaram by traversing all the numa_meminfo entries and, for
> > > each of them, traversing all the regions in memblock.memory to prepare
> > > for counting missing pages.
> > >
> > > 2. Traverse all the regions in memblock.memory again to get e820ram.
> > >
> > > 3. The number of missing pages is (e820ram - numaram).
> > >
> > > However, it is enough to count the memory in memblock.memory that does
> > > not have a node assigned.
> > >
> > > V2:https://lore.kernel.org/all/20230619075315.49114-1-zhiguangni01@gmail.com/
> > > V1:https://lore.kernel.org/all/20230615142016.419570-1-zhiguangni01@gmail.com/
> > >
> > > Signed-off-by: Liam Ni <zhiguangni01@gmail.com>
> > > ---
> > > arch/loongarch/kernel/numa.c | 23 ++++++++---------------
> > > arch/x86/mm/numa.c | 26 +++++++-------------------
> > > include/linux/mm.h | 1 +
> > > mm/mm_init.c | 20 ++++++++++++++++++++
> > > 4 files changed, 36 insertions(+), 34 deletions(-)
> > >
> > > diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
> > > index 708665895b47..0239891e4d19 100644
> > > --- a/arch/loongarch/kernel/numa.c
> > > +++ b/arch/loongarch/kernel/numa.c
> > > @@ -262,25 +262,18 @@ static void __init node_mem_init(unsigned int node)
> > > * Sanity check to catch more bad NUMA configurations (they are amazingly
> > > * common). Make sure the nodes cover all memory.
> > > */
> > > -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
> > > +static bool __init memblock_validate_numa_coverage(const u64 limit)
> >
> > There is no need to have arch specific memblock_validate_numa_coverage().
> > You can add this function to memblock and call it from NUMA initialization
> > instead of numa_meminfo_cover_memory().
>
> Do you mean I should remove the numa_meminfo_cover_memory() implementation entirely?
Yes, that's the idea.
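
I.e. the arch-local copies go away and the callers end up with just
something like this (sketch, assuming the helper lives in memblock as
suggested above):

	/* e.g. in numa_register_memblks() / init_numa_memory() (sketch) */
	if (!memblock_validate_numa_coverage(SZ_1M))
		return -EINVAL;
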
> > The memblock_validate_numa_coverage() will count all the pages without node
> > ID set and compare to the threshold provided by the architectures.
> >
> > > {
> > > - int i;
> > > - u64 numaram, biosram;
> > > + u64 lo_pg;
> > >
> > > - numaram = 0;
> > > - for (i = 0; i < mi->nr_blks; i++) {
> > > - u64 s = mi->blk[i].start >> PAGE_SHIFT;
> > > - u64 e = mi->blk[i].end >> PAGE_SHIFT;
> > > + lo_pg = max_pfn - calculate_without_node_pages_in_range();
> > >
> > > - numaram += e - s;
> > > - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
> > > - if ((s64)numaram < 0)
> > > - numaram = 0;
> > > + /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
> > > + if (lo_pg >= limit) {
> > > + pr_err("NUMA: We lost 1m size page.\n");
> > > + return false;
> > > }
> > > - max_pfn = max_low_pfn;
> > > - biosram = max_pfn - absent_pages_in_range(0, max_pfn);
> > >
> > > - BUG_ON((s64)(biosram - numaram) >= (1 << (20 - PAGE_SHIFT)));
> > > return true;
> > > }
> > >
> > > @@ -428,7 +421,7 @@ int __init init_numa_memory(void)
> > > return -EINVAL;
> > >
> > > init_node_memblock();
> > > - if (numa_meminfo_cover_memory(&numa_meminfo) == false)
> > > + if (memblock_validate_numa_coverage(SZ_1M) == false)
> > > return -EINVAL;
> > >
> > > for_each_node_mask(node, node_possible_map) {
> > > diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
> > > index 2aadb2019b4f..14feec144675 100644
> > > --- a/arch/x86/mm/numa.c
> > > +++ b/arch/x86/mm/numa.c
> > > @@ -451,30 +451,18 @@ EXPORT_SYMBOL(__node_distance);
> > > * Sanity check to catch more bad NUMA configurations (they are amazingly
> > > * common). Make sure the nodes cover all memory.
> > > */
> > > -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
> > > +static bool __init memblock_validate_numa_coverage(const u64 limit)
> > > {
> > > - u64 numaram, e820ram;
> > > - int i;
> > > + u64 lo_pg;
> > >
> > > - numaram = 0;
> > > - for (i = 0; i < mi->nr_blks; i++) {
> > > - u64 s = mi->blk[i].start >> PAGE_SHIFT;
> > > - u64 e = mi->blk[i].end >> PAGE_SHIFT;
> > > - numaram += e - s;
> > > - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
> > > - if ((s64)numaram < 0)
> > > - numaram = 0;
> > > - }
> > > -
> > > - e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
> > > + lo_pg = max_pfn - calculate_without_node_pages_in_range();
> > >
> > > /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
> > > - if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
> > > - printk(KERN_ERR "NUMA: nodes only cover %LuMB of your
> > > %LuMB e820 RAM. Not used.\n",
> > > - (numaram << PAGE_SHIFT) >> 20,
> > > - (e820ram << PAGE_SHIFT) >> 20);
> > > + if (lo_pg >= limit) {
> > > + pr_err("NUMA: We lost 1m size page.\n");
> > > return false;
> > > }
> > > +
> > > return true;
> > > }
> > >
> > > @@ -583,7 +571,7 @@ static int __init numa_register_memblks(struct
> > > numa_meminfo *mi)
> > > return -EINVAL;
> > > }
> > > }
> > > - if (!numa_meminfo_cover_memory(mi))
> > > + if (!memblock_validate_numa_coverage(SZ_1M))
> > > return -EINVAL;
> > >
> > > /* Finally register nodes. */
> > > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > > index 0daef3f2f029..b32457ad1ae3 100644
> > > --- a/include/linux/mm.h
> > > +++ b/include/linux/mm.h
> > > @@ -3043,6 +3043,7 @@ unsigned long __absent_pages_in_range(int nid,
> > > unsigned long start_pfn,
> > > unsigned long end_pfn);
> > > extern unsigned long absent_pages_in_range(unsigned long start_pfn,
> > > unsigned long end_pfn);
> > > +extern unsigned long calculate_without_node_pages_in_range(void);
> > > extern void get_pfn_range_for_nid(unsigned int nid,
> > > unsigned long *start_pfn, unsigned long *end_pfn);
> > >
> > > diff --git a/mm/mm_init.c b/mm/mm_init.c
> > > index 3ddd18a89b66..13a4883787e3 100644
> > > --- a/mm/mm_init.c
> > > +++ b/mm/mm_init.c
> > > @@ -1132,6 +1132,26 @@ static void __init
> > > adjust_zone_range_for_zone_movable(int nid,
> > > }
> > > }
> > >
> > > +/**
> > > + * @start_pfn: The start PFN to start searching for holes
> > > + * @end_pfn: The end PFN to stop searching for holes
> > > + *
> > > + * Return: Return the number of page frames without node assigned
> > > within a range.
> > > + */
> > > +unsigned long __init calculate_without_node_pages_in_range(void)
> > > +{
> > > + unsigned long num_pages;
> > > + unsigned long start_pfn, end_pfn;
> > > + int nid, i;
> > > +
> > > + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
> > > + if (nid == NUMA_NO_NODE)
> > > + num_pages += end_pfn - start_pfn;
> > > + }
> > > +
> > > + return num_pages;
> > > +}
> > > +
> > > /*
> > > * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
> > > * then all holes in the requested range will be accounted for.
> > > --
> > > 2.25.1
> >
> > --
> > Sincerely yours,
> > Mike.
--
Sincerely yours,
Mike.