From: "Martin J. Bligh" <fletch@aracnet.com>
To: Andrew Morton <akpm@zip.com.au>
Cc: linux-mm mailing list <linux-mm@kvack.org>
Subject: Cleanup of alloc_pages code (removes _alloc_pages)
Date: Sat, 07 Sep 2002 22:08:32 -0700
Message-ID: <185353754.1031436512@[10.10.2.3]>
[-- Attachment #1: Type: text/plain, Size: 899 bytes --]
This patch was originally from Andrea's tree (from SGI??),
and has since been tweaked by both Christoph (who cleaned up
all the code) and myself (who just hit it until it worked).
It removes _alloc_pages, and adds all nodes to the zonelists
directly, which also changes the fallback zone order to
something more sensible ... instead of:
"foreach (node) { foreach (zone) }"
we now do something more like
"foreach (zone_type) { foreach (node) }"
Christoph has a more recent version that's fancier and does
a couple more cleanups, but it seems to have a bug in it that
I can't track down easily, so I propose we do the simple thing
for now, and take the rest of the cleanups when it works ...
it seems to build nicely on top of this as a separate patch, to me.
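For reference, the fast path this leaves behind is just an index
into those pre-built lists; this is a simplified restatement of the
gfp.h hunk in the attached patch, not new code:

static inline struct page *alloc_pages(unsigned int gfp_mask,
				       unsigned int order)
{
	/* pick the local node's pre-built list for this gfp index */
	pg_data_t *pgdat = NODE_DATA(numa_node_id());
	unsigned int idx = gfp_mask & GFP_ZONEMASK;

	if (unlikely(order >= MAX_ORDER))
		return NULL;
	/* __alloc_pages() walks zones[] until one satisfies the request */
	return __alloc_pages(gfp_mask, order, pgdat->node_zonelists + idx);
}

With, say, MAX_NUMNODES = 16 and MAX_NR_ZONES = 3, each such list
holds at most 16 * 3 = 48 zone pointers plus the NULL terminator,
which is what the new MAX_NUMNODES * MAX_NR_ZONES + 1 sizing in the
mmzone.h hunk accounts for.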
Tested on 16-way NUMA-Q with discontigmem + NUMA support
and on a standard PC (well, it boots and appears functional).
On top of 2.5.33-mm4
M.
[-- Attachment #2: 33-alloc_pages --]
[-- Type: application/octet-stream, Size: 11920 bytes --]
diff -urN -X /home/mbligh/.diff.exclude 32-free_area_init/arch/sparc64/mm/init.c 33-alloc_pages/arch/sparc64/mm/init.c
--- 32-free_area_init/arch/sparc64/mm/init.c Fri Sep 6 22:40:47 2002
+++ 33-alloc_pages/arch/sparc64/mm/init.c Sat Sep 7 09:03:33 2002
@@ -1734,7 +1734,7 @@
* Set up the zero page, mark it reserved, so that page count
* is not manipulated when freeing the page from user ptes.
*/
- mem_map_zero = _alloc_pages(GFP_KERNEL, 0);
+ mem_map_zero = alloc_pages(GFP_KERNEL, 0);
if (mem_map_zero == NULL) {
prom_printf("paging_init: Cannot alloc zero page.\n");
prom_halt();
diff -urN -X /home/mbligh/.diff.exclude 32-free_area_init/include/asm-alpha/mmzone.h 33-alloc_pages/include/asm-alpha/mmzone.h
--- 32-free_area_init/include/asm-alpha/mmzone.h Fri Sep 6 23:03:19 2002
+++ 33-alloc_pages/include/asm-alpha/mmzone.h Sat Sep 7 09:03:58 2002
@@ -36,12 +36,10 @@
#ifdef CONFIG_ALPHA_WILDFIRE
# define ALPHA_PA_TO_NID(pa) ((pa) >> 36) /* 16 nodes max due 43bit kseg */
-#define NODE_MAX_MEM_SIZE (64L * 1024L * 1024L * 1024L) /* 64 GB */
-#define MAX_NUMNODES WILDFIRE_MAX_QBB
+# define NODE_MAX_MEM_SIZE (64L * 1024L * 1024L * 1024L) /* 64 GB */
#else
# define ALPHA_PA_TO_NID(pa) (0)
-#define NODE_MAX_MEM_SIZE (~0UL)
-#define MAX_NUMNODES 1
+# define NODE_MAX_MEM_SIZE (~0UL)
#endif
#define PHYSADDR_TO_NID(pa) ALPHA_PA_TO_NID(pa)
diff -urN -X /home/mbligh/.diff.exclude 32-free_area_init/include/asm-alpha/numnodes.h 33-alloc_pages/include/asm-alpha/numnodes.h
--- 32-free_area_init/include/asm-alpha/numnodes.h Wed Dec 31 16:00:00 1969
+++ 33-alloc_pages/include/asm-alpha/numnodes.h Sat Sep 7 09:03:33 2002
@@ -0,0 +1,12 @@
+#ifndef _ASM_MAX_NUMNODES_H
+#define _ASM_MAX_NUMNODES_H
+
+/*
+ * Currently the Wildfire is the only discontigmem/NUMA capable Alpha core.
+ */
+#if defined(CONFIG_ALPHA_WILDFIRE) || defined(CONFIG_ALPHA_GENERIC)
+# include <asm/core_wildfire.h>
+# define MAX_NUMNODES WILDFIRE_MAX_QBB
+#endif
+
+#endif /* _ASM_MAX_NUMNODES_H */
diff -urN -X /home/mbligh/.diff.exclude 32-free_area_init/include/asm-i386/max_numnodes.h 33-alloc_pages/include/asm-i386/max_numnodes.h
--- 32-free_area_init/include/asm-i386/max_numnodes.h Fri Sep 6 22:40:51 2002
+++ 33-alloc_pages/include/asm-i386/max_numnodes.h Wed Dec 31 16:00:00 1969
@@ -1,12 +0,0 @@
-#ifndef _ASM_MAX_NUMNODES_H
-#define _ASM_MAX_NUMNODES_H
-
-#include <linux/config.h>
-
-#ifdef CONFIG_X86_NUMAQ
-#include <asm/numaq.h>
-#else
-#define MAX_NUMNODES 1
-#endif /* CONFIG_X86_NUMAQ */
-
-#endif /* _ASM_MAX_NUMNODES_H */
diff -urN -X /home/mbligh/.diff.exclude 32-free_area_init/include/asm-i386/mmzone.h 33-alloc_pages/include/asm-i386/mmzone.h
--- 32-free_area_init/include/asm-i386/mmzone.h Fri Sep 6 23:03:19 2002
+++ 33-alloc_pages/include/asm-i386/mmzone.h Sat Sep 7 11:31:26 2002
@@ -6,6 +6,8 @@
#ifndef _ASM_MMZONE_H_
#define _ASM_MMZONE_H_
+#include <asm/smp.h>
+
#ifdef CONFIG_DISCONTIGMEM
#ifdef CONFIG_X86_NUMAQ
diff -urN -X /home/mbligh/.diff.exclude 32-free_area_init/include/asm-i386/numnodes.h 33-alloc_pages/include/asm-i386/numnodes.h
--- 32-free_area_init/include/asm-i386/numnodes.h Wed Dec 31 16:00:00 1969
+++ 33-alloc_pages/include/asm-i386/numnodes.h Fri Sep 6 22:40:51 2002
@@ -0,0 +1,12 @@
+#ifndef _ASM_MAX_NUMNODES_H
+#define _ASM_MAX_NUMNODES_H
+
+#include <linux/config.h>
+
+#ifdef CONFIG_X86_NUMAQ
+#include <asm/numaq.h>
+#else
+#define MAX_NUMNODES 1
+#endif /* CONFIG_X86_NUMAQ */
+
+#endif /* _ASM_MAX_NUMNODES_H */
diff -urN -X /home/mbligh/.diff.exclude 32-free_area_init/include/linux/gfp.h 33-alloc_pages/include/linux/gfp.h
--- 32-free_area_init/include/linux/gfp.h Sat Aug 31 15:04:53 2002
+++ 33-alloc_pages/include/linux/gfp.h Sat Sep 7 09:08:46 2002
@@ -39,18 +39,25 @@
* can allocate highmem pages, the *get*page*() variants return
* virtual kernel addresses to the allocated page(s).
*/
-extern struct page * FASTCALL(_alloc_pages(unsigned int gfp_mask, unsigned int order));
extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned int order, struct zonelist *zonelist));
extern struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order);
+/*
+ * We get the zone list from the current node and the gfp_mask.
+ * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones.
+ *
+ * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets
+ * optimized to &contig_page_data at compile-time.
+ */
static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order)
{
- /*
- * Gets optimized away by the compiler.
- */
- if (order >= MAX_ORDER)
+ pg_data_t *pgdat = NODE_DATA(numa_node_id());
+ unsigned int idx = (gfp_mask & GFP_ZONEMASK);
+
+ if (unlikely(order >= MAX_ORDER))
return NULL;
- return _alloc_pages(gfp_mask, order);
+
+ return __alloc_pages(gfp_mask, order, pgdat->node_zonelists + idx);
}
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
diff -urN -X /home/mbligh/.diff.exclude 32-free_area_init/include/linux/mmzone.h 33-alloc_pages/include/linux/mmzone.h
--- 32-free_area_init/include/linux/mmzone.h Fri Sep 6 23:21:51 2002
+++ 33-alloc_pages/include/linux/mmzone.h Sat Sep 7 09:18:01 2002
@@ -10,11 +10,14 @@
#include <linux/wait.h>
#include <linux/cache.h>
#include <asm/atomic.h>
+#ifdef CONFIG_DISCONTIGMEM
+#include <asm/numnodes.h>
+#endif
+#ifndef MAX_NUMNODES
+#define MAX_NUMNODES 1
+#endif
-/*
- * Free memory management - zoned buddy allocator.
- */
-
+/* Free memory management - zoned buddy allocator. */
#ifndef CONFIG_FORCE_MAX_ZONEORDER
#define MAX_ORDER 15
#else
@@ -137,7 +140,7 @@
* footprint of this construct is very small.
*/
struct zonelist {
- struct zone *zones[MAX_NR_ZONES+1]; // NULL delimited
+ struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited
};
#define GFP_ZONEMASK 0x0f
@@ -190,6 +193,7 @@
extern void free_area_init_core(pg_data_t *pgdat, unsigned long *zones_size,
unsigned long *zholes_size);
void get_zone_counts(unsigned long *active, unsigned long *inactive);
+extern void build_all_zonelists(void);
extern pg_data_t contig_page_data;
diff -urN -X /home/mbligh/.diff.exclude 32-free_area_init/init/main.c 33-alloc_pages/init/main.c
--- 32-free_area_init/init/main.c Fri Sep 6 22:40:51 2002
+++ 33-alloc_pages/init/main.c Sat Sep 7 09:03:33 2002
@@ -396,6 +396,7 @@
printk(linux_banner);
setup_arch(&command_line);
setup_per_cpu_areas();
+ build_all_zonelists();
printk("Kernel command line: %s\n", saved_command_line);
parse_options(command_line);
trap_init();
diff -urN -X /home/mbligh/.diff.exclude 32-free_area_init/kernel/ksyms.c 33-alloc_pages/kernel/ksyms.c
--- 32-free_area_init/kernel/ksyms.c Fri Sep 6 23:03:19 2002
+++ 33-alloc_pages/kernel/ksyms.c Sat Sep 7 09:20:18 2002
@@ -89,7 +89,6 @@
EXPORT_SYMBOL(exit_mm);
/* internal kernel memory management */
-EXPORT_SYMBOL(_alloc_pages);
EXPORT_SYMBOL(__alloc_pages);
EXPORT_SYMBOL(alloc_pages_node);
EXPORT_SYMBOL(__get_free_pages);
@@ -116,6 +115,7 @@
EXPORT_SYMBOL(vmalloc_to_page);
EXPORT_SYMBOL(remap_page_range);
#ifndef CONFIG_DISCONTIGMEM
+EXPORT_SYMBOL(contig_page_data);
EXPORT_SYMBOL(mem_map);
EXPORT_SYMBOL(max_mapnr);
#endif
diff -urN -X /home/mbligh/.diff.exclude 32-free_area_init/mm/numa.c 33-alloc_pages/mm/numa.c
--- 32-free_area_init/mm/numa.c Fri Sep 6 23:03:19 2002
+++ 33-alloc_pages/mm/numa.c Sat Sep 7 09:03:33 2002
@@ -85,48 +85,4 @@
memset(pgdat->valid_addr_bitmap, 0, size);
}
-static struct page * alloc_pages_pgdat(pg_data_t *pgdat, unsigned int gfp_mask,
- unsigned int order)
-{
- return __alloc_pages(gfp_mask, order, pgdat->node_zonelists + (gfp_mask & GFP_ZONEMASK));
-}
-
-/*
- * This can be refined. Currently, tries to do round robin, instead
- * should do concentratic circle search, starting from current node.
- */
-struct page * _alloc_pages(unsigned int gfp_mask, unsigned int order)
-{
- struct page *ret = 0;
- pg_data_t *start, *temp;
-#ifndef CONFIG_NUMA
- unsigned long flags;
- static pg_data_t *next = 0;
-#endif
-
- if (order >= MAX_ORDER)
- return NULL;
-#ifdef CONFIG_NUMA
- temp = NODE_DATA(numa_node_id());
-#else
- if (!next)
- next = pgdat_list;
- temp = next;
- next = next->pgdat_next;
-#endif
- start = temp;
- while (temp) {
- if ((ret = alloc_pages_pgdat(temp, gfp_mask, order)))
- return(ret);
- temp = temp->pgdat_next;
- }
- temp = pgdat_list;
- while (temp != start) {
- if ((ret = alloc_pages_pgdat(temp, gfp_mask, order)))
- return(ret);
- temp = temp->pgdat_next;
- }
- return(0);
-}
-
#endif /* CONFIG_DISCONTIGMEM */
diff -urN -X /home/mbligh/.diff.exclude 32-free_area_init/mm/page_alloc.c 33-alloc_pages/mm/page_alloc.c
--- 32-free_area_init/mm/page_alloc.c Fri Sep 6 23:28:41 2002
+++ 33-alloc_pages/mm/page_alloc.c Sat Sep 7 11:41:03 2002
@@ -256,14 +256,6 @@
}
#endif /* CONFIG_SOFTWARE_SUSPEND */
-#ifndef CONFIG_DISCONTIGMEM
-struct page *_alloc_pages(unsigned int gfp_mask, unsigned int order)
-{
- return __alloc_pages(gfp_mask, order,
- contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK));
-}
-#endif
-
static /* inline */ struct page *
balance_classzone(struct zone* classzone, unsigned int gfp_mask,
unsigned int order, int * freed)
@@ -679,13 +671,41 @@
/*
* Builds allocation fallback zone lists.
*/
-static inline void build_zonelists(pg_data_t *pgdat)
+static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int j, int k)
+{
+ switch (k) {
+ struct zone *zone;
+ default:
+ BUG();
+ case ZONE_HIGHMEM:
+ zone = pgdat->node_zones + ZONE_HIGHMEM;
+ if (zone->size) {
+#ifndef CONFIG_HIGHMEM
+ BUG();
+#endif
+ zonelist->zones[j++] = zone;
+ }
+ case ZONE_NORMAL:
+ zone = pgdat->node_zones + ZONE_NORMAL;
+ if (zone->size)
+ zonelist->zones[j++] = zone;
+ case ZONE_DMA:
+ zone = pgdat->node_zones + ZONE_DMA;
+ if (zone->size)
+ zonelist->zones[j++] = zone;
+ }
+
+ return j;
+}
+
+static void __init build_zonelists(pg_data_t *pgdat)
{
- int i, j, k;
+ int i, j, k, node, local_node;
+ local_node = pgdat->node_id;
+ printk("Building zonelist for node : %d\n", local_node);
for (i = 0; i <= GFP_ZONEMASK; i++) {
struct zonelist *zonelist;
- struct zone *zone;
zonelist = pgdat->node_zonelists + i;
memset(zonelist, 0, sizeof(*zonelist));
@@ -697,33 +717,32 @@
if (i & __GFP_DMA)
k = ZONE_DMA;
- switch (k) {
- default:
- BUG();
- /*
- * fallthrough:
- */
- case ZONE_HIGHMEM:
- zone = pgdat->node_zones + ZONE_HIGHMEM;
- if (zone->size) {
-#ifndef CONFIG_HIGHMEM
- BUG();
-#endif
- zonelist->zones[j++] = zone;
- }
- case ZONE_NORMAL:
- zone = pgdat->node_zones + ZONE_NORMAL;
- if (zone->size)
- zonelist->zones[j++] = zone;
- case ZONE_DMA:
- zone = pgdat->node_zones + ZONE_DMA;
- if (zone->size)
- zonelist->zones[j++] = zone;
- }
+ j = build_zonelists_node(pgdat, zonelist, j, k);
+ /*
+ * Now we build the zonelist so that it contains the zones
+ * of all the other nodes.
+ * We don't want to pressure a particular node, so when
+ * building the zones for node N, we make sure that the
+ * zones coming right after the local ones are those from
+ * node N+1 (modulo N)
+ */
+ for (node = local_node + 1; node < numnodes; node++)
+ j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+ for (node = 0; node < local_node; node++)
+ j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+
zonelist->zones[j++] = NULL;
}
}
+void __init build_all_zonelists(void)
+{
+ int i;
+
+ for(i = 0 ; i < numnodes ; i++)
+ build_zonelists(NODE_DATA(i));
+}
+
void __init calculate_totalpages (pg_data_t *pgdat, unsigned long *zones_size,
unsigned long *zholes_size)
{
@@ -919,7 +938,6 @@
(unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
}
}
- build_zonelists(pgdat);
}
#ifndef CONFIG_DISCONTIGMEM