linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Mel Gorman <mel@csn.ul.ie>
To: Dave Hansen <haveblue@us.ibm.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	lhms-devel@lists.sourceforge.net
Subject: Re: [PATCH 4/7] ppc64 - Specify amount of kernel memory at boot time
Date: Wed, 22 Feb 2006 16:43:57 +0000 (GMT)	[thread overview]
Message-ID: <Pine.LNX.4.64.0602221625100.2801@skynet.skynet.ie> (raw)
In-Reply-To: <1140543359.8693.32.camel@localhost.localdomain>

On Tue, 21 Feb 2006, Dave Hansen wrote:

> On Tue, 2006-02-21 at 14:51 +0000, Mel Gorman wrote:
>> A new release of patches is a long time away but here is an early draft of
>> what the above currently looks like. Is this more or less what you were
>> thinking?
>
> I think it may be a bit harder to understand than even the other
> one.  :(
>

:/

> In a nutshell, get_zones_info() tries to do too much.  Six function
> arguments should be a big, red, warning light that something is really
> wrong.  Calling a function _info() is another bad sign.  It means that
> you can't discretely describe what it does.
>
> Remember, there are 3 distinct tasks here:
>
> 1. size the node information from init_node_data[]
> 2. size the easy reclaim zone based on the boot parameters
> 3. take holes into account when doing the reclaim zone sizing
>

right

*some pounding on the keyboard*

Is this a bit clearer? It's built and boot tested on one ppc64 machine. I 
am having trouble finding a ppc64 machine that *has* memory holes to be 
100% sure it's ok.

diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.16-rc3-mm1-103_x86coremem/arch/powerpc/mm/numa.c linux-2.6.16-rc3-mm1-104_ppc64coremem/arch/powerpc/mm/numa.c
--- linux-2.6.16-rc3-mm1-103_x86coremem/arch/powerpc/mm/numa.c	2006-02-16 09:50:42.000000000 +0000
+++ linux-2.6.16-rc3-mm1-104_ppc64coremem/arch/powerpc/mm/numa.c	2006-02-22 16:07:35.000000000 +0000
@@ -17,10 +17,12 @@
  #include <linux/nodemask.h>
  #include <linux/cpu.h>
  #include <linux/notifier.h>
+#include <linux/sort.h>
  #include <asm/sparsemem.h>
  #include <asm/lmb.h>
  #include <asm/system.h>
  #include <asm/smp.h>
+#include <asm/machdep.h>

  static int numa_enabled = 1;

@@ -101,22 +103,41 @@ void __init add_region(unsigned int nid,
  	init_node_data[i].nid = nid;
  }

+/* Compare two elements in init_node_data. Assumes start_pfn is the first member */
+static int cmp_init_node_data(const void *a, const void *b)
+{
+	return *(unsigned long *)a - *(unsigned long *)b;
+}
+
+/*
+ * init_node_data is not necessarily in pfn order making it difficult to
+ * determine where the EasyRclm should begin if it is requested. This sorts
+ * init_node_data by start_pfn
+ */
+void __init sort_regions(void)
+{
+	size_t num = 0;
+	size_t size_element = sizeof(init_node_data) / MAX_REGIONS;
+
+	while (init_node_data[num].end_pfn)
+		num++;
+
+	sort(init_node_data, num, size_element, cmp_init_node_data, NULL);
+}
+
  /* We assume init_node_data has no overlapping regions */
  void __init get_region(unsigned int nid, unsigned long *start_pfn,
-		       unsigned long *end_pfn, unsigned long *pages_present)
+		       unsigned long *end_pfn)
  {
  	unsigned int i;

  	*start_pfn = -1UL;
-	*end_pfn = *pages_present = 0;
+	*end_pfn = 0;

  	for (i = 0; init_node_data[i].end_pfn; i++) {
  		if (init_node_data[i].nid != nid)
  			continue;

-		*pages_present += init_node_data[i].end_pfn -
-			init_node_data[i].start_pfn;
-
  		if (init_node_data[i].start_pfn < *start_pfn)
  			*start_pfn = init_node_data[i].start_pfn;

@@ -129,6 +150,88 @@ void __init get_region(unsigned int nid,
  		*start_pfn = 0;
  }

+/* Initialise the size of each zone in a node */
+void __init zone_sizes_init(unsigned int nid,
+		unsigned long kernelcore_pages,
+		unsigned long *zones_size)
+{
+	unsigned int i;
+	unsigned long pages_present = 0;
+
+	/* Get the number of present pages in the node */
+	for (i = 0; init_node_data[i].end_pfn; i++) {
+		if (init_node_data[i].nid != nid)
+			continue;
+
+		pages_present += init_node_data[i].end_pfn -
+			init_node_data[i].start_pfn;
+	}
+
+	if (kernelcore_pages && kernelcore_pages < pages_present) {
+		zones_size[ZONE_DMA] = kernelcore_pages;
+		zones_size[ZONE_EASYRCLM] = pages_present - kernelcore_pages;
+	} else {
+		zones_size[ZONE_DMA] = pages_present;
+		zones_size[ZONE_EASYRCLM] = 0;
+	}
+}
+
+void __init get_zholes_size(unsigned int nid, unsigned long *zones_size,
+		unsigned long *zholes_size) {
+	unsigned int i = 0;
+	unsigned int start_easyrclm_pfn;
+	unsigned long last_end_pfn, first;
+
+	/* Find the PFN at which ZONE_DMA ends */
+	unsigned long pages_count = zones_size[ZONE_DMA];
+	for (i = 0; init_node_data[i].end_pfn; i++) {
+		unsigned long segment_size;
+		if (init_node_data[i].nid != nid)
+			continue;
+
+		/*
+		 * Check if the end of ZONE_DMA is in this segment of the
+		 * init_node_data
+		 */
+		segment_size = init_node_data[i].end_pfn -
+			init_node_data[i].start_pfn;
+		if (pages_count > segment_size) {
+			/* End of ZONE_DMA is not here, move on */
+			pages_count -= segment_size;
+			continue;
+		}
+
+		/* End of ZONE_DMA is here */
+		start_easyrclm_pfn = init_node_data[i].start_pfn + pages_count;
+		break;
+	}
+
+	/* Walk the map again and get the size of the holes */
+	first = 1;
+	zholes_size[ZONE_DMA] = 0;
+	zholes_size[ZONE_EASYRCLM] = 0;
+	for (i = 1; init_node_data[i].end_pfn; i++) {
+		unsigned long hole_size;
+		if (init_node_data[i].nid != nid)
+			continue;
+
+		if (first) {
+			last_end_pfn = init_node_data[i].end_pfn;
+			first = 0;
+			continue;
+		}
+
+		/* Hole found */
+		hole_size = init_node_data[i].start_pfn - last_end_pfn;
+		if (init_node_data[i].start_pfn < start_easyrclm_pfn) {
+			zholes_size[ZONE_DMA] += hole_size;
+		} else {
+			zholes_size[ZONE_EASYRCLM] += hole_size;
+		}
+		last_end_pfn = init_node_data[i].end_pfn;
+	}
+}
+
  static inline void map_cpu_to_node(int cpu, int node)
  {
  	numa_cpu_lookup_table[cpu] = node;
@@ -622,11 +725,11 @@ void __init do_init_bootmem(void)
  	register_cpu_notifier(&ppc64_numa_nb);

  	for_each_online_node(nid) {
-		unsigned long start_pfn, end_pfn, pages_present;
+		unsigned long start_pfn, end_pfn;
  		unsigned long bootmem_paddr;
  		unsigned long bootmap_pages;

-		get_region(nid, &start_pfn, &end_pfn, &pages_present);
+		get_region(nid, &start_pfn, &end_pfn);

  		/* Allocate the node structure node local if possible */
  		NODE_DATA(nid) = careful_allocation(nid,
@@ -721,21 +824,36 @@ void __init paging_init(void)
  {
  	unsigned long zones_size[MAX_NR_ZONES];
  	unsigned long zholes_size[MAX_NR_ZONES];
+	unsigned long kernelcore_pages = 0;
  	int nid;
+	char *opt;

  	memset(zones_size, 0, sizeof(zones_size));
  	memset(zholes_size, 0, sizeof(zholes_size));

-	for_each_online_node(nid) {
-		unsigned long start_pfn, end_pfn, pages_present;
-
-		get_region(nid, &start_pfn, &end_pfn, &pages_present);
+	/* Check if ZONE_EASYRCLM should be populated */
+	opt = strstr(cmd_line, "kernelcore=");
+	if (opt) {
+		opt += 11;
+		unsigned long size_bytes = memparse(opt, &opt);
+		kernelcore_pages = size_bytes >> PAGE_SHIFT;
+	}

-		zones_size[ZONE_DMA] = end_pfn - start_pfn;
-		zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - pages_present;
+	sort_regions();
+	for_each_online_node(nid) {
+		unsigned long start_pfn, end_region_pfn;

-		dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid,
-		    zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]);
+		get_region(nid, &start_pfn, &end_region_pfn);
+		zone_sizes_init(nid, kernelcore_pages, &zones_size[0]);
+		get_zholes_size(nid, &zones_size[0], &zholes_size[0]);
+
+		dbg("free_area_init DMA      node %d %lx %lx (hole: %lx)\n",
+		    nid, zones_size[ZONE_DMA],
+		    start_pfn, zholes_size[ZONE_DMA]);
+
+		dbg("free_area_init EasyRclm node %d %lx %lx (hole: %lx)\n",
+		    nid, zones_size[ZONE_EASYRCLM],
+		    start_pfn, zholes_size[ZONE_EASYRCLM]);

  		free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn,
  				    zholes_size);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2006-02-22 16:43 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-02-17 14:15 [PATCH 0/7] Reducing fragmentation using zones v5 Mel Gorman
2006-02-17 14:16 ` [PATCH 1/7] Add __GFP_EASYRCLM flag and update callers Mel Gorman
2006-02-17 14:16 ` [PATCH 2/7] Create the ZONE_EASYRCLM zone Mel Gorman
2006-02-17 14:16 ` [PATCH 3/7] x86 - Specify amount of kernel memory at boot time Mel Gorman
2006-02-17 14:17 ` [PATCH 4/7] ppc64 " Mel Gorman
2006-02-17 17:16   ` Dave Hansen
2006-02-17 19:03     ` Mel Gorman
2006-02-17 19:17       ` Dave Hansen
2006-02-17 19:36         ` Mel Gorman
2006-02-17 21:31     ` Joel Schopp
2006-02-21 14:51     ` Mel Gorman
2006-02-21 17:35       ` Dave Hansen
2006-02-22 16:43         ` Mel Gorman [this message]
2006-02-23 16:42           ` Dave Hansen
2006-02-23 17:19             ` Mel Gorman
2006-02-23 17:38               ` Dave Hansen
2006-02-23 18:01                 ` Mel Gorman
2006-02-23 18:15                   ` Dave Hansen
2006-02-24  0:15                     ` KAMEZAWA Hiroyuki
2006-02-24  9:04                     ` Mel Gorman
2006-02-23 17:40               ` Mike Kravetz
2006-02-17 14:17 ` [PATCH 5/7] At boot, determine what zone memory will hot-add to Mel Gorman
2006-02-17 14:17 ` [PATCH 6/7] Allow HugeTLB allocations to use ZONE_EASYRCLM Mel Gorman
2006-02-17 14:18 ` [PATCH 7/7] Add documentation for extra boot parameters Mel Gorman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Pine.LNX.4.64.0602221625100.2801@skynet.skynet.ie \
    --to=mel@csn.ul.ie \
    --cc=haveblue@us.ibm.com \
    --cc=lhms-devel@lists.sourceforge.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox