linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: David Rientjes <rientjes@google.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: linux-mm@kvack.org, mel@skynet.ie, clameter@engr.sgi.com,
	akpm@linux-foundation.org
Subject: Re: [RFC} memory unplug patchset prep [9/16] create movable zone at boot
Date: Tue, 6 Mar 2007 08:06:48 -0800 (PST)	[thread overview]
Message-ID: <Pine.LNX.4.64.0703060139570.22477@chino.kir.corp.google.com> (raw)
In-Reply-To: <20070306135232.42a55807.kamezawa.hiroyu@jp.fujitsu.com>

On Tue, 6 Mar 2007, KAMEZAWA Hiroyuki wrote:

> This patch adds codes for creating movable zones.
> 
> Add 2 kernel parameters.
> - kernel_core_pages=XXX[KMG]
> - kernel_core_ratio=xx
> 

These would never be specified together, right?

> Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
> ===================================================================
> --- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
> +++ devel-tree-2.6.20-mm2/mm/page_alloc.c
> @@ -137,12 +137,16 @@ static unsigned long __initdata dma_rese
>    int __initdata nr_nodemap_entries;
>    unsigned long __initdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
>    unsigned long __initdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
> +  unsigned long __initdata lowest_movable_pfn[MAX_NUMNODES];
> +  unsigned long kernel_core_ratio;
> +  unsigned long kernel_core_pages;
>  #ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
>    unsigned long __initdata node_boundary_start_pfn[MAX_NUMNODES];
>    unsigned long __initdata node_boundary_end_pfn[MAX_NUMNODES];
>  #endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
>  #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
>  
> +
>  #ifdef CONFIG_DEBUG_VM
>  static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
>  {

You could probably get away with:

	union {
		unsigned long kernel_core_ratio;
		unsigned long kernel_core_pages;
	};

> @@ -2604,6 +2608,8 @@ void __init get_pfn_range_for_nid(unsign
>   */
>  unsigned long __init zone_spanned_pages_in_node(int nid,
>  					unsigned long zone_type,
> +					unsigned long *start_pfn,
> +					unsigned long *end_pfn,
>  					unsigned long *ignored)
>  {
>  	unsigned long node_start_pfn, node_end_pfn;
> @@ -2611,8 +2617,30 @@ unsigned long __init zone_spanned_pages_
>  
>  	/* Get the start and end of the node and zone */
>  	get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
> -	zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
> -	zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
> +	if (start_pfn)
> +		*start_pfn = 0;
> +	if (end_pfn)
> +		*end_pfn = 0;
> +	if (!is_configured_zone(ZONE_MOVABLE) ||
> +		   lowest_movable_pfn[nid] == 0) {
> +		/* we don't use ZONE_MOVABLE */
> +		zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
> +		zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
> +	} else if (zone_type == ZONE_MOVABLE) {
> +		zone_start_pfn = lowest_movable_pfn[nid];
> +		zone_end_pfn = node_end_pfn;
> +	} else {
> +		/* adjust range to lowest_movable_pfn[] */
> +		zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
> +		zone_start_pfn = max(zone_start_pfn, node_start_pfn);
> +
> +		if (zone_start_pfn >= lowest_movable_pfn[nid])
> +			return 0;
> +		zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
> +		zone_end_pfn = min(zone_end_pfn, node_end_pfn);
> +		if (zone_end_pfn > lowest_movable_pfn[nid])
> +			zone_end_pfn = lowest_movable_pfn[nid];
> +	}
>  
>  	/* Check that this node has pages within the zone's required range */
>  	if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)

These hacks of returning start_pfn and end_pfn depending on where it was 
called from and testing for things like start_pfn == end_pfn don't make 
much sense.  It'd probably be better to separate this logic out into a 
helper function and then call it from zone_absent_pages_in_node() and 
zone_spanned_pages_in_node(), respectively.

> @@ -2733,20 +2781,115 @@ static void __init calculate_node_totalp
>  	enum zone_type i;
>  
>  	for (i = 0; i < MAX_NR_ZONES; i++)
> -		totalpages += zone_spanned_pages_in_node(pgdat->node_id, i,
> +		totalpages += zone_spanned_pages_in_node(pgdat->node_id, i, NULL, NULL,
>  								zones_size);
>  	pgdat->node_spanned_pages = totalpages;
>  
>  	realtotalpages = totalpages;
>  	for (i = 0; i < MAX_NR_ZONES; i++)
>  		realtotalpages -=
> -			zone_absent_pages_in_node(pgdat->node_id, i,
> +			zone_absent_pages_in_node(pgdat->node_id, i, 0, 0,
>  								zholes_size);
>  	pgdat->node_present_pages = realtotalpages;
>  	printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
>  							realtotalpages);
>  }
>  
> +#ifdef CONFIG_ZONE_MOVABLE
> +
> +unsigned long calc_zone_alignment(unsigned long pfn)
> +{
> +#ifdef CONFIG_SPARSEMEM
> +	return (pfn + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK;
> +#else
> +	return (pfn + MAX_ORDER_NR_PAGES - 1) & ~(MAX_ORDER_NR_PAGES - 1)
> +#endif
> +}
> +

Another missing semicolon.

> +static void split_movable_pages(void)
> +{
> +	int i, nid;
> +	unsigned long total_pages, nr_pages, start_pfn, end_pfn, pfn;
> +	long core;
> +	for_each_online_node(nid) {
> +		lowest_movable_pfn[nid] = 0;
> +		pfn = 0;
> +		total_pages = 0;
> +		for_each_active_range_index_in_nid(i, nid) {
> +			start_pfn = early_node_map[i].start_pfn;
> +			end_pfn = early_node_map[i].end_pfn;
> +			total_pages += end_pfn - start_pfn;
> +		}
> +		core = total_pages * kernel_core_ratio/100;
> +		for_each_active_range_index_in_nid(i, nid) {
> +			start_pfn = early_node_map[i].start_pfn;
> +			end_pfn = early_node_map[i].end_pfn;
> +			nr_pages = end_pfn - start_pfn;
> +			if (nr_pages > core) {
> +				pfn = start_pfn + core;
> +				pfn = calc_zone_alignment(pfn);
> +				if (pfn < end_pfn) {
> +					lowest_movable_pfn[nid] = pfn;
> +					break;
> +				} else {
> +					core -= nr_pages;
> +					if (core < 0)
> +						core = 0;
> +				}
> +			} else {
> +				core -= nr_pages;
> +			}
> +		}
> +	}
> +	return;
> +}
> +
> +
> +static void reserve_movable_pages(void)
> +{
> +	memset(lowest_movable_pfn, 0, MAX_NUMNODES);
> +	if (kernel_core_pages) {
> +		alloc_core_pages_from_low();
> +	} else if (kernel_core_ratio) {
> +		split_movable_pages();
> +	}
> +	return;
> +}
> +#else
> +static void reserve_movable_pages(void)
> +{
> +	return;
> +}
> +#endif
>  /*
>   * Set up the zone data structures:
>   *   - mark all pages reserved

reserve_movable_pages() and its two helper functions, 
alloc_core_pages_from_low() and split_movable_pages(), can be __init?

If so, then both kernel_core_pages and kernel_core_ratio should be 
__initdata.

> Index: devel-tree-2.6.20-mm2/Documentation/kernel-parameters.txt
> ===================================================================
> --- devel-tree-2.6.20-mm2.orig/Documentation/kernel-parameters.txt
> +++ devel-tree-2.6.20-mm2/Documentation/kernel-parameters.txt
> @@ -764,6 +764,17 @@ and is between 256 and 4096 characters. 
>  
>  	keepinitrd	[HW,ARM]
>  
> +	kernel_core_pages=nn[KMG] [KNL, BOOT] divide the whole memory into
> +			not-movable and movable. movable memory can be
> +			used only for page cache and user data. This option
> +			specifies the amount of not-movable pages, called core
> +			pages. core pages are allocated from the lower address.
> +
> +	kernel_core_ratio=nn [KNL, BOOT] specifies the amount of the core
> +			pages(see kernel_core_pages) by the ratio against
> +			total memory. If NUMA, core pages are allocated for
> +			each node by this ratio. "0" is not allowed.
> +
>  	kstack=N	[IA-32,X86-64] Print N words from the kernel stack
>  			in oops dumps.
>  

This documentation doesn't specify that we can't use both parameters 
together even though we can't.

		David

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

  reply	other threads:[~2007-03-06 16:06 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-03-06  4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
2007-03-06  4:42 ` [RFC} memory unplug patchset prep [1/16] zone ids cleanup KAMEZAWA Hiroyuki
2007-03-06 15:36   ` David Rientjes
2007-03-07  1:21     ` KAMEZAWA Hiroyuki
2007-03-06  4:43 ` [RFC} memory unplug patchset prep [2/16] gathering alloc_zeroed_user_highpage() KAMEZAWA Hiroyuki
2007-03-06 15:54   ` David Rientjes
2007-03-07  1:46     ` KAMEZAWA Hiroyuki
2007-03-06  4:44 ` [RFC} memory unplug patchset prep [3/16] define is_identity_mapped KAMEZAWA Hiroyuki
2007-03-06 15:55   ` David Rientjes
2007-03-07  1:48     ` KAMEZAWA Hiroyuki
2007-03-06  4:45 ` [RFC} memory unplug patchset prep [4/16] ZONE_MOVABLE KAMEZAWA Hiroyuki
2007-03-06 16:06   ` David Rientjes
2007-03-07  1:51     ` KAMEZAWA Hiroyuki
2007-03-06  4:47 ` [RFC} memory unplug patchset prep [5/16] GFP_MOVABLE KAMEZAWA Hiroyuki
2007-03-06  4:48 ` [RFC} memory unplug patchset prep [6/16] alloc_zeroed_user_high_movable KAMEZAWA Hiroyuki
2007-03-06  4:49 ` [RFC} memory unplug patchset prep [7/16] change caller's gfp_mask KAMEZAWA Hiroyuki
2007-03-06  4:50 ` [RFC} memory unplug patchset prep [8/16] counter for ZONE_MOVABLE KAMEZAWA Hiroyuki
2007-03-06 16:11   ` David Rientjes
2007-03-07  1:55     ` KAMEZAWA Hiroyuki
2007-03-06  4:52 ` [RFC} memory unplug patchset prep [9/16] create movable zone at boot KAMEZAWA Hiroyuki
2007-03-06 16:06   ` David Rientjes [this message]
2007-03-07  2:02     ` KAMEZAWA Hiroyuki
2007-03-06  4:53 ` [RFC} memory unplug patchset prep [10/16] ia64 support KAMEZAWA Hiroyuki
2007-03-06  4:55 ` [RFC} memory unplug patchset prep [11/16] page isolation core KAMEZAWA Hiroyuki
2007-03-06  4:56 ` [RFC} memory unplug patchset prep [12/16] drain all pages KAMEZAWA Hiroyuki
2007-03-06  4:57 ` [RFC} memory unplug patchset prep [13/16] isolate freed pages KAMEZAWA Hiroyuki
2007-03-06  4:59 ` [RFC} memory unplug patchset prep [14/16] memory unplug core KAMEZAWA Hiroyuki
2007-03-06  5:00 ` [RFC} memory unplug patchset prep [15/16] hot-unplug interface for ia64 KAMEZAWA Hiroyuki
2007-03-06  5:02 ` [RFC} memory unplug patchset prep [16/16] migration nocontext KAMEZAWA Hiroyuki
2007-03-06 15:24 ` [RFC} memory unplug patchset prep [0/16] David Rientjes
2007-03-07  2:24   ` KAMEZAWA Hiroyuki
2007-03-07  2:31     ` David Rientjes
2007-03-07  2:44       ` KAMEZAWA Hiroyuki
2007-03-07 19:44       ` Mark Gross

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Pine.LNX.4.64.0703060139570.22477@chino.kir.corp.google.com \
    --to=rientjes@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=clameter@engr.sgi.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-mm@kvack.org \
    --cc=mel@skynet.ie \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox