linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Mel Gorman <mel@csn.ul.ie>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Christoph Lameter <clameter@sgi.com>,
	Linux Memory Management List <linux-mm@kvack.org>,
	Lee Schermerhorn <Lee.Schermerhorn@hp.com>,
	ak@suse.de, akpm@linux-foundation.org, pj@sgi.com
Subject: Re: NUMA policy issues with ZONE_MOVABLE
Date: Sat, 28 Jul 2007 12:57:09 +0100 (IST)	[thread overview]
Message-ID: <Pine.LNX.4.64.0707281255480.7824@skynet.skynet.ie> (raw)
In-Reply-To: <20070728162844.9d5b8c6e.kamezawa.hiroyu@jp.fujitsu.com>

On Sat, 28 Jul 2007, KAMEZAWA Hiroyuki wrote:

> On Fri, 27 Jul 2007 16:45:19 +0100
> mel@skynet.ie (Mel Gorman) wrote:
>
>> Obvious things that are outstanding;
>>
>> o Compile-test parisc
>> o Split patch in two to keep the zone_idx changes separately
>> o Verify zlccache is not broken
>> o Have a version of __alloc_pages take a nodemask and ditch
>>   bind_zonelist()
>>
>> I can work on bringing this up to scratch during the cycle.
>>
>> Patch as follows. Comments?
>>
>
> I like this idea in general. My concern is zonelist scan cost.
> Hmm, can this help?
>

Does this not make the assumption that the zonelists are in zone-order as
opposed to node-order? i.e. that it is

H1N1D1H2N2D2H3N3D3 instead of
H1H2H3N1N2N3D1D2D3

If it's node-order, does this scheme break?

> ---
> include/linux/mmzone.h |    1
> mm/page_alloc.c        |   51 +++++++++++++++++++++++++++++++++++++++++++++++--
> 2 files changed, 50 insertions(+), 2 deletions(-)
>
> Index: linux-2.6.23-rc1.test/include/linux/mmzone.h
> ===================================================================
> --- linux-2.6.23-rc1.test.orig/include/linux/mmzone.h
> +++ linux-2.6.23-rc1.test/include/linux/mmzone.h
> @@ -406,6 +406,7 @@ struct zonelist_cache;
>
> struct zonelist {
> 	struct zonelist_cache *zlcache_ptr;		     // NULL or &zlcache
> +	unsigned short gfp_skip[MAX_NR_ZONES];
> 	struct zone *zones[MAX_ZONES_PER_ZONELIST + 1];      // NULL delimited
> #ifdef CONFIG_NUMA
> 	struct zonelist_cache zlcache;			     // optional ...
> Index: linux-2.6.23-rc1.test/mm/page_alloc.c
> ===================================================================
> --- linux-2.6.23-rc1.test.orig/mm/page_alloc.c
> +++ linux-2.6.23-rc1.test/mm/page_alloc.c
> @@ -1158,13 +1158,14 @@ get_page_from_freelist(gfp_t gfp_mask, u
> 	int zlc_active = 0;		/* set if using zonelist_cache */
> 	int did_zlc_setup = 0;		/* just call zlc_setup() one time */
> 	enum zone_type highest_zoneidx = gfp_zone(gfp_mask);
> +	int default_skip = zonelist->gfp_skip[highest_zoneidx];
>
> zonelist_scan:
> 	/*
> 	 * Scan zonelist, looking for a zone with enough free.
> 	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
> 	 */
> -	z = zonelist->zones;
> +	z = zonelist->zones + default_skip;
>
> 	do {
> 		if (should_filter_zone(*z, highest_zoneidx))
> @@ -1235,6 +1236,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned i
> 	int do_retry;
> 	int alloc_flags;
> 	int did_some_progress;
> +	int gfp_skip = zonelist->gfp_skip[gfp_zone(gfp_mask)];
>
> 	might_sleep_if(wait);
>
> @@ -1265,7 +1267,7 @@ restart:
> 	if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
> 		goto nopage;
>
> -	for (z = zonelist->zones; *z; z++)
> +	for (z = zonelist->zones + gfp_skip; *z; z++)
> 		wakeup_kswapd(*z, order);
>
> 	/*
> @@ -2050,6 +2052,50 @@ static void build_zonelist_cache(pg_data
>
> #endif	/* CONFIG_NUMA */
>
> +static inline
> +unsigned short find_first_zone(enum zone_type target, struct zonelist *zl)
> +{
> +	unsigned short index = 0;
> +	struct zone *z;
> +	z = zl->zones[index];
> +	while (z != NULL) {
> +		if (!should_filter_zone(z, target))
> +			return index;
> +		z = zl->zones[++index];
> +	}
> +	return 0;
> +}
> +/*
> + * record the first available zone per gfp.
> + */
> +
> +static void build_zonelist_skip(pg_data_t *pgdat)
> +{
> +	enum zone_type target;
> +	unsigned short index;
> +	struct zonelist *zl = &pgdat->node_zonelist;
> +
> +	target = gfp_zone(GFP_KERNEL|GFP_DMA);
> +	index = find_first_zone(target, zl);
> +	zl->gfp_skip[target] = index;
> +
> +	target = gfp_zone(GFP_KERNEL|GFP_DMA32);
> +	index = find_first_zone(target, zl);
> +	zl->gfp_skip[target] = index;
> +
> +	target = gfp_zone(GFP_KERNEL);
> +	index = find_first_zone(target, zl);
> +	zl->gfp_skip[target] = index;
> +
> +	target = gfp_zone(GFP_HIGHUSER);
> +	index = find_first_zone(target, zl);
> +	zl->gfp_skip[target] = index;
> +
> +	target = gfp_zone(GFP_HIGHUSER_MOVABLE);
> +	index = find_first_zone(target, zl);
> +	zl->gfp_skip[target] = index;
> +}
> +
> /* return values int ....just for stop_machine_run() */
> static int __build_all_zonelists(void *dummy)
> {
> @@ -2058,6 +2104,7 @@ static int __build_all_zonelists(void *d
> 	for_each_online_node(nid) {
> 		build_zonelists(NODE_DATA(nid));
> 		build_zonelist_cache(NODE_DATA(nid));
> +		build_zonelist_skip(NODE_DATA(nid));
> 	}
> 	return 0;
> }
>

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2007-07-28 11:57 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-07-25  4:20 Christoph Lameter
2007-07-25  4:47 ` Nick Piggin
2007-07-25  5:05   ` Christoph Lameter
2007-07-25  5:24     ` Nick Piggin
2007-07-25  6:00       ` Christoph Lameter
2007-07-25  6:09         ` Nick Piggin
2007-07-25  9:32       ` Andi Kleen
2007-07-25  6:36 ` KAMEZAWA Hiroyuki
2007-07-25 11:16 ` Mel Gorman
2007-07-25 14:30   ` Lee Schermerhorn
2007-07-25 19:31   ` Christoph Lameter
2007-07-26  4:15     ` KAMEZAWA Hiroyuki
2007-07-26  4:53       ` Christoph Lameter
2007-07-26  7:41         ` KAMEZAWA Hiroyuki
2007-07-26 16:16       ` Mel Gorman
2007-07-26 18:03         ` Christoph Lameter
2007-07-26 18:26           ` Mel Gorman
2007-07-26 13:23     ` Mel Gorman
2007-07-26 18:07       ` Christoph Lameter
2007-07-26 22:59         ` Mel Gorman
2007-07-27  1:22           ` Christoph Lameter
2007-07-27  8:20             ` Mel Gorman
2007-07-27 15:45               ` Mel Gorman
2007-07-27 17:35                 ` Christoph Lameter
2007-07-27 17:46                   ` Mel Gorman
2007-07-27 18:38                     ` Christoph Lameter
2007-07-27 18:00                   ` [PATCH] Document Linux Memory Policy - V2 Lee Schermerhorn
2007-07-27 18:38                     ` Randy Dunlap
2007-07-27 19:01                       ` Lee Schermerhorn
2007-07-27 19:21                         ` Randy Dunlap
2007-07-27 18:55                     ` Christoph Lameter
2007-07-27 19:24                       ` Lee Schermerhorn
2007-07-31 15:14                     ` Mel Gorman
2007-07-31 16:34                       ` Lee Schermerhorn
2007-07-31 19:10                         ` Christoph Lameter
2007-07-31 19:46                           ` Lee Schermerhorn
2007-07-31 19:58                             ` Christoph Lameter
2007-07-31 20:23                               ` Lee Schermerhorn
2007-07-31 20:48                         ` [PATCH] Document Linux Memory Policy - V3 Lee Schermerhorn
2007-08-03 13:52                           ` Mel Gorman
2007-07-28  7:28                 ` NUMA policy issues with ZONE_MOVABLE KAMEZAWA Hiroyuki
2007-07-28 11:57                   ` Mel Gorman [this message]
2007-07-28 14:10                     ` KAMEZAWA Hiroyuki
2007-07-28 14:21                       ` KAMEZAWA Hiroyuki
2007-07-30 12:41                         ` Mel Gorman
2007-07-30 18:06                           ` Christoph Lameter
2007-07-27 14:24           ` Lee Schermerhorn
2007-08-01 18:59           ` Lee Schermerhorn
2007-08-02  0:36             ` KAMEZAWA Hiroyuki
2007-08-02 17:10             ` Mel Gorman
2007-08-02 17:51               ` Lee Schermerhorn
2007-07-26 18:09       ` Lee Schermerhorn
2007-08-02 14:09     ` Mel Gorman
2007-08-02 18:56       ` Christoph Lameter
2007-08-02 19:42         ` Mel Gorman
2007-08-02 19:52           ` Christoph Lameter
2007-08-03  9:32             ` Mel Gorman
2007-08-03 16:36               ` Christoph Lameter
2007-07-25 14:27 ` Lee Schermerhorn
2007-07-25 17:39   ` Mel Gorman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Pine.LNX.4.64.0707281255480.7824@skynet.skynet.ie \
    --to=mel@csn.ul.ie \
    --cc=Lee.Schermerhorn@hp.com \
    --cc=ak@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=clameter@sgi.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-mm@kvack.org \
    --cc=pj@sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox