linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Miao Xie <miaox@cn.fujitsu.com>
To: David Rientjes <rientjes@google.com>
Cc: Nick Piggin <npiggin@suse.de>,
	Lee Schermerhorn <lee.schermerhorn@hp.com>,
	Paul Menage <menage@google.com>,
	Linux-Kernel <linux-kernel@vger.kernel.org>,
	Linux-MM <linux-mm@kvack.org>, Li Zefan <lizf@cn.fujitsu.com>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: Re: [PATCH V2 4/4] cpuset,mm: update task's mems_allowed lazily
Date: Thu, 01 Apr 2010 10:16:40 +0800	[thread overview]
Message-ID: <4BB40208.4010904@cn.fujitsu.com> (raw)
In-Reply-To: <alpine.DEB.2.00.1003310324550.17661@chino.kir.corp.google.com>

on 2010-3-31 18:34, David Rientjes wrote:
> On Wed, 31 Mar 2010, Miao Xie wrote:
> 
>> diff --git a/mm/mmzone.c b/mm/mmzone.c
>> index f5b7d17..43ac21b 100644
>> --- a/mm/mmzone.c
>> +++ b/mm/mmzone.c
>> @@ -58,6 +58,7 @@ struct zoneref *next_zones_zonelist(struct zoneref *z,
>>  					nodemask_t *nodes,
>>  					struct zone **zone)
>>  {
>> +	nodemask_t tmp_nodes;
>>  	/*
>>  	 * Find the next suitable zone to use for the allocation.
>>  	 * Only filter based on nodemask if it's set
>> @@ -65,10 +66,16 @@ struct zoneref *next_zones_zonelist(struct zoneref *z,
>>  	if (likely(nodes == NULL))
>>  		while (zonelist_zone_idx(z) > highest_zoneidx)
>>  			z++;
>> -	else
>> -		while (zonelist_zone_idx(z) > highest_zoneidx ||
>> -				(z->zone && !zref_in_nodemask(z, nodes)))
>> -			z++;
>> +	else {
>> +		tmp_nodes = *nodes;
>> +		if (nodes_empty(tmp_nodes))
>> +			while (zonelist_zone_idx(z) > highest_zoneidx)
>> +				z++;
>> +		else
>> +			while (zonelist_zone_idx(z) > highest_zoneidx ||
>> +				(z->zone && !zref_in_nodemask(z, &tmp_nodes)))
>> +				z++;
>> +	}
>>  
>>  	*zone = zonelist_zone(z);
>>  	return z;
> 
> Unfortunately, you can't allocate a nodemask_t on the stack here because 
> this is used in the iteration for get_page_from_freelist() which can occur 
> very deep in the stack already and there's a probability of overflow.  
> Dynamically allocating a nodemask_t simply wouldn't scale here, either, 
> since it would allocate on every iteration of a zonelist.
> 

Maybe it is better to fix this problem by checking this function's return
value, because this function will return NULL if seeing an empty nodemask.

The new patch is attached below.
---
 kernel/cpuset.c |   11 +++++++++--
 mm/mempolicy.c  |   28 +++++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index d109467..fbb1f1c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -952,8 +952,6 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 static void cpuset_change_task_nodemask(struct task_struct *tsk,
 					nodemask_t *newmems)
 {
-	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
-	mpol_rebind_task(tsk, &tsk->mems_allowed);
 	mpol_rebind_task(tsk, newmems);
 	tsk->mems_allowed = *newmems;
 }
@@ -2442,6 +2440,15 @@ int cpuset_mem_spread_node(void)
 	node = next_node(current->cpuset_mem_spread_rotor, current->mems_allowed);
 	if (node == MAX_NUMNODES)
 		node = first_node(current->mems_allowed);
+
+	/*
+	 * if node is still MAX_NUMNODES, that means the kernel allocator saw
+	 * an empty nodemask. In that case, the kernel allocator allocate
+	 * memory on the current node.
+	 */
+	if (unlikely(node == MAX_NUMNODES))
+		node = numa_node_id();
+
 	current->cpuset_mem_spread_rotor = node;
 	return node;
 }
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 8034abd..0905b84 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1449,8 +1449,16 @@ static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy)
 		 * the first node in the mask instead.
 		 */
 		if (unlikely(gfp & __GFP_THISNODE) &&
-				unlikely(!node_isset(nd, policy->v.nodes)))
+				unlikely(!node_isset(nd, policy->v.nodes))) {
 			nd = first_node(policy->v.nodes);
+			/*
+			 * When rebinding task->mempolicy, th kernel allocator
+			 * may see an empty nodemask, and first_node() returns
+			 * MAX_NUMNODES, In that case, we will use current node.
+			 */
+			if (unlikely(nd == MAX_NUMNODES))
+				nd = numa_node_id();
+		}
 		break;
 	case MPOL_INTERLEAVE: /* should not happen */
 		break;
@@ -1510,6 +1518,14 @@ unsigned slab_node(struct mempolicy *policy)
 		(void)first_zones_zonelist(zonelist, highest_zoneidx,
 							&policy->v.nodes,
 							&zone);
+		/*
+		 * When rebinding task->mempolicy, th kernel allocator
+		 * may see an empty nodemask, and first_zones_zonelist()
+		 * returns a NULL zone, In that case, we will use current
+		 * node.
+		 */
+		if (unlikely(zone == NULL))
+			return numa_node_id();
 		return zone->node;
 	}
 
@@ -1529,10 +1545,18 @@ static unsigned offset_il_node(struct mempolicy *pol,
 
 	if (!nnodes)
 		return numa_node_id();
+
 	target = (unsigned int)off % nnodes;
 	c = 0;
 	do {
 		nid = next_node(nid, pol->v.nodes);
+		/*
+		 * When rebinding task->mempolicy, th kernel allocator
+		 * may see an empty nodemask, and next_node() returns
+		 * MAX_NUMNODES, In that case, we will use current node.
+		 */
+		if (unlikely(nid == MAX_NUMNODES))
+			return numa_node_id();
 		c++;
 	} while (c <= target);
 	return nid;
@@ -1631,7 +1655,9 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
 	case MPOL_BIND:
 		/* Fall through */
 	case MPOL_INTERLEAVE:
+		task_lock(current);
 		*mask =  mempolicy->v.nodes;
+		task_unlock(current);
 		break;
 
 	default:
-- 
1.6.5.2


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

      reply	other threads:[~2010-04-01  2:16 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-03-08 10:10 Miao Xie
2010-03-08 21:46 ` David Rientjes
2010-03-09  7:25   ` Miao Xie
2010-03-11  8:15     ` Nick Piggin
2010-03-11 10:33       ` Miao Xie
2010-03-11 11:03         ` Nick Piggin
2010-03-25 10:23           ` Miao Xie
2010-03-25 12:56             ` Miao Xie
2010-03-25 13:33           ` [PATCH] [PATCH -mmotm] cpuset,mm: use seqlock to protect task->mempolicy and mems_allowed (v2) (was: Re: [PATCH V2 4/4] cpuset,mm: update task's mems_allowed lazily) Miao Xie
2010-03-28  5:30             ` Bob Liu
2010-03-31 19:42             ` Andrew Morton
2010-03-31  9:54           ` [PATCH V2 4/4] cpuset,mm: update task's mems_allowed lazily Miao Xie
2010-03-31 10:34             ` David Rientjes
2010-04-01  2:16               ` Miao Xie [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4BB40208.4010904@cn.fujitsu.com \
    --to=miaox@cn.fujitsu.com \
    --cc=akpm@linux-foundation.org \
    --cc=lee.schermerhorn@hp.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lizf@cn.fujitsu.com \
    --cc=menage@google.com \
    --cc=npiggin@suse.de \
    --cc=rientjes@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox