linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Yasunori Goto <y-goto@jp.fujitsu.com>
To: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Mel Gorman <mel@csn.ul.ie>,
	Christoph Lameter <cl@linux-foundation.org>,
	linux-mm <linux-mm@kvack.org>,
	Linux Kernel ML <linux-kernel@vger.kernel.org>
Subject: [RFC:Patch: 005/008](memory hotplug) check node online before NODE_DATA and so on
Date: Thu, 31 Jul 2008 21:01:14 +0900	[thread overview]
Message-ID: <20080731210011.2A4B.E1E9C6FF@jp.fujitsu.com> (raw)
In-Reply-To: <20080731203549.2A3F.E1E9C6FF@jp.fujitsu.com>

When the kernel uses NODE_DATA(nid), it must check whether that node is
really online. In addition, if numa_node_id() returns an offlined node,
that must be a bug, because CPU offline on the node has to be executed
before node offline.
This patch adds those node-online checks, and adds read locks in a few
other small places that parse zones/zonelists.


Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>

---
 mm/mempolicy.c  |   11 +++++++++--
 mm/page_alloc.c |   19 +++++++++++++++++--
 mm/quicklist.c  |    8 +++++++-
 mm/slub.c       |    7 ++++++-
 mm/vmscan.c     |   22 +++++++++++++++++++---
 5 files changed, 58 insertions(+), 9 deletions(-)

Index: current/mm/page_alloc.c
===================================================================
--- current.orig/mm/page_alloc.c	2008-07-29 22:06:46.000000000 +0900
+++ current/mm/page_alloc.c	2008-07-29 22:17:16.000000000 +0900
@@ -1884,7 +1884,12 @@ static unsigned int nr_free_zone_pages(i
 	/* Just pick one node, since fallback list is circular */
 	unsigned int sum = 0;
 
-	struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
+	struct zonelist *zonelist;
+	int node = numa_node_id();
+
+	pgdat_remove_read_lock();
+	BUG_ON(!node_online(node));
+	zonelist = node_zonelist(node, GFP_KERNEL);
 
 	for_each_zone_zonelist(zone, z, zonelist, offset) {
 		unsigned long size = zone->present_pages;
@@ -1892,6 +1897,7 @@ static unsigned int nr_free_zone_pages(i
 		if (size > high)
 			sum += size - high;
 	}
+	pgdat_remove_read_unlock();
 
 	return sum;
 }
@@ -1935,7 +1941,14 @@ EXPORT_SYMBOL(si_meminfo);
 #ifdef CONFIG_NUMA
 void si_meminfo_node(struct sysinfo *val, int nid)
 {
-	pg_data_t *pgdat = NODE_DATA(nid);
+	pg_data_t *pgdat;
+
+	pgdat_remove_read_lock();
+	if (unlikely(!node_online(nid))) {
+		    pgdat_remove_read_unlock();
+		    return;
+	}
+	pgdat = NODE_DATA(nid);
 
 	val->totalram = pgdat->node_present_pages;
 	val->freeram = node_page_state(nid, NR_FREE_PAGES);
@@ -1947,6 +1960,8 @@ void si_meminfo_node(struct sysinfo *val
 	val->totalhigh = 0;
 	val->freehigh = 0;
 #endif
+	pgdat_remove_read_unlock();
+
 	val->mem_unit = PAGE_SIZE;
 }
 #endif
Index: current/mm/quicklist.c
===================================================================
--- current.orig/mm/quicklist.c	2008-07-29 22:06:46.000000000 +0900
+++ current/mm/quicklist.c	2008-07-29 22:17:16.000000000 +0900
@@ -26,7 +26,12 @@ DEFINE_PER_CPU(struct quicklist, quickli
 static unsigned long max_pages(unsigned long min_pages)
 {
 	unsigned long node_free_pages, max;
-	struct zone *zones = NODE_DATA(numa_node_id())->node_zones;
+	struct zone *zones;
+	int node = numa_node_id();
+
+	pgdat_remove_read_lock();
+	BUG_ON(!node_online(node));
+	zones = NODE_DATA(node)->node_zones;
 
 	node_free_pages =
 #ifdef CONFIG_ZONE_DMA
@@ -37,6 +42,7 @@ static unsigned long max_pages(unsigned 
 #endif
 		zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES);
 
+	pgdat_remove_read_unlock();
 	max = node_free_pages / FRACTION_OF_NODE_MEM;
 	return max(max, min_pages);
 }
Index: current/mm/vmscan.c
===================================================================
--- current.orig/mm/vmscan.c	2008-07-29 22:06:46.000000000 +0900
+++ current/mm/vmscan.c	2008-07-29 22:17:42.000000000 +0900
@@ -1710,11 +1710,21 @@ unsigned long try_to_free_mem_cgroup_pag
 		.isolate_pages = mem_cgroup_isolate_pages,
 	};
 	struct zonelist *zonelist;
+	unsigned long ret;
+	int node = numa_node_id();
 
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
-	zonelist = NODE_DATA(numa_node_id())->node_zonelists;
-	return do_try_to_free_pages(zonelist, &sc);
+
+	pgdat_remove_read_lock_sleepable();
+	if (unlikely(!node_online(node))) {
+		pgdat_remove_read_unlock_sleepable();
+		return 0;
+	}
+	zonelist = NODE_DATA(node)->node_zonelists;
+	ret = do_try_to_free_pages(zonelist, &sc);
+	pgdat_remove_read_unlock_sleepable();
+	return ret;
 }
 #endif
 
@@ -2636,19 +2646,25 @@ static ssize_t read_scan_unevictable_nod
 static ssize_t write_scan_unevictable_node(struct sys_device *dev,
 					const char *buf, size_t count)
 {
-	struct zone *node_zones = NODE_DATA(dev->id)->node_zones;
+	struct zone *node_zones;
 	struct zone *zone;
 	unsigned long res;
 	unsigned long req = strict_strtoul(buf, 10, &res);
+	int node = dev->id;
 
 	if (!req)
 		return 1;	/* zero is no-op */
 
+	pgdat_remove_read_lock();
+	BUG_ON(!node_online(node));
+
+	node_zones = NODE_DATA(node)->node_zones;
 	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
 		if (!populated_zone(zone))
 			continue;
 		scan_zone_unevictable_pages(zone);
 	}
+	pgdat_remove_read_unlock();
 	return 1;
 }
 
Index: current/mm/slub.c
===================================================================
--- current.orig/mm/slub.c	2008-07-29 22:06:46.000000000 +0900
+++ current/mm/slub.c	2008-07-29 22:17:16.000000000 +0900
@@ -1300,6 +1300,7 @@ static struct page *get_any_partial(stru
 	struct zone *zone;
 	enum zone_type high_zoneidx = gfp_zone(flags);
 	struct page *page;
+	int node;
 
 	/*
 	 * The defrag ratio allows a configuration of the tradeoffs between
@@ -1323,7 +1324,10 @@ static struct page *get_any_partial(stru
 			get_cycles() % 1024 > s->remote_node_defrag_ratio)
 		return NULL;
 
-	zonelist = node_zonelist(slab_node(current->mempolicy), flags);
+	pgdat_remove_read_lock();
+	node = slab_node(current->mempolicy);
+	BUG_ON(!node_online(node));
+	zonelist = node_zonelist(node, flags);
 	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 		struct kmem_cache_node *n;
 
@@ -1336,6 +1340,7 @@ static struct page *get_any_partial(stru
 				return page;
 		}
 	}
+	pgdat_remove_read_unlock();
 #endif
 	return NULL;
 }
Index: current/mm/mempolicy.c
===================================================================
--- current.orig/mm/mempolicy.c	2008-07-29 22:06:46.000000000 +0900
+++ current/mm/mempolicy.c	2008-07-29 22:17:40.000000000 +0900
@@ -1407,11 +1407,18 @@ unsigned slab_node(struct mempolicy *pol
 		struct zonelist *zonelist;
 		struct zone *zone;
 		enum zone_type highest_zoneidx = gfp_zone(GFP_KERNEL);
-		zonelist = &NODE_DATA(numa_node_id())->node_zonelists[0];
+		int node = numa_node_id();
+
+		pgdat_remove_read_lock();
+		BUG_ON(!node_online(node));
+		zonelist = &NODE_DATA(node)->node_zonelists[0];
 		(void)first_zones_zonelist(zonelist, highest_zoneidx,
 							&policy->v.nodes,
 							&zone);
-		return zone->node;
+		node = zone->node;
+		pgdat_remove_read_unlock();
+
+		return node;
 	}
 
 	default:

-- 
Yasunori Goto 


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2008-07-31 12:01 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-07-31 11:50 [RFC:Patch: 000/008](memory hotplug) rough idea of pgdat removing Yasunori Goto
2008-07-31 11:55 ` [RFC:Patch: 001/008](memory hotplug) change parameter from pointer of zonelist to node id Yasunori Goto
2008-07-31 11:56 ` [RFC:Patch: 002/008](memory hotplug) pgdat_remove_read_lock/unlock Yasunori Goto
2008-07-31 11:58 ` [RFC:Patch: 003/008](memory hotplug) check node online in __alloc_pages Yasunori Goto
2008-07-31 12:00 ` [RFC:Patch: 004/008](memory hotplug) Use lock for for_each_online_node Yasunori Goto
2008-07-31 12:01 ` Yasunori Goto [this message]
2008-07-31 12:02 ` [RFC:Patch: 006/008](memory hotplug) kswapd_stop() definition Yasunori Goto
2008-07-31 12:03 ` [RFC:Patch: 007/008](memory hotplug) callback routine for mempolicy Yasunori Goto
2008-07-31 12:04 ` [RFC:Patch: 008/008](memory hotplug) remove_pgdat() function Yasunori Goto
2008-09-06 14:21   ` Peter Zijlstra
2008-09-08  3:07     ` Yasunori Goto
2008-07-31 14:04 ` [RFC:Patch: 000/008](memory hotplug) rough idea of pgdat removing Christoph Lameter
2008-08-01  9:42   ` Yasunori Goto
2008-08-01 13:51     ` Christoph Lameter
2008-08-02  0:16       ` Yasunori Goto
2008-08-04 13:25         ` Christoph Lameter
2008-08-05  6:39           ` Yasunori Goto
2008-08-05 11:14             ` Mel Gorman
2008-08-05 17:08               ` Christoph Lameter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080731210011.2A4B.E1E9C6FF@jp.fujitsu.com \
    --to=y-goto@jp.fujitsu.com \
    --cc=akpm@linux-foundation.org \
    --cc=cl@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mel@csn.ul.ie \
    --cc=pbadari@us.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox