linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [Patch](memory hotplug)Allocate usemap on the section with pgdat (take 2)
@ 2008-06-14 12:22 Yasunori Goto
  2008-06-16 10:45 ` Andy Whitcroft
  0 siblings, 1 reply; 7+ messages in thread
From: Yasunori Goto @ 2008-06-14 12:22 UTC (permalink / raw)
  To: David Miller, Badari Pulavarty, Andrew Morton, Heiko Carstens
  Cc: Hiroyuki KAMEZAWA, Tony Breeds, Linux Kernel ML, linux-mm

Hello.

I update the patch which was cause of regression at bootmem.
This version allows normal allocation if alloc_bootmem_section() fails.
So, previous regression will not occur.

I tested on my box. But it's not sparc, s390, nor powerpc.
If there is any trouble by this patch, please let me know.

If no trouble, please apply.

Thanks.

---

Usemaps are allocated on the section which has pgdat by this.

Because usemap size is very small, many other sections usemaps
are allocated on only one page. If a section has usemap, it
can't be removed until removing other sections.
This dependency is not desirable for memory removing.

Pgdat has similar feature. When a section has pgdat area, it 
must be the last section for removing on the node.
So, if section A has pgdat and section B has usemap for section A,
Both sections can't be removed due to dependency each other.

To solve this issue, this patch collects usemap on same
section with pgdat as much as possible.
If other sections doesn't have any dependency, this section will
be able to be removed finally.

Change log of take 2.
 - This feature becomes effective only when CONFIG_MEMORY_HOTREMOVE is on.
   If hotremove is off, this feature is not necessary.
 - Allow allocation on other section if alloc_bootmem_section() fails.
   This avoids previous regression.
 - Show message if allocation on same section fails.

Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>

---

 mm/sparse.c |   77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 76 insertions(+), 1 deletion(-)

Index: current/mm/sparse.c
===================================================================
--- current.orig/mm/sparse.c	2008-06-14 19:03:23.000000000 +0900
+++ current/mm/sparse.c	2008-06-14 20:41:39.000000000 +0900
@@ -269,16 +269,91 @@
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static unsigned long * __init
+sparse_early_usemap_alloc_section(unsigned long pnum)
+{
+	unsigned long section_nr;
+	struct mem_section *ms = __nr_to_section(pnum);
+	int nid = sparse_early_nid(ms);
+ 	struct pglist_data *pgdat = NODE_DATA(nid);
+
+	/*
+	 * Usemap's page can't be freed until freeing other sections
+	 * which use it. And, pgdat has same feature.
+	 * If section A has pgdat and section B has usemap for other
+	 * sections (includes section A), both sections can't be removed,
+	 * because there is the dependency each other.
+	 * To solve above issue, this collects all usemap on the same section
+	 * which has pgdat as much as possible.
+	 */
+	section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
+	return alloc_bootmem_section(usemap_size(), section_nr);
+}
+
+static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+{
+	unsigned long usemap_snr, pgdat_snr;
+	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
+	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
+	struct pglist_data *pgdat = NODE_DATA(nid);
+	int usemap_nid;
+
+	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
+	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
+	if (usemap_snr == pgdat_snr)
+		return;
+
+	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
+		/* skip redundant message */
+		return;
+
+	old_usemap_snr = usemap_snr;
+	old_pgdat_snr = pgdat_snr;
+
+	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
+	if (usemap_nid != nid) {
+		printk("node %d must be removed before remove section %ld\n",
+		       nid, usemap_snr);
+		return;
+	}
+	/*
+	 * There is a dependency deadlock.
+	 * Some platforms allow un-removable section because they will just
+	 * gather other removable sections for dynamic partitioning.
+	 * Just notify un-removable section's number here.
+	 */
+	printk(KERN_INFO "section %ld and %ld", usemap_snr, pgdat_snr);
+	printk(" can't be hotremoved due to dependency each other.\n");
+}
+#else
+static unsigned long * __init
+sparse_early_usemap_alloc_section(unsigned long pnum)
+{
+	return NULL;
+}
+
+static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+{
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+
 static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum)
 {
 	unsigned long *usemap;
 	struct mem_section *ms = __nr_to_section(pnum);
 	int nid = sparse_early_nid(ms);
 
-	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
+	usemap = sparse_early_usemap_alloc_section(pnum);
 	if (usemap)
 		return usemap;
 
+	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
+	if (usemap) {
+		check_usemap_section_nr(nid, usemap);
+		return usemap;
+	}
+
 	/* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */
 	nid = 0;
 

-- 
Yasunori Goto 


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Patch](memory hotplug)Allocate usemap on the section with pgdat (take 2)
  2008-06-14 12:22 [Patch](memory hotplug)Allocate usemap on the section with pgdat (take 2) Yasunori Goto
@ 2008-06-16 10:45 ` Andy Whitcroft
  2008-06-16 13:22   ` Yasunori Goto
  0 siblings, 1 reply; 7+ messages in thread
From: Andy Whitcroft @ 2008-06-16 10:45 UTC (permalink / raw)
  To: Yasunori Goto
  Cc: David Miller, Badari Pulavarty, Andrew Morton, Heiko Carstens,
	Hiroyuki KAMEZAWA, Tony Breeds, Linux Kernel ML, linux-mm,
	Mel Gorman

On Sat, Jun 14, 2008 at 09:22:44PM +0900, Yasunori Goto wrote:
> Hello.
> 
> I update the patch which was cause of regression at bootmem.
> This version allows normal allocation if alloc_bootmem_section() fails.
> So, previous regression will not occur.
> 
> I tested on my box. But it's not sparc, s390, nor powerpc.
> If there is any trouble by this patch, please let me know.
> 
> If no trouble, please apply.
> 
> Thanks.
> 
> ---
> 
> Usemaps are allocated on the section which has pgdat by this.
> 
> Because usemap size is very small, many other sections usemaps
> are allocated on only one page. If a section has usemap, it
> can't be removed until removing other sections.
> This dependency is not desirable for memory removing.
> 
> Pgdat has similar feature. When a section has pgdat area, it 
> must be the last section for removing on the node.
> So, if section A has pgdat and section B has usemap for section A,
> Both sections can't be removed due to dependency each other.
> 
> To solve this issue, this patch collects usemap on same
> section with pgdat as much as possible.
> If other sections doesn't have any dependency, this section will
> be able to be removed finally.
> 
> Change log of take 2.
>  - This feature becomes effective only when CONFIG_MEMORY_HOTREMOVE is on.
>    If hotremove is off, this feature is not necessary.
>  - Allow allocation on other section if alloc_bootmem_section() fails.
>    This avoids previous regression.
>  - Show message if allocation on same section fails.
> 
> Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
> 
> ---
> 
>  mm/sparse.c |   77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 76 insertions(+), 1 deletion(-)
> 
> Index: current/mm/sparse.c
> ===================================================================
> --- current.orig/mm/sparse.c	2008-06-14 19:03:23.000000000 +0900
> +++ current/mm/sparse.c	2008-06-14 20:41:39.000000000 +0900
> @@ -269,16 +269,91 @@
>  }
>  #endif /* CONFIG_MEMORY_HOTPLUG */
>  
> +#ifdef CONFIG_MEMORY_HOTREMOVE
> +static unsigned long * __init
> +sparse_early_usemap_alloc_section(unsigned long pnum)
> +{
> +	unsigned long section_nr;
> +	struct mem_section *ms = __nr_to_section(pnum);
> +	int nid = sparse_early_nid(ms);
> + 	struct pglist_data *pgdat = NODE_DATA(nid);
> +
> +	/*
> +	 * Usemap's page can't be freed until freeing other sections
> +	 * which use it. And, pgdat has same feature.
> +	 * If section A has pgdat and section B has usemap for other
> +	 * sections (includes section A), both sections can't be removed,
> +	 * because there is the dependency each other.
> +	 * To solve above issue, this collects all usemap on the same section
> +	 * which has pgdat as much as possible.
> +	 */
> +	section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
> +	return alloc_bootmem_section(usemap_size(), section_nr);
> +}
> +
> +static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
> +{
> +	unsigned long usemap_snr, pgdat_snr;
> +	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
> +	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
> +	struct pglist_data *pgdat = NODE_DATA(nid);
> +	int usemap_nid;
> +
> +	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
> +	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
> +	if (usemap_snr == pgdat_snr)
> +		return;
> +
> +	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
> +		/* skip redundant message */
> +		return;
> +
> +	old_usemap_snr = usemap_snr;
> +	old_pgdat_snr = pgdat_snr;

The pgdat and usemap sections are node specific, but this repeat message
check is not, so if I add sections alternatly to node 0 and node 1 I
will recieve the warning for every addition?

> +
> +	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
> +	if (usemap_nid != nid) {
> +		printk("node %d must be removed before remove section %ld\n",
> +		       nid, usemap_snr);
> +		return;
> +	}
> +	/*
> +	 * There is a dependency deadlock.
> +	 * Some platforms allow un-removable section because they will just
> +	 * gather other removable sections for dynamic partitioning.
> +	 * Just notify un-removable section's number here.
> +	 */
> +	printk(KERN_INFO "section %ld and %ld", usemap_snr, pgdat_snr);
> +	printk(" can't be hotremoved due to dependency each other.\n");

This might be better worded as a circular dependancy.  Also it would be
nice to include the node perhaps:

	"Sections %ld and %ld (node %ld) have a circular dependancy on
	usemap and pgdat allocations"

> +}
> +#else
> +static unsigned long * __init
> +sparse_early_usemap_alloc_section(unsigned long pnum)
> +{
> +	return NULL;
> +}
> +
> +static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
> +{
> +}
> +#endif /* CONFIG_MEMORY_HOTREMOVE */
> +
>  static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum)
>  {
>  	unsigned long *usemap;
>  	struct mem_section *ms = __nr_to_section(pnum);
>  	int nid = sparse_early_nid(ms);
>  
> -	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
> +	usemap = sparse_early_usemap_alloc_section(pnum);
>  	if (usemap)
>  		return usemap;
>  
> +	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
> +	if (usemap) {
> +		check_usemap_section_nr(nid, usemap);
> +		return usemap;
> +	}
> +
>  	/* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */
>  	nid = 0;

-apw

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Patch](memory hotplug)Allocate usemap on the section with pgdat (take 2)
  2008-06-16 10:45 ` Andy Whitcroft
@ 2008-06-16 13:22   ` Yasunori Goto
  2008-06-17 11:07     ` [Patch](memory hotplug)Allocate usemap on the section with pgdat (take 3) Yasunori Goto
  0 siblings, 1 reply; 7+ messages in thread
From: Yasunori Goto @ 2008-06-16 13:22 UTC (permalink / raw)
  To: Andy Whitcroft
  Cc: David Miller, Badari Pulavarty, Andrew Morton, Heiko Carstens,
	Hiroyuki KAMEZAWA, Tony Breeds, Linux Kernel ML, linux-mm,
	Mel Gorman

> > +static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
> > +{
> > +	unsigned long usemap_snr, pgdat_snr;
> > +	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
> > +	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
> > +	struct pglist_data *pgdat = NODE_DATA(nid);
> > +	int usemap_nid;
> > +
> > +	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
> > +	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
> > +	if (usemap_snr == pgdat_snr)
> > +		return;
> > +
> > +	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
> > +		/* skip redundant message */
> > +		return;
> > +
> > +	old_usemap_snr = usemap_snr;
> > +	old_pgdat_snr = pgdat_snr;
> 
> The pgdat and usemap sections are node specific, but this repeat message
> check is not, so if I add sections alternatly to node 0 and node 1 I
> will recieve the warning for every addition?

Yes. alloc_bootmem_section() may be failed, and usemap may be allocated on
other node. I would like to notice for its dependency case too.

> 
> > +
> > +	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
> > +	if (usemap_nid != nid) {
> > +		printk("node %d must be removed before remove section %ld\n",
> > +		       nid, usemap_snr);
> > +		return;
> > +	}
> > +	/*
> > +	 * There is a dependency deadlock.
> > +	 * Some platforms allow un-removable section because they will just
> > +	 * gather other removable sections for dynamic partitioning.
> > +	 * Just notify un-removable section's number here.
> > +	 */
> > +	printk(KERN_INFO "section %ld and %ld", usemap_snr, pgdat_snr);
> > +	printk(" can't be hotremoved due to dependency each other.\n");
> 
> This might be better worded as a circular dependancy.  Also it would be
> nice to include the node perhaps:
> 
> 	"Sections %ld and %ld (node %ld) have a circular dependancy on
> 	usemap and pgdat allocations"

Thanks. I'll change it.


-- 
Yasunori Goto 


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Patch](memory hotplug)Allocate usemap on the section with pgdat (take 3)
  2008-06-16 13:22   ` Yasunori Goto
@ 2008-06-17 11:07     ` Yasunori Goto
  2008-06-23 20:49       ` Mel Gorman
  0 siblings, 1 reply; 7+ messages in thread
From: Yasunori Goto @ 2008-06-17 11:07 UTC (permalink / raw)
  To: Andy Whitcroft, Andrew Morton
  Cc: David Miller, Badari Pulavarty, Heiko Carstens,
	Hiroyuki KAMEZAWA, Tony Breeds, Linux Kernel ML, linux-mm,
	Mel Gorman

Here is take 3 for usemap allocation on pgdat section.

If there is any trouble, please let me know.

If no trouble, please apply.

Thanks.

---

Usemaps are allocated on the section which has pgdat by this.

Because usemap size is very small, many other sections usemaps
are allocated on only one page. If a section has usemap, it
can't be removed until removing other sections.
This dependency is not desirable for memory removing.

Pgdat has similar feature. When a section has pgdat area, it 
must be the last section for removing on the node.
So, if section A has pgdat and section B has usemap for section A,
Both sections can't be removed due to dependency each other.

To solve this issue, this patch collects usemap on same
section with pgdat as much as possible.
If other sections doesn't have any dependency, this section will
be able to be removed finally.

Change log of take 3.
 - Change dependency message and comment.
  (Thanks! > Andy Whitcroft-san)

Change log of take 2.
 - This feature becomes effective only when CONFIG_MEMORY_HOTREMOVE is on.
   If hotremove is off, this feature is not necessary.
 - Allow allocation on other section if alloc_bootmem_section() fails.
   This removes previous regression.
 - Show message if allocation on same section fails.

Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>

---

 mm/sparse.c |   78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 77 insertions(+), 1 deletion(-)

Index: current/mm/sparse.c
===================================================================
--- current.orig/mm/sparse.c	2008-06-17 15:34:29.000000000 +0900
+++ current/mm/sparse.c	2008-06-17 18:35:02.000000000 +0900
@@ -269,16 +269,92 @@
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static unsigned long * __init
+sparse_early_usemap_alloc_section(unsigned long pnum)
+{
+	unsigned long section_nr;
+	struct mem_section *ms = __nr_to_section(pnum);
+	int nid = sparse_early_nid(ms);
+ 	struct pglist_data *pgdat = NODE_DATA(nid);
+
+	/*
+	 * Usemap's page can't be freed until freeing other sections
+	 * which use it. And, pgdat has same feature.
+	 * If section A has pgdat and section B has usemap for other
+	 * sections (includes section A), both sections can't be removed,
+	 * because there is the dependency each other.
+	 * To solve above issue, this collects all usemap on the same section
+	 * which has pgdat as much as possible.
+	 */
+	section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
+	return alloc_bootmem_section(usemap_size(), section_nr);
+}
+
+static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+{
+	unsigned long usemap_snr, pgdat_snr;
+	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
+	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
+	struct pglist_data *pgdat = NODE_DATA(nid);
+	int usemap_nid;
+
+	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
+	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
+	if (usemap_snr == pgdat_snr)
+		return;
+
+	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
+		/* skip redundant message */
+		return;
+
+	old_usemap_snr = usemap_snr;
+	old_pgdat_snr = pgdat_snr;
+
+	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
+	if (usemap_nid != nid) {
+		printk("node %d must be removed before remove section %ld\n",
+		       nid, usemap_snr);
+		return;
+	}
+	/*
+	 * There is a circular dependency.
+	 * Some platforms allow un-removable section because they will just
+	 * gather other removable sections for dynamic partitioning.
+	 * Just notify un-removable section's number here.
+	 */
+	printk(KERN_INFO "Section %ld and %ld (node %d)",
+	       usemap_snr, pgdat_snr, nid);
+	printk(" have a circular dependency on usemap and pgdat allocations\n");
+}
+#else
+static unsigned long * __init
+sparse_early_usemap_alloc_section(unsigned long pnum)
+{
+	return NULL;
+}
+
+static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+{
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+
 static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum)
 {
 	unsigned long *usemap;
 	struct mem_section *ms = __nr_to_section(pnum);
 	int nid = sparse_early_nid(ms);
 
-	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
+	usemap = sparse_early_usemap_alloc_section(pnum);
 	if (usemap)
 		return usemap;
 
+	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
+	if (usemap) {
+		check_usemap_section_nr(nid, usemap);
+		return usemap;
+	}
+
 	/* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */
 	nid = 0;
 

-- 
Yasunori Goto 


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Patch](memory hotplug)Allocate usemap on the section with pgdat (take 3)
  2008-06-17 11:07     ` [Patch](memory hotplug)Allocate usemap on the section with pgdat (take 3) Yasunori Goto
@ 2008-06-23 20:49       ` Mel Gorman
  2008-06-24  3:12         ` Yasunori Goto
  0 siblings, 1 reply; 7+ messages in thread
From: Mel Gorman @ 2008-06-23 20:49 UTC (permalink / raw)
  To: Yasunori Goto
  Cc: Andy Whitcroft, Andrew Morton, David Miller, Badari Pulavarty,
	Heiko Carstens, Hiroyuki KAMEZAWA, Tony Breeds, Linux Kernel ML,
	linux-mm

On (17/06/08 20:07), Yasunori Goto didst pronounce:
> 
> Here is take 3 for usemap allocation on pgdat section.
> 
> If there is any trouble, please let me know.
> 
> If no trouble, please apply.
> 
> Thanks.
> 

This boot-tested successfully on a few machines. I wasn't able to get
many machines but at first take, it seems ok.

> ---
> 
> Usemaps are allocated on the section which has pgdat by this.
> 
> Because usemap size is very small, many other sections usemaps
> are allocated on only one page. If a section has usemap, it
> can't be removed until removing other sections.
> This dependency is not desirable for memory removing.
> 

True. I have a report complaining that a node cannot be removed because
of some reserved pages at the start of the node. I have not looked
closely yet but it is possible that it is a page containing usemaps for
another section that is in there.

> Pgdat has similar feature. When a section has pgdat area, it 
> must be the last section for removing on the node.
> So, if section A has pgdat and section B has usemap for section A,
> Both sections can't be removed due to dependency each other.
> 
> To solve this issue, this patch collects usemap on same
> section with pgdat as much as possible.
> If other sections doesn't have any dependency, this section will
> be able to be removed finally.
> 
> Change log of take 3.
>  - Change dependency message and comment.
>   (Thanks! > Andy Whitcroft-san)
> 
> Change log of take 2.
>  - This feature becomes effective only when CONFIG_MEMORY_HOTREMOVE is on.
>    If hotremove is off, this feature is not necessary.
>  - Allow allocation on other section if alloc_bootmem_section() fails.
>    This removes previous regression.
>  - Show message if allocation on same section fails.
> 
> Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
> 
> ---
> 
>  mm/sparse.c |   78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 77 insertions(+), 1 deletion(-)
> 
> Index: current/mm/sparse.c
> ===================================================================
> --- current.orig/mm/sparse.c	2008-06-17 15:34:29.000000000 +0900
> +++ current/mm/sparse.c	2008-06-17 18:35:02.000000000 +0900
> @@ -269,16 +269,92 @@
>  }
>  #endif /* CONFIG_MEMORY_HOTPLUG */
>  
> +#ifdef CONFIG_MEMORY_HOTREMOVE
> +static unsigned long * __init
> +sparse_early_usemap_alloc_section(unsigned long pnum)
> +{
> +	unsigned long section_nr;
> +	struct mem_section *ms = __nr_to_section(pnum);
> +	int nid = sparse_early_nid(ms);
> + 	struct pglist_data *pgdat = NODE_DATA(nid);
> +

It's not a major deal but the only caller of
sparse_early_usemap_alloc_section() has the nid already. If you looked up
the pgdat there and passed it in, it would involve fewer lookups. Granted,
this is not performance critical or anything so it's not a major deal.

> +	/*
> +	 * Usemap's page can't be freed until freeing other sections
> +	 * which use it. And, pgdat has same feature.
> +	 * If section A has pgdat and section B has usemap for other
> +	 * sections (includes section A), both sections can't be removed,
> +	 * because there is the dependency each other.
> +	 * To solve above issue, this collects all usemap on the same section
> +	 * which has pgdat as much as possible.
> +	 */

The comment is a bit tricky to read. How about?

	/*
	 * A page may contain usemaps for other sections preventing the
	 * the page being freed and making a section unremovable while
	 * other sections referencing the usemap remain active. Similarly,
	 * a pgdat can prevent a section being removed. If section A
	 * contains a pgdat and section B contains the usemap, both
	 * sections become inter-dependent. This allocates usemaps
	 * from the same section as the pgdat where possible to avoid
	 * this problem.
	 */

> +	section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
> +	return alloc_bootmem_section(usemap_size(), section_nr);
> +}
> +
> +static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
> +{
> +	unsigned long usemap_snr, pgdat_snr;
> +	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
> +	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
> +	struct pglist_data *pgdat = NODE_DATA(nid);
> +	int usemap_nid;
> +
> +	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
> +	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
> +	if (usemap_snr == pgdat_snr)
> +		return;
> +
> +	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
> +		/* skip redundant message */
> +		return;
> +
> +	old_usemap_snr = usemap_snr;
> +	old_pgdat_snr = pgdat_snr;
> +
> +	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
> +	if (usemap_nid != nid) {
> +		printk("node %d must be removed before remove section %ld\n",
> +		       nid, usemap_snr);
> +		return;

no kernel log level here

> +	}
> +	/*
> +	 * There is a circular dependency.
> +	 * Some platforms allow un-removable section because they will just
> +	 * gather other removable sections for dynamic partitioning.
> +	 * Just notify un-removable section's number here.
> +	 */
> +	printk(KERN_INFO "Section %ld and %ld (node %d)",
> +	       usemap_snr, pgdat_snr, nid);
> +	printk(" have a circular dependency on usemap and pgdat allocations\n");

a follow-on printk like this should use KERN_CONT

> +}
> +#else
> +static unsigned long * __init
> +sparse_early_usemap_alloc_section(unsigned long pnum)
> +{
> +	return NULL;
> +}
> +
> +static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
> +{
> +}
> +#endif /* CONFIG_MEMORY_HOTREMOVE */
> +
>  static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum)
>  {
>  	unsigned long *usemap;
>  	struct mem_section *ms = __nr_to_section(pnum);
>  	int nid = sparse_early_nid(ms);
>  
> -	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
> +	usemap = sparse_early_usemap_alloc_section(pnum);
>  	if (usemap)
>  		return usemap;
>  
> +	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
> +	if (usemap) {
> +		check_usemap_section_nr(nid, usemap);
> +		return usemap;
> +	}
> +
>  	/* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */
>  	nid = 0;
>  

Just a few minor things that need cleaning up there. Otherwise, the idea
seems sound.

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Patch](memory hotplug)Allocate usemap on the section with pgdat (take 3)
  2008-06-23 20:49       ` Mel Gorman
@ 2008-06-24  3:12         ` Yasunori Goto
  2008-06-24  8:41           ` [Patch](memory hotplug)Allocate usemap on the section with pgdat (take 4) Yasunori Goto
  0 siblings, 1 reply; 7+ messages in thread
From: Yasunori Goto @ 2008-06-24  3:12 UTC (permalink / raw)
  To: Mel Gorman
  Cc: Andy Whitcroft, Andrew Morton, David Miller, Badari Pulavarty,
	Heiko Carstens, Hiroyuki KAMEZAWA, Tony Breeds, Linux Kernel ML,
	linux-mm

Hi.

> On (17/06/08 20:07), Yasunori Goto didst pronounce:
> > 
> > Here is take 3 for usemap allocation on pgdat section.
> > 
> > If there is any trouble, please let me know.
> > 
> > If no trouble, please apply.
> > 
> > Thanks.
> > 
> 
> This boot-tested successfully on a few machines. I wasn't able to get
> many machines but at first take, it seems ok.

Good news! :-)

> > Index: current/mm/sparse.c
> > ===================================================================
> > --- current.orig/mm/sparse.c	2008-06-17 15:34:29.000000000 +0900
> > +++ current/mm/sparse.c	2008-06-17 18:35:02.000000000 +0900
> > @@ -269,16 +269,92 @@
> >  }
> >  #endif /* CONFIG_MEMORY_HOTPLUG */
> >  
> > +#ifdef CONFIG_MEMORY_HOTREMOVE
> > +static unsigned long * __init
> > +sparse_early_usemap_alloc_section(unsigned long pnum)
> > +{
> > +	unsigned long section_nr;
> > +	struct mem_section *ms = __nr_to_section(pnum);
> > +	int nid = sparse_early_nid(ms);
> > + 	struct pglist_data *pgdat = NODE_DATA(nid);
> > +
> 
> It's not a major deal but the only caller of
> sparse_early_usemap_alloc_section() has the nid already. If you looked up
> the pgdat there and passed it in, it would involve fewer lookups. Granted,
> this is not performance critical or anything so it's not a major deal.

Ok. I'll change it.


> 
> > +	/*
> > +	 * Usemap's page can't be freed until freeing other sections
> > +	 * which use it. And, pgdat has same feature.
> > +	 * If section A has pgdat and section B has usemap for other
> > +	 * sections (includes section A), both sections can't be removed,
> > +	 * because there is the dependency each other.
> > +	 * To solve above issue, this collects all usemap on the same section
> > +	 * which has pgdat as much as possible.
> > +	 */
> 
> The comment is a bit tricky to read. How about?
> 
> 	/*
> 	 * A page may contain usemaps for other sections preventing the
> 	 * the page being freed and making a section unremovable while
> 	 * other sections referencing the usemap remain active. Similarly,
> 	 * a pgdat can prevent a section being removed. If section A
> 	 * contains a pgdat and section B contains the usemap, both
> 	 * sections become inter-dependent. This allocates usemaps
> 	 * from the same section as the pgdat where possible to avoid
> 	 * this problem.
> 	 */

Thanks! Looks better than mine. I'll change it to yours.


> 
> > +	section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
> > +	return alloc_bootmem_section(usemap_size(), section_nr);
> > +}
> > +
> > +static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
> > +{
> > +	unsigned long usemap_snr, pgdat_snr;
> > +	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
> > +	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
> > +	struct pglist_data *pgdat = NODE_DATA(nid);
> > +	int usemap_nid;
> > +
> > +	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
> > +	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
> > +	if (usemap_snr == pgdat_snr)
> > +		return;
> > +
> > +	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
> > +		/* skip redundant message */
> > +		return;
> > +
> > +	old_usemap_snr = usemap_snr;
> > +	old_pgdat_snr = pgdat_snr;
> > +
> > +	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
> > +	if (usemap_nid != nid) {
> > +		printk("node %d must be removed before remove section %ld\n",
> > +		       nid, usemap_snr);
> > +		return;
> 
> no kernel log level here

Ok. I'll add KERN_INFO.

> 
> > +	}
> > +	/*
> > +	 * There is a circular dependency.
> > +	 * Some platforms allow un-removable section because they will just
> > +	 * gather other removable sections for dynamic partitioning.
> > +	 * Just notify un-removable section's number here.
> > +	 */
> > +	printk(KERN_INFO "Section %ld and %ld (node %d)",
> > +	       usemap_snr, pgdat_snr, nid);
> > +	printk(" have a circular dependency on usemap and pgdat allocations\n");
> 
> a follow-on printk like this should use KERN_CONT

OK.

> > +}
> > +#else
> > +static unsigned long * __init
> > +sparse_early_usemap_alloc_section(unsigned long pnum)
> > +{
> > +	return NULL;
> > +}
> > +
> > +static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
> > +{
> > +}
> > +#endif /* CONFIG_MEMORY_HOTREMOVE */
> > +
> >  static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum)
> >  {
> >  	unsigned long *usemap;
> >  	struct mem_section *ms = __nr_to_section(pnum);
> >  	int nid = sparse_early_nid(ms);
> >  
> > -	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
> > +	usemap = sparse_early_usemap_alloc_section(pnum);
> >  	if (usemap)
> >  		return usemap;
> >  
> > +	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
> > +	if (usemap) {
> > +		check_usemap_section_nr(nid, usemap);
> > +		return usemap;
> > +	}
> > +
> >  	/* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */
> >  	nid = 0;
> >  
> 
> Just a few minor things that need cleaning up there. Otherwise, the idea
> seems sound.

Thanks for reviewing and testing. :-)

Bye.
-- 
Yasunori Goto 


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Patch](memory hotplug)Allocate usemap on the section with pgdat (take 4)
  2008-06-24  3:12         ` Yasunori Goto
@ 2008-06-24  8:41           ` Yasunori Goto
  0 siblings, 0 replies; 7+ messages in thread
From: Yasunori Goto @ 2008-06-24  8:41 UTC (permalink / raw)
  To: Andrew Morton, Mel Gorman
  Cc: Andy Whitcroft, David Miller, Badari Pulavarty, Heiko Carstens,
	Hiroyuki KAMEZAWA, Tony Breeds, Linux Kernel ML, linux-mm

Hi.

Here is take 4.
Mel-san's comments are refrected. (some cleanups)

At present, there is no regression report after take 3.
I wish this patch would be merged into -next or -mm.

Please apply.

Thanks.

---

Usemaps are allocated on the section which has pgdat by this.

Because usemap size is very small, many other sections usemaps
are allocated on only one page. If a section has usemap, it
can't be removed until removing other sections.
This dependency is not desirable for memory removing.

Pgdat has similar feature. When a section has pgdat area, it 
must be the last section for removing on the node.
So, if section A has pgdat and section B has usemap for section A,
Both sections can't be removed due to dependency each other.

To solve this issue, this patch collects usemap on same
section with pgdat as much as possible.
If other sections doesn't have any dependency, this section will
be able to be removed finally.

Change log of take 4.
 - Some cleanups.
   * Add KERN_INFO and KERN_CONT for printk messages.
   * Remove redundant nid calculation in 
     sparse_early_usemap_alloc_section().
     pgdat is passed directly, and function name is changed to
     sparse_early_usemap_alloc_pgdat_section(pgdat).
     (Because argument becomes pgdat, not pnum).
   * Comment was changed to be read easier.

Change log of take 3.
 - Change dependency message and comment.
  (Thanks! > Andy Whitcroft-san)

Change log of take 2.
 - This feature becomes effective only when CONFIG_MEMORY_HOTREMOVE is on.
   If hotremove is off, this feature is not necessary.
 - Allow allocation on other section if alloc_bootmem_section() fails.
   This removes previous regression.
 - Show message if allocation on same section fails.

Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>

---

 mm/sparse.c |   78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 77 insertions(+), 1 deletion(-)

Index: current/mm/sparse.c
===================================================================
--- current.orig/mm/sparse.c	2008-06-17 15:34:29.000000000 +0900
+++ current/mm/sparse.c	2008-06-24 15:29:21.000000000 +0900
@@ -269,16 +269,92 @@
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static unsigned long * __init
+sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat)
+{
+	unsigned long section_nr;
+
+	/*
+	 * A page may contain usemaps for other sections preventing the
+	 * page being freed and making a section unremovable while
+	 * other sections referencing the usemap retmain active. Similarly,
+	 * a pgdat can prevent a section being removed. If section A
+	 * contains a pgdat and section B contains the usemap, both
+	 * sections become inter-dependent. This allocates usemaps
+	 * from the same section as the pgdat where possible to avoid
+	 * this problem.
+	 */
+	section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
+	return alloc_bootmem_section(usemap_size(), section_nr);
+}
+
+static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+{
+	unsigned long usemap_snr, pgdat_snr;
+	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
+	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
+	struct pglist_data *pgdat = NODE_DATA(nid);
+	int usemap_nid;
+
+	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
+	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
+	if (usemap_snr == pgdat_snr)
+		return;
+
+	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
+		/* skip redundant message */
+		return;
+
+	old_usemap_snr = usemap_snr;
+	old_pgdat_snr = pgdat_snr;
+
+	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
+	if (usemap_nid != nid) {
+		printk(KERN_INFO
+		       "node %d must be removed before remove section %ld\n",
+		       nid, usemap_snr);
+		return;
+	}
+	/*
+	 * There is a circular dependency.
+	 * Some platforms allow un-removable section because they will just
+	 * gather other removable sections for dynamic partitioning.
+	 * Just notify un-removable section's number here.
+	 */
+	printk(KERN_INFO "Section %ld and %ld (node %d)", usemap_snr,
+	       pgdat_snr, nid);
+	printk(KERN_CONT
+	       " have a circular dependency on usemap and pgdat allocations\n");
+}
+#else
+static unsigned long * __init
+sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat)
+{
+	return NULL;
+}
+
+static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+{
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+
 static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum)
 {
 	unsigned long *usemap;
 	struct mem_section *ms = __nr_to_section(pnum);
 	int nid = sparse_early_nid(ms);
 
-	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
+	usemap = sparse_early_usemap_alloc_pgdat_section(NODE_DATA(nid));
 	if (usemap)
 		return usemap;
 
+	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
+	if (usemap) {
+		check_usemap_section_nr(nid, usemap);
+		return usemap;
+	}
+
 	/* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */
 	nid = 0;
 

-- 
Yasunori Goto 


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2008-06-24  8:41 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-06-14 12:22 [Patch](memory hotplug)Allocate usemap on the section with pgdat (take 2) Yasunori Goto
2008-06-16 10:45 ` Andy Whitcroft
2008-06-16 13:22   ` Yasunori Goto
2008-06-17 11:07     ` [Patch](memory hotplug)Allocate usemap on the section with pgdat (take 3) Yasunori Goto
2008-06-23 20:49       ` Mel Gorman
2008-06-24  3:12         ` Yasunori Goto
2008-06-24  8:41           ` [Patch](memory hotplug)Allocate usemap on the section with pgdat (take 4) Yasunori Goto

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox