* [PATCH] memcg: defer page_cgroup initialization
@ 2013-04-09 9:25 Glauber Costa
2013-04-09 13:36 ` Johannes Weiner
2013-04-09 16:15 ` Kamezawa Hiroyuki
0 siblings, 2 replies; 5+ messages in thread
From: Glauber Costa @ 2013-04-09 9:25 UTC (permalink / raw)
To: linux-mm
Cc: cgroups, Andrew Morton, Li Zefan, Glauber Costa, Michal Hocko,
Kamezawa Hiroyuki, Johannes Weiner
We have now reached the point in which there is no real need to allocate
page_cgroup upon system boot. We can defer it to the first memcg
initialization, and if it fails, we treat it like any other memcg memory
failures (like for instance, if the mem_cgroup structure itself failed).
In the future, we may want to defer this to the first non-root cgroup
initialization, but we are not there yet. With that, page_cgroup can be
more silent in its initialization.
Unfortunately, doing that for flatmem models would lead to significant
vmalloc-area waste. Since big-memory 32-bit machines are quite common,
this would be reality for most of them. This means that we will leave
FLATMEM alone, and fix only the SPARSEMEM case. We modify the message
slightly so that in future reports we know precisely if this message is
from a flatmem kernel or a older kernel initializing page_cgroup early.
Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
---
include/linux/page_cgroup.h | 13 +++++++------
init/main.c | 1 -
mm/memcontrol.c | 2 ++
mm/page_cgroup.c | 19 ++++++++-----------
4 files changed, 17 insertions(+), 18 deletions(-)
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 777a524..bfb43f0 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -33,11 +33,16 @@ void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
static inline void __init page_cgroup_init_flatmem(void)
{
}
-extern void __init page_cgroup_init(void);
+extern bool page_cgroup_init(void);
#else
void __init page_cgroup_init_flatmem(void);
-static inline void __init page_cgroup_init(void)
+/*
+ * If we reach here, we would have already initialized flatmem mappings.
+ * So just always succeed
+ */
+static inline bool page_cgroup_init(void)
{
+ return 0;
}
#endif
@@ -94,10 +99,6 @@ static inline struct page_cgroup *lookup_page_cgroup(struct page *page)
return NULL;
}
-static inline void page_cgroup_init(void)
-{
-}
-
static inline void __init page_cgroup_init_flatmem(void)
{
}
diff --git a/init/main.c b/init/main.c
index cee4b5c..49aa019 100644
--- a/init/main.c
+++ b/init/main.c
@@ -592,7 +592,6 @@ asmlinkage void __init start_kernel(void)
initrd_start = 0;
}
#endif
- page_cgroup_init();
debug_objects_mem_init();
kmemleak_init();
setup_per_cpu_pageset();
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f608546..59a5b1f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6357,6 +6357,8 @@ mem_cgroup_css_alloc(struct cgroup *cont)
res_counter_init(&memcg->res, NULL);
res_counter_init(&memcg->memsw, NULL);
res_counter_init(&memcg->kmem, NULL);
+ if (page_cgroup_init())
+ goto free_out;
}
memcg->last_scanned_node = MAX_NUMNODES;
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 6d757e3..679de38 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -78,7 +78,8 @@ void __init page_cgroup_init_flatmem(void)
}
printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
printk(KERN_INFO "please try 'cgroup_disable=memory' option if you"
- " don't want memory cgroups\n");
+ " don't want memory cgroups. Alternatively, use SPARSEMEM mappings"
+ " to defer initialization until actual use.");
return;
fail:
printk(KERN_CRIT "allocation of page_cgroup failed.\n");
@@ -266,13 +267,13 @@ static int __meminit page_cgroup_callback(struct notifier_block *self,
#endif
-void __init page_cgroup_init(void)
+bool page_cgroup_init(void)
{
unsigned long pfn;
int nid;
if (mem_cgroup_disabled())
- return;
+ return 0;
for_each_node_state(nid, N_MEMORY) {
unsigned long start_pfn, end_pfn;
@@ -299,17 +300,13 @@ void __init page_cgroup_init(void)
if (pfn_to_nid(pfn) != nid)
continue;
if (init_section_page_cgroup(pfn, nid))
- goto oom;
+ return 1;
}
}
+#ifdef CONFIG_MEMORY_HOTPLUG
hotplug_memory_notifier(page_cgroup_callback, 0);
- printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
- printk(KERN_INFO "please try 'cgroup_disable=memory' option if you "
- "don't want memory cgroups\n");
- return;
-oom:
- printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
- panic("Out of memory");
+#endif
+ return 0;
}
void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
--
1.8.1.4
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] memcg: defer page_cgroup initialization
2013-04-09 9:25 [PATCH] memcg: defer page_cgroup initialization Glauber Costa
@ 2013-04-09 13:36 ` Johannes Weiner
2013-04-09 13:45 ` Glauber Costa
2013-04-09 14:06 ` Glauber Costa
2013-04-09 16:15 ` Kamezawa Hiroyuki
1 sibling, 2 replies; 5+ messages in thread
From: Johannes Weiner @ 2013-04-09 13:36 UTC (permalink / raw)
To: Glauber Costa
Cc: linux-mm, cgroups, Andrew Morton, Li Zefan, Michal Hocko,
Kamezawa Hiroyuki
On Tue, Apr 09, 2013 at 01:25:11PM +0400, Glauber Costa wrote:
> We have now reached the point in which there is no real need to allocate
> page_cgroup upon system boot. We can defer it to the first memcg
> initialization, and if it fails, we treat it like any other memcg memory
> failures (like for instance, if the mem_cgroup structure itself failed).
> In the future, we may want to defer this to the first non-root cgroup
> initialization, but we are not there yet. With that, page_cgroup can be
> more silent in its initialization.
>
> Unfortunately, doing that for flatmem models would lead to significant
> vmalloc-area waste. Since big-memory 32-bit machines are quite common,
> this would be reality for most of them. This means that we will leave
> FLATMEM alone, and fix only the SPARSEMEM case. We modify the message
> slightly so that in future reports we know precisely if this message is
> from a flatmem kernel or a older kernel initializing page_cgroup early.
>
> Signed-off-by: Glauber Costa <glommer@parallels.com>
> Cc: Michal Hocko <mhocko@suse.cz>
> Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> ---
> include/linux/page_cgroup.h | 13 +++++++------
> init/main.c | 1 -
> mm/memcontrol.c | 2 ++
> mm/page_cgroup.c | 19 ++++++++-----------
> 4 files changed, 17 insertions(+), 18 deletions(-)
>
> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
> index 777a524..bfb43f0 100644
> --- a/include/linux/page_cgroup.h
> +++ b/include/linux/page_cgroup.h
> @@ -33,11 +33,16 @@ void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
> static inline void __init page_cgroup_init_flatmem(void)
> {
> }
> -extern void __init page_cgroup_init(void);
> +extern bool page_cgroup_init(void);
> #else
> void __init page_cgroup_init_flatmem(void);
> -static inline void __init page_cgroup_init(void)
> +/*
> + * If we reach here, we would have already initialized flatmem mappings.
> + * So just always succeed
> + */
> +static inline bool page_cgroup_init(void)
> {
> + return 0;
Could you please make it either int (*)(void) OR return true for
success? :-)
> @@ -78,7 +78,8 @@ void __init page_cgroup_init_flatmem(void)
> }
> printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
> printk(KERN_INFO "please try 'cgroup_disable=memory' option if you"
> - " don't want memory cgroups\n");
> + " don't want memory cgroups. Alternatively, use SPARSEMEM mappings"
> + " to defer initialization until actual use.");
Isn't that promising a bit much as long as "actual use" means "until
we create the root_mem_cgroup during boot time"?
> @@ -299,17 +300,13 @@ void __init page_cgroup_init(void)
> if (pfn_to_nid(pfn) != nid)
> continue;
> if (init_section_page_cgroup(pfn, nid))
> - goto oom;
> + return 1;
> }
> }
> +#ifdef CONFIG_MEMORY_HOTPLUG
> hotplug_memory_notifier(page_cgroup_callback, 0);
> - printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
> - printk(KERN_INFO "please try 'cgroup_disable=memory' option if you "
> - "don't want memory cgroups\n");
> - return;
> -oom:
> - printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
> - panic("Out of memory");
> +#endif
> + return 0;
Ok, so this message will be replaced with BUG() in cgroup.c, right?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] memcg: defer page_cgroup initialization
2013-04-09 13:36 ` Johannes Weiner
@ 2013-04-09 13:45 ` Glauber Costa
2013-04-09 14:06 ` Glauber Costa
1 sibling, 0 replies; 5+ messages in thread
From: Glauber Costa @ 2013-04-09 13:45 UTC (permalink / raw)
To: Johannes Weiner
Cc: linux-mm, cgroups, Andrew Morton, Li Zefan, Michal Hocko,
Kamezawa Hiroyuki
On 04/09/2013 05:36 PM, Johannes Weiner wrote:
> On Tue, Apr 09, 2013 at 01:25:11PM +0400, Glauber Costa wrote:
>> We have now reached the point in which there is no real need to allocate
>> page_cgroup upon system boot. We can defer it to the first memcg
>> initialization, and if it fails, we treat it like any other memcg memory
>> failures (like for instance, if the mem_cgroup structure itself failed).
>> In the future, we may want to defer this to the first non-root cgroup
>> initialization, but we are not there yet. With that, page_cgroup can be
>> more silent in its initialization.
>>
>> Unfortunately, doing that for flatmem models would lead to significant
>> vmalloc-area waste. Since big-memory 32-bit machines are quite common,
>> this would be reality for most of them. This means that we will leave
>> FLATMEM alone, and fix only the SPARSEMEM case. We modify the message
>> slightly so that in future reports we know precisely if this message is
>> from a flatmem kernel or a older kernel initializing page_cgroup early.
>>
>> Signed-off-by: Glauber Costa <glommer@parallels.com>
>> Cc: Michal Hocko <mhocko@suse.cz>
>> Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
>> Cc: Johannes Weiner <hannes@cmpxchg.org>
>> ---
>> include/linux/page_cgroup.h | 13 +++++++------
>> init/main.c | 1 -
>> mm/memcontrol.c | 2 ++
>> mm/page_cgroup.c | 19 ++++++++-----------
>> 4 files changed, 17 insertions(+), 18 deletions(-)
>>
>> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
>> index 777a524..bfb43f0 100644
>> --- a/include/linux/page_cgroup.h
>> +++ b/include/linux/page_cgroup.h
>> @@ -33,11 +33,16 @@ void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
>> static inline void __init page_cgroup_init_flatmem(void)
>> {
>> }
>> -extern void __init page_cgroup_init(void);
>> +extern bool page_cgroup_init(void);
>> #else
>> void __init page_cgroup_init_flatmem(void);
>> -static inline void __init page_cgroup_init(void)
>> +/*
>> + * If we reach here, we would have already initialized flatmem mappings.
>> + * So just always succeed
>> + */
>> +static inline bool page_cgroup_init(void)
>> {
>> + return 0;
>
> Could you please make it either int (*)(void) OR return true for
> success? :-)
I can return true, of course.
>
>> @@ -78,7 +78,8 @@ void __init page_cgroup_init_flatmem(void)
>> }
>> printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
>> printk(KERN_INFO "please try 'cgroup_disable=memory' option if you"
>> - " don't want memory cgroups\n");
>> + " don't want memory cgroups. Alternatively, use SPARSEMEM mappings"
>> + " to defer initialization until actual use.");
>
> Isn't that promising a bit much as long as "actual use" means "until
> we create the root_mem_cgroup during boot time"?
>
If you look at this patch alone, then yes, maybe. (It is still correct,
though). Mostly, I wanted the message to change somehow, so if we get
reports about it, we can easily differentiate a flatmem scenario from an
older kernel, in which both messages are equal. And also inform the user
that he could be better of if using SPARSEMEM.
>> @@ -299,17 +300,13 @@ void __init page_cgroup_init(void)
>> if (pfn_to_nid(pfn) != nid)
>> continue;
>> if (init_section_page_cgroup(pfn, nid))
>> - goto oom;
>> + return 1;
>> }
>> }
>> +#ifdef CONFIG_MEMORY_HOTPLUG
>> hotplug_memory_notifier(page_cgroup_callback, 0);
>> - printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
>> - printk(KERN_INFO "please try 'cgroup_disable=memory' option if you "
>> - "don't want memory cgroups\n");
>> - return;
>> -oom:
>> - printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
>> - panic("Out of memory");
>> +#endif
>> + return 0;
>
> Ok, so this message will be replaced with BUG() in cgroup.c, right?
For the root cgroup, yes. When page_cgroup_init is moved to the
first-non-root, then it is just a normal ENOMEM situation. More
importantly, because it becomes just memory allocation, and not a
special boot-time memory allocation, then it just follow the normal
error paths whatever they are.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] memcg: defer page_cgroup initialization
2013-04-09 13:36 ` Johannes Weiner
2013-04-09 13:45 ` Glauber Costa
@ 2013-04-09 14:06 ` Glauber Costa
1 sibling, 0 replies; 5+ messages in thread
From: Glauber Costa @ 2013-04-09 14:06 UTC (permalink / raw)
To: Johannes Weiner
Cc: linux-mm, cgroups, Andrew Morton, Li Zefan, Michal Hocko,
Kamezawa Hiroyuki
On 04/09/2013 05:36 PM, Johannes Weiner wrote:
> Could you please make it either int (*)(void) OR return true for
> success? :-)
I can actually do better: I can return an error code and then do the
cleanup on error that I forgot to backport from the bypass patchset...
Thanks for drawing my attention to it.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] memcg: defer page_cgroup initialization
2013-04-09 9:25 [PATCH] memcg: defer page_cgroup initialization Glauber Costa
2013-04-09 13:36 ` Johannes Weiner
@ 2013-04-09 16:15 ` Kamezawa Hiroyuki
1 sibling, 0 replies; 5+ messages in thread
From: Kamezawa Hiroyuki @ 2013-04-09 16:15 UTC (permalink / raw)
To: Glauber Costa
Cc: linux-mm, cgroups, Andrew Morton, Li Zefan, Michal Hocko,
Johannes Weiner
(2013/04/09 18:25), Glauber Costa wrote:
> We have now reached the point in which there is no real need to allocate
> page_cgroup upon system boot. We can defer it to the first memcg
> initialization, and if it fails, we treat it like any other memcg memory
> failures (like for instance, if the mem_cgroup structure itself failed).
> In the future, we may want to defer this to the first non-root cgroup
> initialization, but we are not there yet. With that, page_cgroup can be
> more silent in its initialization.
>
> Unfortunately, doing that for flatmem models would lead to significant
> vmalloc-area waste. Since big-memory 32-bit machines are quite common,
> this would be reality for most of them. This means that we will leave
> FLATMEM alone, and fix only the SPARSEMEM case. We modify the message
> slightly so that in future reports we know precisely if this message is
> from a flatmem kernel or a older kernel initializing page_cgroup early.
>
> Signed-off-by: Glauber Costa <glommer@parallels.com>
> Cc: Michal Hocko <mhocko@suse.cz>
> Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> ---
> include/linux/page_cgroup.h | 13 +++++++------
> init/main.c | 1 -
> mm/memcontrol.c | 2 ++
> mm/page_cgroup.c | 19 ++++++++-----------
> 4 files changed, 17 insertions(+), 18 deletions(-)
>
> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
> index 777a524..bfb43f0 100644
> --- a/include/linux/page_cgroup.h
> +++ b/include/linux/page_cgroup.h
> @@ -33,11 +33,16 @@ void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
> static inline void __init page_cgroup_init_flatmem(void)
> {
> }
> -extern void __init page_cgroup_init(void);
> +extern bool page_cgroup_init(void);
> #else
> void __init page_cgroup_init_flatmem(void);
> -static inline void __init page_cgroup_init(void)
> +/*
> + * If we reach here, we would have already initialized flatmem mappings.
> + * So just always succeed
> + */
> +static inline bool page_cgroup_init(void)
> {
> + return 0;
> }
> #endif
>
> @@ -94,10 +99,6 @@ static inline struct page_cgroup *lookup_page_cgroup(struct page *page)
> return NULL;
> }
>
> -static inline void page_cgroup_init(void)
> -{
> -}
> -
> static inline void __init page_cgroup_init_flatmem(void)
> {
> }
> diff --git a/init/main.c b/init/main.c
> index cee4b5c..49aa019 100644
> --- a/init/main.c
> +++ b/init/main.c
> @@ -592,7 +592,6 @@ asmlinkage void __init start_kernel(void)
> initrd_start = 0;
> }
> #endif
> - page_cgroup_init();
> debug_objects_mem_init();
> kmemleak_init();
> setup_per_cpu_pageset();
This is good.
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index f608546..59a5b1f 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -6357,6 +6357,8 @@ mem_cgroup_css_alloc(struct cgroup *cont)
> res_counter_init(&memcg->res, NULL);
> res_counter_init(&memcg->memsw, NULL);
> res_counter_init(&memcg->kmem, NULL);
> + if (page_cgroup_init())
> + goto free_out;
> }
>
> memcg->last_scanned_node = MAX_NUMNODES;
> diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
> index 6d757e3..679de38 100644
> --- a/mm/page_cgroup.c
> +++ b/mm/page_cgroup.c
> @@ -78,7 +78,8 @@ void __init page_cgroup_init_flatmem(void)
> }
> printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
> printk(KERN_INFO "please try 'cgroup_disable=memory' option if you"
> - " don't want memory cgroups\n");
> + " don't want memory cgroups. Alternatively, use SPARSEMEM mappings"
> + " to defer initialization until actual use.");
I'm sorry that I think you don't defer initialization until actual use...yet.
page_cgroup() will allocated at initialization even if memory cgroup is not
mounted at all. Right ?
> return;
> fail:
> printk(KERN_CRIT "allocation of page_cgroup failed.\n");
> @@ -266,13 +267,13 @@ static int __meminit page_cgroup_callback(struct notifier_block *self,
>
> #endif
>
> -void __init page_cgroup_init(void)
> +bool page_cgroup_init(void)
> {
> unsigned long pfn;
> int nid;
>
> if (mem_cgroup_disabled())
> - return;
> + return 0;
>
> for_each_node_state(nid, N_MEMORY) {
> unsigned long start_pfn, end_pfn;
> @@ -299,17 +300,13 @@ void __init page_cgroup_init(void)
> if (pfn_to_nid(pfn) != nid)
> continue;
> if (init_section_page_cgroup(pfn, nid))
> - goto oom;
> + return 1;
> }
> }
> +#ifdef CONFIG_MEMORY_HOTPLUG
> hotplug_memory_notifier(page_cgroup_callback, 0);
> - printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
> - printk(KERN_INFO "please try 'cgroup_disable=memory' option if you "
> - "don't want memory cgroups\n");
> - return;
> -oom:
> - printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
> - panic("Out of memory");
> +#endif
> + return 0;
IIUC, hotplug_memory_notifier() is NOP if CONFIG_MEMORY_HOTPLUG is set.
So, you don't need to add #ifdef
Thanks,
-Kame
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2013-04-09 16:16 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-04-09 9:25 [PATCH] memcg: defer page_cgroup initialization Glauber Costa
2013-04-09 13:36 ` Johannes Weiner
2013-04-09 13:45 ` Glauber Costa
2013-04-09 14:06 ` Glauber Costa
2013-04-09 16:15 ` Kamezawa Hiroyuki
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox