* [PATCH] use CSS ID in swap_cgroup for saving memory
@ 2009-02-25 6:26 KAMEZAWA Hiroyuki
2009-02-25 7:09 ` Li Zefan
0 siblings, 1 reply; 4+ messages in thread
From: KAMEZAWA Hiroyuki @ 2009-02-25 6:26 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-mm, lizf, nishimura, balbir, menage, akpm
Maybe ready for wider testing. This is the original purpose for adding CSS ID to cgroup.
against mmotm-2009-02-24-16-23
==
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
This patch tries to use CSS ID for records in swap_cgroup.
By this, on 64bit machine, size of swap_cgroup goes down to 2 bytes from 8bytes.
This means, when 2GB of swap is equipped, (assume the page size is 4096bytes)
From size of swap_cgroup = 2G/4k * 8 = 4Mbytes.
To size of swap_cgroup = 2G/4k * 2 = 1Mbytes.
Reduction is large. Of course, there are trade-offs. This CSS ID will add
overhead to swap-in/swap-out/swap-free.
But in general,
- swap is a resource which users tend to avoid using.
- If swap is never used, swap_cgroup area is not used.
- Reading traditional manuals, size of swap should be proportional to
size of memory. Memory size of machine is increasing now.
I think reducing size of swap_cgroup makes sense.
Note:
- ID->CSS lookup routine has no locks, it's under RCU-Read-Side.
- memcg can be obsolete at rmdir() but not freed while refcnt from
swap_cgroup is available.
Changelog: v2 -> v3
- fixed a NULL pointer bug reported by Nishimura.
- fixed message in dmesg
Changelog: v1 -> v2
- removed css_tryget().
- fixed texts
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/page_cgroup.h | 9 ++----
mm/memcontrol.c | 66 ++++++++++++++++++++++++++++++++++++--------
mm/page_cgroup.c | 28 +++++++++---------
3 files changed, 73 insertions(+), 30 deletions(-)
Index: mmotm-2.6.29-Feb24/include/linux/page_cgroup.h
===================================================================
--- mmotm-2.6.29-Feb24.orig/include/linux/page_cgroup.h
+++ mmotm-2.6.29-Feb24/include/linux/page_cgroup.h
@@ -91,22 +91,21 @@ static inline void page_cgroup_init(void
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
#include <linux/swap.h>
-extern struct mem_cgroup *
-swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem);
-extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent);
+extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
+extern unsigned short lookup_swap_cgroup(swp_entry_t ent);
extern int swap_cgroup_swapon(int type, unsigned long max_pages);
extern void swap_cgroup_swapoff(int type);
#else
#include <linux/swap.h>
static inline
-struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
+unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{
return NULL;
}
static inline
-struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
+unsigned short lookup_swap_cgroup(swp_entry_t ent)
{
return NULL;
}
Index: mmotm-2.6.29-Feb24/mm/memcontrol.c
===================================================================
--- mmotm-2.6.29-Feb24.orig/mm/memcontrol.c
+++ mmotm-2.6.29-Feb24/mm/memcontrol.c
@@ -991,20 +991,41 @@ nomem:
return -ENOMEM;
}
+/*
+ * A helper function to get mem_cgroup from ID. must be called under
+ * rcu_read_lock(). The caller must check css_is_removed() or some if
+ * it's concern. (dropping refcnt from swap can be called against removed
+ * memcg.)
+ */
+static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
+{
+ struct cgroup_subsys_state *css;
+
+ /* ID 0 is unused ID */
+ if (!id)
+ return NULL;
+ css = css_lookup(&mem_cgroup_subsys, id);
+ if (!css)
+ return NULL;
+ return container_of(css, struct mem_cgroup, css);
+}
+
static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
{
- struct mem_cgroup *mem;
+ unsigned short id;
+ struct mem_cgroup *mem = NULL;
swp_entry_t ent;
if (!PageSwapCache(page))
return NULL;
ent.val = page_private(page);
- mem = lookup_swap_cgroup(ent);
- if (!mem)
- return NULL;
- if (!css_tryget(&mem->css))
- return NULL;
+ id = lookup_swap_cgroup(ent);
+ rcu_read_lock();
+ mem = mem_cgroup_lookup(id);
+ if (mem && !css_tryget(&mem->css))
+ mem = NULL;
+ rcu_read_unlock();
return mem;
}
@@ -1265,12 +1286,20 @@ int mem_cgroup_cache_charge(struct page
if (do_swap_account && !ret && PageSwapCache(page)) {
swp_entry_t ent = {.val = page_private(page)};
+ unsigned short id;
/* avoid double counting */
- mem = swap_cgroup_record(ent, NULL);
+ id = swap_cgroup_record(ent, 0);
+ rcu_read_lock();
+ mem = mem_cgroup_lookup(id);
if (mem) {
+ /*
+ * Recorded ID can be obsolete. We avoid calling
+ * css_tryget()
+ */
res_counter_uncharge(&mem->memsw, PAGE_SIZE);
mem_cgroup_put(mem);
}
+ rcu_read_unlock();
}
return ret;
}
@@ -1335,13 +1364,21 @@ void mem_cgroup_commit_charge_swapin(str
*/
if (do_swap_account && PageSwapCache(page)) {
swp_entry_t ent = {.val = page_private(page)};
+ unsigned short id;
struct mem_cgroup *memcg;
- memcg = swap_cgroup_record(ent, NULL);
+
+ id = swap_cgroup_record(ent, 0);
+ rcu_read_lock();
+ memcg = mem_cgroup_lookup(id);
if (memcg) {
+ /*
+ * This recorded memcg can be obsolete one. So, avoid
+ * calling css_tryget
+ */
res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_put(memcg);
}
-
+ rcu_read_unlock();
}
/* add this page(page_cgroup) to the LRU we want. */
@@ -1462,7 +1499,7 @@ void mem_cgroup_uncharge_swapcache(struc
MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
/* record memcg information */
if (do_swap_account && memcg) {
- swap_cgroup_record(ent, memcg);
+ swap_cgroup_record(ent, css_id(&memcg->css));
mem_cgroup_get(memcg);
}
if (memcg)
@@ -1477,15 +1514,22 @@ void mem_cgroup_uncharge_swapcache(struc
void mem_cgroup_uncharge_swap(swp_entry_t ent)
{
struct mem_cgroup *memcg;
+ unsigned short id;
if (!do_swap_account)
return;
- memcg = swap_cgroup_record(ent, NULL);
+ id = swap_cgroup_record(ent, 0);
+ rcu_read_lock();
+ memcg = mem_cgroup_lookup(id);
if (memcg) {
+ /*
+ * This memcg can be obsolete one. We avoid calling css_tryget
+ */
res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_put(memcg);
}
+ rcu_read_unlock();
}
#endif
Index: mmotm-2.6.29-Feb24/mm/page_cgroup.c
===================================================================
--- mmotm-2.6.29-Feb24.orig/mm/page_cgroup.c
+++ mmotm-2.6.29-Feb24/mm/page_cgroup.c
@@ -290,7 +290,7 @@ struct swap_cgroup_ctrl swap_cgroup_ctrl
* cgroup rather than pointer.
*/
struct swap_cgroup {
- struct mem_cgroup *val;
+ unsigned short id;
};
#define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup))
#define SC_POS_MASK (SC_PER_PAGE - 1)
@@ -342,10 +342,10 @@ not_enough_page:
* @ent: swap entry to be recorded into
* @mem: mem_cgroup to be recorded
*
- * Returns old value at success, NULL at failure.
- * (Of course, old value can be NULL.)
+ * Returns old value at success, 0 at failure.
+ * (Of course, old value can be 0.)
*/
-struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
+unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{
int type = swp_type(ent);
unsigned long offset = swp_offset(ent);
@@ -354,18 +354,18 @@ struct mem_cgroup *swap_cgroup_record(sw
struct swap_cgroup_ctrl *ctrl;
struct page *mappage;
struct swap_cgroup *sc;
- struct mem_cgroup *old;
+ unsigned short old;
if (!do_swap_account)
- return NULL;
+ return 0;
ctrl = &swap_cgroup_ctrl[type];
mappage = ctrl->map[idx];
sc = page_address(mappage);
sc += pos;
- old = sc->val;
- sc->val = mem;
+ old = sc->id;
+ sc->id = id;
return old;
}
@@ -374,9 +374,9 @@ struct mem_cgroup *swap_cgroup_record(sw
* lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
* @ent: swap entry to be looked up.
*
- * Returns pointer to mem_cgroup at success. NULL at failure.
+ * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
*/
-struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
+unsigned short lookup_swap_cgroup(swp_entry_t ent)
{
int type = swp_type(ent);
unsigned long offset = swp_offset(ent);
@@ -385,16 +385,16 @@ struct mem_cgroup *lookup_swap_cgroup(sw
struct swap_cgroup_ctrl *ctrl;
struct page *mappage;
struct swap_cgroup *sc;
- struct mem_cgroup *ret;
+ unsigned short ret;
if (!do_swap_account)
- return NULL;
+ return 0;
ctrl = &swap_cgroup_ctrl[type];
mappage = ctrl->map[idx];
sc = page_address(mappage);
sc += pos;
- ret = sc->val;
+ ret = sc->id;
return ret;
}
@@ -432,7 +432,7 @@ int swap_cgroup_swapon(int type, unsigne
printk(KERN_INFO
"swap_cgroup: uses %ld bytes of vmalloc for pointer array space"
- " and %ld bytes to hold mem_cgroup pointers on swap\n",
+ " and %ld bytes to hold mem_cgroup information per swap ents\n",
array_size, length * PAGE_SIZE);
printk(KERN_INFO
"swap_cgroup can be disabled by noswapaccount boot option.\n");
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] use CSS ID in swap_cgroup for saving memory
2009-02-25 6:26 [PATCH] use CSS ID in swap_cgroup for saving memory KAMEZAWA Hiroyuki
@ 2009-02-25 7:09 ` Li Zefan
2009-02-25 7:15 ` KAMEZAWA Hiroyuki
0 siblings, 1 reply; 4+ messages in thread
From: Li Zefan @ 2009-02-25 7:09 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-kernel, linux-mm, nishimura, balbir, menage, akpm
> static inline
> -struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
> +unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
> {
> return NULL;
return 0;
> }
>
> static inline
> -struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
> +unsigned short lookup_swap_cgroup(swp_entry_t ent)
> {
> return NULL;
return 0;
> }
> @@ -1265,12 +1286,20 @@ int mem_cgroup_cache_charge(struct page
>
> if (do_swap_account && !ret && PageSwapCache(page)) {
> swp_entry_t ent = {.val = page_private(page)};
> + unsigned short id;
> /* avoid double counting */
> - mem = swap_cgroup_record(ent, NULL);
> + id = swap_cgroup_record(ent, 0);
> + rcu_read_lock();
> + mem = mem_cgroup_lookup(id);
> if (mem) {
> + /*
> + * Recorded ID can be obsolete. We avoid calling
> + * css_tryget()
> + */
> res_counter_uncharge(&mem->memsw, PAGE_SIZE);
> mem_cgroup_put(mem);
> }
> + rcu_read_unlock();
> }
> return ret;
> }
> @@ -1335,13 +1364,21 @@ void mem_cgroup_commit_charge_swapin(str
> */
> if (do_swap_account && PageSwapCache(page)) {
> swp_entry_t ent = {.val = page_private(page)};
> + unsigned short id;
> struct mem_cgroup *memcg;
> - memcg = swap_cgroup_record(ent, NULL);
> +
> + id = swap_cgroup_record(ent, 0);
> + rcu_read_lock();
> + memcg = mem_cgroup_lookup(id);
> if (memcg) {
> + /*
> + * This recorded memcg can be obsolete one. So, avoid
> + * calling css_tryget
> + */
> res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
> mem_cgroup_put(memcg);
> }
> -
> + rcu_read_unlock();
> }
> /* add this page(page_cgroup) to the LRU we want. */
>
> @@ -1462,7 +1499,7 @@ void mem_cgroup_uncharge_swapcache(struc
> MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
> /* record memcg information */
> if (do_swap_account && memcg) {
> - swap_cgroup_record(ent, memcg);
> + swap_cgroup_record(ent, css_id(&memcg->css));
> mem_cgroup_get(memcg);
> }
> if (memcg)
> @@ -1477,15 +1514,22 @@ void mem_cgroup_uncharge_swapcache(struc
> void mem_cgroup_uncharge_swap(swp_entry_t ent)
> {
> struct mem_cgroup *memcg;
> + unsigned short id;
>
> if (!do_swap_account)
> return;
>
> - memcg = swap_cgroup_record(ent, NULL);
> + id = swap_cgroup_record(ent, 0);
> + rcu_read_lock();
> + memcg = mem_cgroup_lookup(id);
> if (memcg) {
> + /*
> + * This memcg can be obsolete one. We avoid calling css_tryget
> + */
> res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
> mem_cgroup_put(memcg);
> }
> + rcu_read_unlock();
can we have a common function for the above 3 pieces of code?
> }
> #endif
>
> Index: mmotm-2.6.29-Feb24/mm/page_cgroup.c
> ===================================================================
> --- mmotm-2.6.29-Feb24.orig/mm/page_cgroup.c
> +++ mmotm-2.6.29-Feb24/mm/page_cgroup.c
> @@ -290,7 +290,7 @@ struct swap_cgroup_ctrl swap_cgroup_ctrl
> * cgroup rather than pointer.
> */
this comment should be updated/removed:
/*
* This 8bytes seems big..maybe we can reduce this when we can use "id" for
* cgroup rather than pointer.
*/
> struct swap_cgroup {
> - struct mem_cgroup *val;
> + unsigned short id;
> };
> #define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup))
> #define SC_POS_MASK (SC_PER_PAGE - 1)
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] use CSS ID in swap_cgroup for saving memory
2009-02-25 7:09 ` Li Zefan
@ 2009-02-25 7:15 ` KAMEZAWA Hiroyuki
2009-02-26 6:55 ` [PATCH] use CSS ID in swap_cgroup for saving memory v4 KAMEZAWA Hiroyuki
0 siblings, 1 reply; 4+ messages in thread
From: KAMEZAWA Hiroyuki @ 2009-02-25 7:15 UTC (permalink / raw)
To: Li Zefan; +Cc: linux-kernel, linux-mm, nishimura, balbir, menage, akpm
On Wed, 25 Feb 2009 15:09:20 +0800
Li Zefan <lizf@cn.fujitsu.com> wrote:
> > static inline
> > -struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
> > +unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
> > {
> > return NULL;
>
> return 0;
>
should be..
> > }
> >
> > static inline
> > -struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
> > +unsigned short lookup_swap_cgroup(swp_entry_t ent)
> > {
> > return NULL;
>
> return 0;
>
ok
> > }
>
> > @@ -1265,12 +1286,20 @@ int mem_cgroup_cache_charge(struct page
> >
> > if (do_swap_account && !ret && PageSwapCache(page)) {
> > swp_entry_t ent = {.val = page_private(page)};
> > + unsigned short id;
> > /* avoid double counting */
> > - mem = swap_cgroup_record(ent, NULL);
> > + id = swap_cgroup_record(ent, 0);
> > + rcu_read_lock();
> > + mem = mem_cgroup_lookup(id);
> > if (mem) {
> > + /*
> > + * Recorded ID can be obsolete. We avoid calling
> > + * css_tryget()
> > + */
> > res_counter_uncharge(&mem->memsw, PAGE_SIZE);
> > mem_cgroup_put(mem);
> > }
> > + rcu_read_unlock();
> > }
> > return ret;
> > }
> > @@ -1335,13 +1364,21 @@ void mem_cgroup_commit_charge_swapin(str
> > */
> > if (do_swap_account && PageSwapCache(page)) {
> > swp_entry_t ent = {.val = page_private(page)};
> > + unsigned short id;
> > struct mem_cgroup *memcg;
> > - memcg = swap_cgroup_record(ent, NULL);
> > +
> > + id = swap_cgroup_record(ent, 0);
> > + rcu_read_lock();
> > + memcg = mem_cgroup_lookup(id);
> > if (memcg) {
> > + /*
> > + * This recorded memcg can be obsolete one. So, avoid
> > + * calling css_tryget
> > + */
> > res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
> > mem_cgroup_put(memcg);
> > }
> > -
> > + rcu_read_unlock();
> > }
> > /* add this page(page_cgroup) to the LRU we want. */
> >
> > @@ -1462,7 +1499,7 @@ void mem_cgroup_uncharge_swapcache(struc
> > MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
> > /* record memcg information */
> > if (do_swap_account && memcg) {
> > - swap_cgroup_record(ent, memcg);
> > + swap_cgroup_record(ent, css_id(&memcg->css));
> > mem_cgroup_get(memcg);
> > }
> > if (memcg)
> > @@ -1477,15 +1514,22 @@ void mem_cgroup_uncharge_swapcache(struc
> > void mem_cgroup_uncharge_swap(swp_entry_t ent)
> > {
> > struct mem_cgroup *memcg;
> > + unsigned short id;
> >
> > if (!do_swap_account)
> > return;
> >
> > - memcg = swap_cgroup_record(ent, NULL);
> > + id = swap_cgroup_record(ent, 0);
> > + rcu_read_lock();
> > + memcg = mem_cgroup_lookup(id);
> > if (memcg) {
> > + /*
> > + * This memcg can be obsolete one. We avoid calling css_tryget
> > + */
> > res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
> > mem_cgroup_put(memcg);
> > }
> > + rcu_read_unlock();
>
> can we have a common function for the above 3 pieces of code?
>
I don't think it's better. All are under rcu_read_lock() and do
"charge" management in different meanings/contexts.
These small pieces of code are worth open-coding.
> > }
> > #endif
> >
> > Index: mmotm-2.6.29-Feb24/mm/page_cgroup.c
> > ===================================================================
> > --- mmotm-2.6.29-Feb24.orig/mm/page_cgroup.c
> > +++ mmotm-2.6.29-Feb24/mm/page_cgroup.c
> > @@ -290,7 +290,7 @@ struct swap_cgroup_ctrl swap_cgroup_ctrl
> > * cgroup rather than pointer.
> > */
>
> this comment should be updated/removed:
>
> /*
> * This 8bytes seems big..maybe we can reduce this when we can use "id" for
> * cgroup rather than pointer.
> */
>
Ah, I missed this.
I'll update and post tomorrow, again if no "don't do that"
Thanks,
-Kame
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH] use CSS ID in swap_cgroup for saving memory v4
2009-02-25 7:15 ` KAMEZAWA Hiroyuki
@ 2009-02-26 6:55 ` KAMEZAWA Hiroyuki
0 siblings, 0 replies; 4+ messages in thread
From: KAMEZAWA Hiroyuki @ 2009-02-26 6:55 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki
Cc: Li Zefan, linux-kernel, linux-mm, nishimura, balbir, menage, akpm
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
This patch tries to use CSS ID for records in swap_cgroup.
By this, on 64bit machine, size of swap_cgroup goes down to 2 bytes from 8bytes.
This means, when 2GB of swap is equipped, (assume the page size is 4096bytes)
From size of swap_cgroup = 2G/4k * 8 = 4Mbytes.
To size of swap_cgroup = 2G/4k * 2 = 1Mbytes.
Reduction is large. Of course, there are trade-offs. This CSS ID will add
overhead to swap-in/swap-out/swap-free.
But in general,
- swap is a resource which users tend to avoid using.
- If swap is never used, swap_cgroup area is not used.
- Reading traditional manuals, size of swap should be proportional to
size of memory. Memory size of machine is increasing now.
I think reducing size of swap_cgroup makes sense.
Note:
- ID->CSS lookup routine has no locks, it's under RCU-Read-Side.
- memcg can be obsolete at rmdir() but not freed while refcnt from
swap_cgroup is available.
This is still under test. Any comments are welcome.
Changelog: v3 -> v4
- fixed not configured case
- deleted unnecessary comments
Changelog: v2 -> v3
- fixed a NULL pointer bug reported by Nishimura.
- fixed message in dmesg
Changelog: v1 -> v2
- removed css_tryget().
- fixed texts
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
Index: mmotm-2.6.29-Feb24/include/linux/page_cgroup.h
===================================================================
--- mmotm-2.6.29-Feb24.orig/include/linux/page_cgroup.h
+++ mmotm-2.6.29-Feb24/include/linux/page_cgroup.h
@@ -91,24 +91,23 @@ static inline void page_cgroup_init(void
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
#include <linux/swap.h>
-extern struct mem_cgroup *
-swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem);
-extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent);
+extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
+extern unsigned short lookup_swap_cgroup(swp_entry_t ent);
extern int swap_cgroup_swapon(int type, unsigned long max_pages);
extern void swap_cgroup_swapoff(int type);
#else
#include <linux/swap.h>
static inline
-struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
+unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{
- return NULL;
+ return 0;
}
static inline
-struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
+unsigned short lookup_swap_cgroup(swp_entry_t ent)
{
- return NULL;
+ return 0;
}
static inline int
Index: mmotm-2.6.29-Feb24/mm/memcontrol.c
===================================================================
--- mmotm-2.6.29-Feb24.orig/mm/memcontrol.c
+++ mmotm-2.6.29-Feb24/mm/memcontrol.c
@@ -991,20 +991,41 @@ nomem:
return -ENOMEM;
}
+/*
+ * A helper function to get mem_cgroup from ID. must be called under
+ * rcu_read_lock(). The caller must check css_is_removed() or some if
+ * it's concern. (dropping refcnt from swap can be called against removed
+ * memcg.)
+ */
+static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
+{
+ struct cgroup_subsys_state *css;
+
+ /* ID 0 is unused ID */
+ if (!id)
+ return NULL;
+ css = css_lookup(&mem_cgroup_subsys, id);
+ if (!css)
+ return NULL;
+ return container_of(css, struct mem_cgroup, css);
+}
+
static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
{
- struct mem_cgroup *mem;
+ unsigned short id;
+ struct mem_cgroup *mem = NULL;
swp_entry_t ent;
if (!PageSwapCache(page))
return NULL;
ent.val = page_private(page);
- mem = lookup_swap_cgroup(ent);
- if (!mem)
- return NULL;
- if (!css_tryget(&mem->css))
- return NULL;
+ id = lookup_swap_cgroup(ent);
+ rcu_read_lock();
+ mem = mem_cgroup_lookup(id);
+ if (mem && !css_tryget(&mem->css))
+ mem = NULL;
+ rcu_read_unlock();
return mem;
}
@@ -1265,12 +1286,22 @@ int mem_cgroup_cache_charge(struct page
if (do_swap_account && !ret && PageSwapCache(page)) {
swp_entry_t ent = {.val = page_private(page)};
+ unsigned short id;
/* avoid double counting */
- mem = swap_cgroup_record(ent, NULL);
+ id = swap_cgroup_record(ent, 0);
+ rcu_read_lock();
+ mem = mem_cgroup_lookup(id);
if (mem) {
+ /*
+ * We did swap-in. Then, this entry is doubly counted
+ * both in mem and memsw. We uncharge it, here.
+ * Recorded ID can be obsolete. We avoid calling
+ * css_tryget()
+ */
res_counter_uncharge(&mem->memsw, PAGE_SIZE);
mem_cgroup_put(mem);
}
+ rcu_read_unlock();
}
return ret;
}
@@ -1335,13 +1366,21 @@ void mem_cgroup_commit_charge_swapin(str
*/
if (do_swap_account && PageSwapCache(page)) {
swp_entry_t ent = {.val = page_private(page)};
+ unsigned short id;
struct mem_cgroup *memcg;
- memcg = swap_cgroup_record(ent, NULL);
+
+ id = swap_cgroup_record(ent, 0);
+ rcu_read_lock();
+ memcg = mem_cgroup_lookup(id);
if (memcg) {
+ /*
+ * This recorded memcg can be obsolete one. So, avoid
+ * calling css_tryget
+ */
res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_put(memcg);
}
-
+ rcu_read_unlock();
}
/* add this page(page_cgroup) to the LRU we want. */
@@ -1462,7 +1501,7 @@ void mem_cgroup_uncharge_swapcache(struc
MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
/* record memcg information */
if (do_swap_account && memcg) {
- swap_cgroup_record(ent, memcg);
+ swap_cgroup_record(ent, css_id(&memcg->css));
mem_cgroup_get(memcg);
}
if (memcg)
@@ -1477,15 +1516,23 @@ void mem_cgroup_uncharge_swapcache(struc
void mem_cgroup_uncharge_swap(swp_entry_t ent)
{
struct mem_cgroup *memcg;
+ unsigned short id;
if (!do_swap_account)
return;
- memcg = swap_cgroup_record(ent, NULL);
+ id = swap_cgroup_record(ent, 0);
+ rcu_read_lock();
+ memcg = mem_cgroup_lookup(id);
if (memcg) {
+ /*
+ * We uncharge this because swap is freed.
+ * This memcg can be obsolete one. We avoid calling css_tryget
+ */
res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_put(memcg);
}
+ rcu_read_unlock();
}
#endif
Index: mmotm-2.6.29-Feb24/mm/page_cgroup.c
===================================================================
--- mmotm-2.6.29-Feb24.orig/mm/page_cgroup.c
+++ mmotm-2.6.29-Feb24/mm/page_cgroup.c
@@ -285,12 +285,8 @@ struct swap_cgroup_ctrl {
struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
-/*
- * This 8bytes seems big..maybe we can reduce this when we can use "id" for
- * cgroup rather than pointer.
- */
struct swap_cgroup {
- struct mem_cgroup *val;
+ unsigned short id;
};
#define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup))
#define SC_POS_MASK (SC_PER_PAGE - 1)
@@ -342,10 +338,10 @@ not_enough_page:
* @ent: swap entry to be recorded into
* @mem: mem_cgroup to be recorded
*
- * Returns old value at success, NULL at failure.
- * (Of course, old value can be NULL.)
+ * Returns old value at success, 0 at failure.
+ * (Of course, old value can be 0.)
*/
-struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
+unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{
int type = swp_type(ent);
unsigned long offset = swp_offset(ent);
@@ -354,18 +350,18 @@ struct mem_cgroup *swap_cgroup_record(sw
struct swap_cgroup_ctrl *ctrl;
struct page *mappage;
struct swap_cgroup *sc;
- struct mem_cgroup *old;
+ unsigned short old;
if (!do_swap_account)
- return NULL;
+ return 0;
ctrl = &swap_cgroup_ctrl[type];
mappage = ctrl->map[idx];
sc = page_address(mappage);
sc += pos;
- old = sc->val;
- sc->val = mem;
+ old = sc->id;
+ sc->id = id;
return old;
}
@@ -374,9 +370,9 @@ struct mem_cgroup *swap_cgroup_record(sw
* lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
* @ent: swap entry to be looked up.
*
- * Returns pointer to mem_cgroup at success. NULL at failure.
+ * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
*/
-struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
+unsigned short lookup_swap_cgroup(swp_entry_t ent)
{
int type = swp_type(ent);
unsigned long offset = swp_offset(ent);
@@ -385,16 +381,16 @@ struct mem_cgroup *lookup_swap_cgroup(sw
struct swap_cgroup_ctrl *ctrl;
struct page *mappage;
struct swap_cgroup *sc;
- struct mem_cgroup *ret;
+ unsigned short ret;
if (!do_swap_account)
- return NULL;
+ return 0;
ctrl = &swap_cgroup_ctrl[type];
mappage = ctrl->map[idx];
sc = page_address(mappage);
sc += pos;
- ret = sc->val;
+ ret = sc->id;
return ret;
}
@@ -432,7 +428,7 @@ int swap_cgroup_swapon(int type, unsigne
printk(KERN_INFO
"swap_cgroup: uses %ld bytes of vmalloc for pointer array space"
- " and %ld bytes to hold mem_cgroup pointers on swap\n",
+ " and %ld bytes to hold mem_cgroup information per swap ents\n",
array_size, length * PAGE_SIZE);
printk(KERN_INFO
"swap_cgroup can be disabled by noswapaccount boot option.\n");
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2009-02-26 6:56 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-02-25 6:26 [PATCH] use CSS ID in swap_cgroup for saving memory KAMEZAWA Hiroyuki
2009-02-25 7:09 ` Li Zefan
2009-02-25 7:15 ` KAMEZAWA Hiroyuki
2009-02-26 6:55 ` [PATCH] use CSS ID in swap_cgroup for saving memory v4 KAMEZAWA Hiroyuki
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox