linux-mm.kvack.org archive mirror
* [PATCH] use CSS ID in swap_cgroup for saving memory
@ 2009-02-25  6:26 KAMEZAWA Hiroyuki
  2009-02-25  7:09 ` Li Zefan
  0 siblings, 1 reply; 4+ messages in thread
From: KAMEZAWA Hiroyuki @ 2009-02-25  6:26 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-mm, lizf, nishimura, balbir, menage, akpm

Maybe ready for wider testing. This was the original purpose of adding CSS IDs to cgroups.
Against mmotm-2009-02-24-16-23.
==
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

This patch tries to use the CSS ID for records in swap_cgroup.
With this, on a 64-bit machine, the size of a swap_cgroup record goes down from 8 bytes to 2 bytes.

This means that when 2GB of swap is equipped (assuming a page size of 4096 bytes):
	From: size of swap_cgroup = 2G/4k * 8 = 4 Mbytes.
	To:   size of swap_cgroup = 2G/4k * 2 = 1 Mbytes.
The reduction is large. Of course, there are trade-offs: the CSS ID lookup will add
overhead to swap-in/swap-out/swap-free (a back-of-envelope check of these numbers is
sketched below).

But in general,
  - swap is a resource which users tend to avoid using.
  - If swap is never used, the swap_cgroup area is not used.
  - According to traditional manuals, swap size should be proportional to
    memory size, and machine memory sizes keep increasing.

I think reducing the size of swap_cgroup makes sense.
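
For reference, a quick back-of-envelope check of the arithmetic above. This is a
standalone user-space sketch, not part of the patch; the 2GB swap size and
4096-byte page size are the assumptions stated above.

#include <stdio.h>

int main(void)
{
	unsigned long swap_bytes = 2UL * 1024 * 1024 * 1024;	/* 2GB of swap (assumed) */
	unsigned long page_size  = 4096;			/* assumed page size */
	unsigned long slots      = swap_bytes / page_size;	/* one swap_cgroup record per swap slot */

	/* old: one struct mem_cgroup pointer (8 bytes on 64-bit) per slot */
	printf("old: %lu bytes\n", slots * 8);
	/* new: one unsigned short CSS ID (2 bytes) per slot */
	printf("new: %lu bytes\n", slots * 2);
	return 0;
}

On 64-bit this prints 4194304 and 1048576 bytes, i.e. the 4MB -> 1MB reduction above.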
    
Note:
  - The ID->CSS lookup routine takes no locks; it runs under the RCU read side.
  - A memcg can become obsolete at rmdir(), but it is not freed while a refcnt
    from swap_cgroup is still held.

Changelog: v2 -> v3
 - fixed a NULL pointer bug reported by Nishimura.
 - fixed message in dmesg

Changelog: v1 -> v2
 - removed css_tryget().
 - fixed texts

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
 include/linux/page_cgroup.h |    9 ++----
 mm/memcontrol.c             |   66 ++++++++++++++++++++++++++++++++++++--------
 mm/page_cgroup.c            |   28 +++++++++---------
 3 files changed, 73 insertions(+), 30 deletions(-)

Index: mmotm-2.6.29-Feb24/include/linux/page_cgroup.h
===================================================================
--- mmotm-2.6.29-Feb24.orig/include/linux/page_cgroup.h
+++ mmotm-2.6.29-Feb24/include/linux/page_cgroup.h
@@ -91,22 +91,21 @@ static inline void page_cgroup_init(void
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 #include <linux/swap.h>
-extern struct mem_cgroup *
-swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem);
-extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent);
+extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
+extern unsigned short lookup_swap_cgroup(swp_entry_t ent);
 extern int swap_cgroup_swapon(int type, unsigned long max_pages);
 extern void swap_cgroup_swapoff(int type);
 #else
 #include <linux/swap.h>
 
 static inline
-struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
+unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
 {
 	return NULL;
 }
 
 static inline
-struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
+unsigned short lookup_swap_cgroup(swp_entry_t ent)
 {
 	return NULL;
 }
Index: mmotm-2.6.29-Feb24/mm/memcontrol.c
===================================================================
--- mmotm-2.6.29-Feb24.orig/mm/memcontrol.c
+++ mmotm-2.6.29-Feb24/mm/memcontrol.c
@@ -991,20 +991,41 @@ nomem:
 	return -ENOMEM;
 }
 
+/*
+ * A helper function to get a mem_cgroup from its CSS ID. Must be called
+ * under rcu_read_lock(). The caller must check css_is_removed() or the
+ * like if that is a concern (dropping the refcnt from swap can happen
+ * against an already-removed memcg).
+ */
+static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
+{
+	struct cgroup_subsys_state *css;
+
+	/* ID 0 is unused ID */
+	if (!id)
+		return NULL;
+	css = css_lookup(&mem_cgroup_subsys, id);
+	if (!css)
+		return NULL;
+	return container_of(css, struct mem_cgroup, css);
+}
+
 static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
 {
-	struct mem_cgroup *mem;
+	unsigned short id;
+	struct mem_cgroup *mem = NULL;
 	swp_entry_t ent;
 
 	if (!PageSwapCache(page))
 		return NULL;
 
 	ent.val = page_private(page);
-	mem = lookup_swap_cgroup(ent);
-	if (!mem)
-		return NULL;
-	if (!css_tryget(&mem->css))
-		return NULL;
+	id = lookup_swap_cgroup(ent);
+	rcu_read_lock();
+	mem = mem_cgroup_lookup(id);
+	if (mem && !css_tryget(&mem->css))
+		mem = NULL;
+	rcu_read_unlock();
 	return mem;
 }
 
@@ -1265,12 +1286,20 @@ int mem_cgroup_cache_charge(struct page 
 
 	if (do_swap_account && !ret && PageSwapCache(page)) {
 		swp_entry_t ent = {.val = page_private(page)};
+		unsigned short id;
 		/* avoid double counting */
-		mem = swap_cgroup_record(ent, NULL);
+		id = swap_cgroup_record(ent, 0);
+		rcu_read_lock();
+		mem = mem_cgroup_lookup(id);
 		if (mem) {
+			/*
+			 * Recorded ID can be obsolete. We avoid calling
+			 * css_tryget()
+			 */
 			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
 			mem_cgroup_put(mem);
 		}
+		rcu_read_unlock();
 	}
 	return ret;
 }
@@ -1335,13 +1364,21 @@ void mem_cgroup_commit_charge_swapin(str
 	 */
 	if (do_swap_account && PageSwapCache(page)) {
 		swp_entry_t ent = {.val = page_private(page)};
+		unsigned short id;
 		struct mem_cgroup *memcg;
-		memcg = swap_cgroup_record(ent, NULL);
+
+		id = swap_cgroup_record(ent, 0);
+		rcu_read_lock();
+		memcg = mem_cgroup_lookup(id);
 		if (memcg) {
+			/*
+			 * This recorded memcg can be obsolete one. So, avoid
+			 * calling css_tryget
+			 */
 			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
 			mem_cgroup_put(memcg);
 		}
-
+		rcu_read_unlock();
 	}
 	/* add this page(page_cgroup) to the LRU we want. */
 
@@ -1462,7 +1499,7 @@ void mem_cgroup_uncharge_swapcache(struc
 					MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
 	/* record memcg information */
 	if (do_swap_account && memcg) {
-		swap_cgroup_record(ent, memcg);
+		swap_cgroup_record(ent, css_id(&memcg->css));
 		mem_cgroup_get(memcg);
 	}
 	if (memcg)
@@ -1477,15 +1514,22 @@ void mem_cgroup_uncharge_swapcache(struc
 void mem_cgroup_uncharge_swap(swp_entry_t ent)
 {
 	struct mem_cgroup *memcg;
+	unsigned short id;
 
 	if (!do_swap_account)
 		return;
 
-	memcg = swap_cgroup_record(ent, NULL);
+	id = swap_cgroup_record(ent, 0);
+	rcu_read_lock();
+	memcg = mem_cgroup_lookup(id);
 	if (memcg) {
+		/*
+		 * This memcg can be obsolete one. We avoid calling css_tryget
+		 */
 		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
 		mem_cgroup_put(memcg);
 	}
+	rcu_read_unlock();
 }
 #endif
 
Index: mmotm-2.6.29-Feb24/mm/page_cgroup.c
===================================================================
--- mmotm-2.6.29-Feb24.orig/mm/page_cgroup.c
+++ mmotm-2.6.29-Feb24/mm/page_cgroup.c
@@ -290,7 +290,7 @@ struct swap_cgroup_ctrl swap_cgroup_ctrl
  * cgroup rather than pointer.
  */
 struct swap_cgroup {
-	struct mem_cgroup	*val;
+	unsigned short		id;
 };
 #define SC_PER_PAGE	(PAGE_SIZE/sizeof(struct swap_cgroup))
 #define SC_POS_MASK	(SC_PER_PAGE - 1)
@@ -342,10 +342,10 @@ not_enough_page:
  * @ent: swap entry to be recorded into
  * @mem: mem_cgroup to be recorded
  *
- * Returns old value at success, NULL at failure.
- * (Of course, old value can be NULL.)
+ * Returns old value at success, 0 at failure.
+ * (Of course, old value can be 0.)
  */
-struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
+unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
 {
 	int type = swp_type(ent);
 	unsigned long offset = swp_offset(ent);
@@ -354,18 +354,18 @@ struct mem_cgroup *swap_cgroup_record(sw
 	struct swap_cgroup_ctrl *ctrl;
 	struct page *mappage;
 	struct swap_cgroup *sc;
-	struct mem_cgroup *old;
+	unsigned short old;
 
 	if (!do_swap_account)
-		return NULL;
+		return 0;
 
 	ctrl = &swap_cgroup_ctrl[type];
 
 	mappage = ctrl->map[idx];
 	sc = page_address(mappage);
 	sc += pos;
-	old = sc->val;
-	sc->val = mem;
+	old = sc->id;
+	sc->id = id;
 
 	return old;
 }
@@ -374,9 +374,9 @@ struct mem_cgroup *swap_cgroup_record(sw
  * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
  * @ent: swap entry to be looked up.
  *
- * Returns pointer to mem_cgroup at success. NULL at failure.
+ * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
  */
-struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
+unsigned short lookup_swap_cgroup(swp_entry_t ent)
 {
 	int type = swp_type(ent);
 	unsigned long offset = swp_offset(ent);
@@ -385,16 +385,16 @@ struct mem_cgroup *lookup_swap_cgroup(sw
 	struct swap_cgroup_ctrl *ctrl;
 	struct page *mappage;
 	struct swap_cgroup *sc;
-	struct mem_cgroup *ret;
+	unsigned short ret;
 
 	if (!do_swap_account)
-		return NULL;
+		return 0;
 
 	ctrl = &swap_cgroup_ctrl[type];
 	mappage = ctrl->map[idx];
 	sc = page_address(mappage);
 	sc += pos;
-	ret = sc->val;
+	ret = sc->id;
 	return ret;
 }
 
@@ -432,7 +432,7 @@ int swap_cgroup_swapon(int type, unsigne
 
 	printk(KERN_INFO
 		"swap_cgroup: uses %ld bytes of vmalloc for pointer array space"
-		" and %ld bytes to hold mem_cgroup pointers on swap\n",
+		" and %ld bytes to hold mem_cgroup information per swap ents\n",
 		array_size, length * PAGE_SIZE);
 	printk(KERN_INFO
 	"swap_cgroup can be disabled by noswapaccount boot option.\n");


* Re: [PATCH] use CSS ID in swap_cgroup for saving memory
  2009-02-25  6:26 [PATCH] use CSS ID in swap_cgroup for saving memory KAMEZAWA Hiroyuki
@ 2009-02-25  7:09 ` Li Zefan
  2009-02-25  7:15   ` KAMEZAWA Hiroyuki
  0 siblings, 1 reply; 4+ messages in thread
From: Li Zefan @ 2009-02-25  7:09 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki; +Cc: linux-kernel, linux-mm, nishimura, balbir, menage, akpm

>  static inline
> -struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
> +unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
>  {
>  	return NULL;

return 0;

>  }
>  
>  static inline
> -struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
> +unsigned short lookup_swap_cgroup(swp_entry_t ent)
>  {
>  	return NULL;

return 0;

>  }

> @@ -1265,12 +1286,20 @@ int mem_cgroup_cache_charge(struct page 
>  
>  	if (do_swap_account && !ret && PageSwapCache(page)) {
>  		swp_entry_t ent = {.val = page_private(page)};
> +		unsigned short id;
>  		/* avoid double counting */
> -		mem = swap_cgroup_record(ent, NULL);
> +		id = swap_cgroup_record(ent, 0);
> +		rcu_read_lock();
> +		mem = mem_cgroup_lookup(id);
>  		if (mem) {
> +			/*
> +			 * Recorded ID can be obsolete. We avoid calling
> +			 * css_tryget()
> +			 */
>  			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
>  			mem_cgroup_put(mem);
>  		}
> +		rcu_read_unlock();
>  	}
>  	return ret;
>  }
> @@ -1335,13 +1364,21 @@ void mem_cgroup_commit_charge_swapin(str
>  	 */
>  	if (do_swap_account && PageSwapCache(page)) {
>  		swp_entry_t ent = {.val = page_private(page)};
> +		unsigned short id;
>  		struct mem_cgroup *memcg;
> -		memcg = swap_cgroup_record(ent, NULL);
> +
> +		id = swap_cgroup_record(ent, 0);
> +		rcu_read_lock();
> +		memcg = mem_cgroup_lookup(id);
>  		if (memcg) {
> +			/*
> +			 * This recorded memcg can be obsolete one. So, avoid
> +			 * calling css_tryget
> +			 */
>  			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
>  			mem_cgroup_put(memcg);
>  		}
> -
> +		rcu_read_unlock();
>  	}
>  	/* add this page(page_cgroup) to the LRU we want. */
>  
> @@ -1462,7 +1499,7 @@ void mem_cgroup_uncharge_swapcache(struc
>  					MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
>  	/* record memcg information */
>  	if (do_swap_account && memcg) {
> -		swap_cgroup_record(ent, memcg);
> +		swap_cgroup_record(ent, css_id(&memcg->css));
>  		mem_cgroup_get(memcg);
>  	}
>  	if (memcg)
> @@ -1477,15 +1514,22 @@ void mem_cgroup_uncharge_swapcache(struc
>  void mem_cgroup_uncharge_swap(swp_entry_t ent)
>  {
>  	struct mem_cgroup *memcg;
> +	unsigned short id;
>  
>  	if (!do_swap_account)
>  		return;
>  
> -	memcg = swap_cgroup_record(ent, NULL);
> +	id = swap_cgroup_record(ent, 0);
> +	rcu_read_lock();
> +	memcg = mem_cgroup_lookup(id);
>  	if (memcg) {
> +		/*
> +		 * This memcg can be obsolete one. We avoid calling css_tryget
> +		 */
>  		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
>  		mem_cgroup_put(memcg);
>  	}
> +	rcu_read_unlock();

can we have a common function for the above 3 pieces of code?

>  }
>  #endif
>  
> Index: mmotm-2.6.29-Feb24/mm/page_cgroup.c
> ===================================================================
> --- mmotm-2.6.29-Feb24.orig/mm/page_cgroup.c
> +++ mmotm-2.6.29-Feb24/mm/page_cgroup.c
> @@ -290,7 +290,7 @@ struct swap_cgroup_ctrl swap_cgroup_ctrl
>   * cgroup rather than pointer.
>   */

this comment should be updated/removed:

/*
 * This 8bytes seems big..maybe we can reduce this when we can use "id" for
 * cgroup rather than pointer.
 */

>  struct swap_cgroup {
> -	struct mem_cgroup	*val;
> +	unsigned short		id;
>  };
>  #define SC_PER_PAGE	(PAGE_SIZE/sizeof(struct swap_cgroup))
>  #define SC_POS_MASK	(SC_PER_PAGE - 1)


* Re: [PATCH] use CSS ID in swap_cgroup for saving memory
  2009-02-25  7:09 ` Li Zefan
@ 2009-02-25  7:15   ` KAMEZAWA Hiroyuki
  2009-02-26  6:55     ` [PATCH] use CSS ID in swap_cgroup for saving memory v4 KAMEZAWA Hiroyuki
  0 siblings, 1 reply; 4+ messages in thread
From: KAMEZAWA Hiroyuki @ 2009-02-25  7:15 UTC (permalink / raw)
  To: Li Zefan; +Cc: linux-kernel, linux-mm, nishimura, balbir, menage, akpm

On Wed, 25 Feb 2009 15:09:20 +0800
Li Zefan <lizf@cn.fujitsu.com> wrote:

> >  static inline
> > -struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
> > +unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
> >  {
> >  	return NULL;
> 
> return 0;
> 
Yes, it should be.

> >  }
> >  
> >  static inline
> > -struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
> > +unsigned short lookup_swap_cgroup(swp_entry_t ent)
> >  {
> >  	return NULL;
> 
> return 0;
> 
ok

> >  }
> 
> > @@ -1265,12 +1286,20 @@ int mem_cgroup_cache_charge(struct page 
> >  
> >  	if (do_swap_account && !ret && PageSwapCache(page)) {
> >  		swp_entry_t ent = {.val = page_private(page)};
> > +		unsigned short id;
> >  		/* avoid double counting */
> > -		mem = swap_cgroup_record(ent, NULL);
> > +		id = swap_cgroup_record(ent, 0);
> > +		rcu_read_lock();
> > +		mem = mem_cgroup_lookup(id);
> >  		if (mem) {
> > +			/*
> > +			 * Recorded ID can be obsolete. We avoid calling
> > +			 * css_tryget()
> > +			 */
> >  			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
> >  			mem_cgroup_put(mem);
> >  		}
> > +		rcu_read_unlock();
> >  	}
> >  	return ret;
> >  }
> > @@ -1335,13 +1364,21 @@ void mem_cgroup_commit_charge_swapin(str
> >  	 */
> >  	if (do_swap_account && PageSwapCache(page)) {
> >  		swp_entry_t ent = {.val = page_private(page)};
> > +		unsigned short id;
> >  		struct mem_cgroup *memcg;
> > -		memcg = swap_cgroup_record(ent, NULL);
> > +
> > +		id = swap_cgroup_record(ent, 0);
> > +		rcu_read_lock();
> > +		memcg = mem_cgroup_lookup(id);
> >  		if (memcg) {
> > +			/*
> > +			 * This recorded memcg can be obsolete one. So, avoid
> > +			 * calling css_tryget
> > +			 */
> >  			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
> >  			mem_cgroup_put(memcg);
> >  		}
> > -
> > +		rcu_read_unlock();
> >  	}
> >  	/* add this page(page_cgroup) to the LRU we want. */
> >  
> > @@ -1462,7 +1499,7 @@ void mem_cgroup_uncharge_swapcache(struc
> >  					MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
> >  	/* record memcg information */
> >  	if (do_swap_account && memcg) {
> > -		swap_cgroup_record(ent, memcg);
> > +		swap_cgroup_record(ent, css_id(&memcg->css));
> >  		mem_cgroup_get(memcg);
> >  	}
> >  	if (memcg)
> > @@ -1477,15 +1514,22 @@ void mem_cgroup_uncharge_swapcache(struc
> >  void mem_cgroup_uncharge_swap(swp_entry_t ent)
> >  {
> >  	struct mem_cgroup *memcg;
> > +	unsigned short id;
> >  
> >  	if (!do_swap_account)
> >  		return;
> >  
> > -	memcg = swap_cgroup_record(ent, NULL);
> > +	id = swap_cgroup_record(ent, 0);
> > +	rcu_read_lock();
> > +	memcg = mem_cgroup_lookup(id);
> >  	if (memcg) {
> > +		/*
> > +		 * This memcg can be obsolete one. We avoid calling css_tryget
> > +		 */
> >  		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
> >  		mem_cgroup_put(memcg);
> >  	}
> > +	rcu_read_unlock();
> 
> can we have a common function for the above 3 pieces of code?
> 

I don't think that's better. They are all under rcu_read_lock(), but each does
"charge" management with a different meaning/context.
These small pieces of code are worth open-coding.
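
(For reference only: the kind of common helper being discussed would look roughly
like the sketch below. The function name is hypothetical; it simply collects the
record-clear/lookup/uncharge pattern repeated at the three call sites in the patch,
done without css_tryget() because the memcg may already be obsolete.)

/*
 * Hypothetical helper, for illustration only: factor out the pattern used
 * by mem_cgroup_cache_charge(), mem_cgroup_commit_charge_swapin() and
 * mem_cgroup_uncharge_swap() in the patch above.
 */
static void mem_cgroup_uncharge_swap_record(swp_entry_t ent)
{
	struct mem_cgroup *memcg;
	unsigned short id;

	/* clear the recorded CSS ID for this swap entry */
	id = swap_cgroup_record(ent, 0);
	rcu_read_lock();
	memcg = mem_cgroup_lookup(id);
	if (memcg) {
		/* drop the memsw charge and the reference held since swap-out */
		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
		mem_cgroup_put(memcg);
	}
	rcu_read_unlock();
}

As noted above, each call site does this with a different meaning (swap-in
double-count fixup vs. freeing a swap entry), which is the argument for keeping
them open-coded.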




> >  }
> >  #endif
> >  
> > Index: mmotm-2.6.29-Feb24/mm/page_cgroup.c
> > ===================================================================
> > --- mmotm-2.6.29-Feb24.orig/mm/page_cgroup.c
> > +++ mmotm-2.6.29-Feb24/mm/page_cgroup.c
> > @@ -290,7 +290,7 @@ struct swap_cgroup_ctrl swap_cgroup_ctrl
> >   * cgroup rather than pointer.
> >   */
> 
> this comment should be updated/removed:
> 
> /*
>  * This 8bytes seems big..maybe we can reduce this when we can use "id" for
>  * cgroup rather than pointer.
>  */
> 
Ah, I missed this.

I'll update and post again tomorrow if there is no "don't do that".

Thanks,
-Kame


* [PATCH] use CSS ID in swap_cgroup for saving memory v4
  2009-02-25  7:15   ` KAMEZAWA Hiroyuki
@ 2009-02-26  6:55     ` KAMEZAWA Hiroyuki
  0 siblings, 0 replies; 4+ messages in thread
From: KAMEZAWA Hiroyuki @ 2009-02-26  6:55 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Li Zefan, linux-kernel, linux-mm, nishimura, balbir, menage, akpm

From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

This patch tries to use the CSS ID for records in swap_cgroup.
With this, on a 64-bit machine, the size of a swap_cgroup record goes down from 8 bytes to 2 bytes.

This means that when 2GB of swap is equipped (assuming a page size of 4096 bytes):
	From: size of swap_cgroup = 2G/4k * 8 = 4 Mbytes.
	To:   size of swap_cgroup = 2G/4k * 2 = 1 Mbytes.
The reduction is large. Of course, there are trade-offs: the CSS ID lookup will add
overhead to swap-in/swap-out/swap-free.

But in general,
  - swap is a resource which users tend to avoid using.
  - If swap is never used, the swap_cgroup area is not used.
  - According to traditional manuals, swap size should be proportional to
    memory size, and machine memory sizes keep increasing.

I think reducing the size of swap_cgroup makes sense.

Note:
  - The ID->CSS lookup routine takes no locks; it runs under the RCU read side.
  - A memcg can become obsolete at rmdir(), but it is not freed while a refcnt
    from swap_cgroup is still held.

This is still under test. Any comments are welcome.
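
As an aside, for readers of the record/lookup code in mm/page_cgroup.c below: each
swap type gets an array of map pages holding struct swap_cgroup records, and a swap
offset is located with the SC_PER_PAGE / SC_POS_MASK math. A minimal sketch of that
mapping (the helper name is invented for illustration; the constants and fields are
those used in the patch):

/*
 * Illustrative only. With a 2-byte struct swap_cgroup and 4096-byte pages,
 * SC_PER_PAGE is PAGE_SIZE / sizeof(struct swap_cgroup) = 2048 records per
 * map page.
 */
static struct swap_cgroup *sc_for_offset(struct swap_cgroup_ctrl *ctrl,
					 unsigned long offset)
{
	unsigned long idx = offset / SC_PER_PAGE;	/* which map page */
	unsigned long pos = offset & SC_POS_MASK;	/* record within that page */
	struct swap_cgroup *sc = page_address(ctrl->map[idx]);

	return sc + pos;
}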

Changelog: v3 -> v4
 - fixed not configured case
 - deleted unnecessary comments
Changelog: v2 -> v3
 - fixed a NULL pointer bug reported by Nishimura.
 - fixed message in dmesg

Changelog: v1 -> v2
 - removed css_tryget().
 - fixed texts

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
Index: mmotm-2.6.29-Feb24/include/linux/page_cgroup.h
===================================================================
--- mmotm-2.6.29-Feb24.orig/include/linux/page_cgroup.h
+++ mmotm-2.6.29-Feb24/include/linux/page_cgroup.h
@@ -91,24 +91,23 @@ static inline void page_cgroup_init(void
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 #include <linux/swap.h>
-extern struct mem_cgroup *
-swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem);
-extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent);
+extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
+extern unsigned short lookup_swap_cgroup(swp_entry_t ent);
 extern int swap_cgroup_swapon(int type, unsigned long max_pages);
 extern void swap_cgroup_swapoff(int type);
 #else
 #include <linux/swap.h>
 
 static inline
-struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
+unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
 {
-	return NULL;
+	return 0;
 }
 
 static inline
-struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
+unsigned short lookup_swap_cgroup(swp_entry_t ent)
 {
-	return NULL;
+	return 0;
 }
 
 static inline int
Index: mmotm-2.6.29-Feb24/mm/memcontrol.c
===================================================================
--- mmotm-2.6.29-Feb24.orig/mm/memcontrol.c
+++ mmotm-2.6.29-Feb24/mm/memcontrol.c
@@ -991,20 +991,41 @@ nomem:
 	return -ENOMEM;
 }
 
+/*
+ * A helper function to get a mem_cgroup from its CSS ID. Must be called
+ * under rcu_read_lock(). The caller must check css_is_removed() or the
+ * like if that is a concern (dropping the refcnt from swap can happen
+ * against an already-removed memcg).
+ */
+static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
+{
+	struct cgroup_subsys_state *css;
+
+	/* ID 0 is unused ID */
+	if (!id)
+		return NULL;
+	css = css_lookup(&mem_cgroup_subsys, id);
+	if (!css)
+		return NULL;
+	return container_of(css, struct mem_cgroup, css);
+}
+
 static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
 {
-	struct mem_cgroup *mem;
+	unsigned short id;
+	struct mem_cgroup *mem = NULL;
 	swp_entry_t ent;
 
 	if (!PageSwapCache(page))
 		return NULL;
 
 	ent.val = page_private(page);
-	mem = lookup_swap_cgroup(ent);
-	if (!mem)
-		return NULL;
-	if (!css_tryget(&mem->css))
-		return NULL;
+	id = lookup_swap_cgroup(ent);
+	rcu_read_lock();
+	mem = mem_cgroup_lookup(id);
+	if (mem && !css_tryget(&mem->css))
+		mem = NULL;
+	rcu_read_unlock();
 	return mem;
 }
 
@@ -1265,12 +1286,22 @@ int mem_cgroup_cache_charge(struct page 
 
 	if (do_swap_account && !ret && PageSwapCache(page)) {
 		swp_entry_t ent = {.val = page_private(page)};
+		unsigned short id;
 		/* avoid double counting */
-		mem = swap_cgroup_record(ent, NULL);
+		id = swap_cgroup_record(ent, 0);
+		rcu_read_lock();
+		mem = mem_cgroup_lookup(id);
 		if (mem) {
+			/*
+			 * We did swap-in. This entry is doubly counted in
+			 * both mem and memsw, so we uncharge it here.
+			 * The recorded ID can be obsolete, so we avoid
+			 * calling css_tryget().
+			 */
 			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
 			mem_cgroup_put(mem);
 		}
+		rcu_read_unlock();
 	}
 	return ret;
 }
@@ -1335,13 +1366,21 @@ void mem_cgroup_commit_charge_swapin(str
 	 */
 	if (do_swap_account && PageSwapCache(page)) {
 		swp_entry_t ent = {.val = page_private(page)};
+		unsigned short id;
 		struct mem_cgroup *memcg;
-		memcg = swap_cgroup_record(ent, NULL);
+
+		id = swap_cgroup_record(ent, 0);
+		rcu_read_lock();
+		memcg = mem_cgroup_lookup(id);
 		if (memcg) {
+			/*
+			 * This recorded memcg can be an obsolete one, so
+			 * avoid calling css_tryget().
+			 */
 			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
 			mem_cgroup_put(memcg);
 		}
-
+		rcu_read_unlock();
 	}
 	/* add this page(page_cgroup) to the LRU we want. */
 
@@ -1462,7 +1501,7 @@ void mem_cgroup_uncharge_swapcache(struc
 					MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
 	/* record memcg information */
 	if (do_swap_account && memcg) {
-		swap_cgroup_record(ent, memcg);
+		swap_cgroup_record(ent, css_id(&memcg->css));
 		mem_cgroup_get(memcg);
 	}
 	if (memcg)
@@ -1477,15 +1516,23 @@ void mem_cgroup_uncharge_swapcache(struc
 void mem_cgroup_uncharge_swap(swp_entry_t ent)
 {
 	struct mem_cgroup *memcg;
+	unsigned short id;
 
 	if (!do_swap_account)
 		return;
 
-	memcg = swap_cgroup_record(ent, NULL);
+	id = swap_cgroup_record(ent, 0);
+	rcu_read_lock();
+	memcg = mem_cgroup_lookup(id);
 	if (memcg) {
+		/*
+		 * We uncharge this because the swap entry is freed. This memcg
+		 * can be an obsolete one, so we avoid calling css_tryget().
+		 */
 		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
 		mem_cgroup_put(memcg);
 	}
+	rcu_read_unlock();
 }
 #endif
 
Index: mmotm-2.6.29-Feb24/mm/page_cgroup.c
===================================================================
--- mmotm-2.6.29-Feb24.orig/mm/page_cgroup.c
+++ mmotm-2.6.29-Feb24/mm/page_cgroup.c
@@ -285,12 +285,8 @@ struct swap_cgroup_ctrl {
 
 struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
 
-/*
- * This 8bytes seems big..maybe we can reduce this when we can use "id" for
- * cgroup rather than pointer.
- */
 struct swap_cgroup {
-	struct mem_cgroup	*val;
+	unsigned short		id;
 };
 #define SC_PER_PAGE	(PAGE_SIZE/sizeof(struct swap_cgroup))
 #define SC_POS_MASK	(SC_PER_PAGE - 1)
@@ -342,10 +338,10 @@ not_enough_page:
  * @ent: swap entry to be recorded into
  * @mem: mem_cgroup to be recorded
  *
- * Returns old value at success, NULL at failure.
- * (Of course, old value can be NULL.)
+ * Returns old value at success, 0 at failure.
+ * (Of course, old value can be 0.)
  */
-struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
+unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
 {
 	int type = swp_type(ent);
 	unsigned long offset = swp_offset(ent);
@@ -354,18 +350,18 @@ struct mem_cgroup *swap_cgroup_record(sw
 	struct swap_cgroup_ctrl *ctrl;
 	struct page *mappage;
 	struct swap_cgroup *sc;
-	struct mem_cgroup *old;
+	unsigned short old;
 
 	if (!do_swap_account)
-		return NULL;
+		return 0;
 
 	ctrl = &swap_cgroup_ctrl[type];
 
 	mappage = ctrl->map[idx];
 	sc = page_address(mappage);
 	sc += pos;
-	old = sc->val;
-	sc->val = mem;
+	old = sc->id;
+	sc->id = id;
 
 	return old;
 }
@@ -374,9 +370,9 @@ struct mem_cgroup *swap_cgroup_record(sw
  * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
  * @ent: swap entry to be looked up.
  *
- * Returns pointer to mem_cgroup at success. NULL at failure.
+ * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
  */
-struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
+unsigned short lookup_swap_cgroup(swp_entry_t ent)
 {
 	int type = swp_type(ent);
 	unsigned long offset = swp_offset(ent);
@@ -385,16 +381,16 @@ struct mem_cgroup *lookup_swap_cgroup(sw
 	struct swap_cgroup_ctrl *ctrl;
 	struct page *mappage;
 	struct swap_cgroup *sc;
-	struct mem_cgroup *ret;
+	unsigned short ret;
 
 	if (!do_swap_account)
-		return NULL;
+		return 0;
 
 	ctrl = &swap_cgroup_ctrl[type];
 	mappage = ctrl->map[idx];
 	sc = page_address(mappage);
 	sc += pos;
-	ret = sc->val;
+	ret = sc->id;
 	return ret;
 }
 
@@ -432,7 +428,7 @@ int swap_cgroup_swapon(int type, unsigne
 
 	printk(KERN_INFO
 		"swap_cgroup: uses %ld bytes of vmalloc for pointer array space"
-		" and %ld bytes to hold mem_cgroup pointers on swap\n",
+		" and %ld bytes to hold mem_cgroup information per swap ents\n",
 		array_size, length * PAGE_SIZE);
 	printk(KERN_INFO
 	"swap_cgroup can be disabled by noswapaccount boot option.\n");


Thread overview: 4+ messages
2009-02-25  6:26 [PATCH] use CSS ID in swap_cgroup for saving memory KAMEZAWA Hiroyuki
2009-02-25  7:09 ` Li Zefan
2009-02-25  7:15   ` KAMEZAWA Hiroyuki
2009-02-26  6:55     ` [PATCH] use CSS ID in swap_cgroup for saving memory v4 KAMEZAWA Hiroyuki
