linux-mm.kvack.org archive mirror
* Help Resource Counters Scale better (v4)
@ 2009-08-11 14:44 Balbir Singh
  2009-08-11 14:54 ` Prarit Bhargava
                   ` (2 more replies)
  0 siblings, 3 replies; 20+ messages in thread
From: Balbir Singh @ 2009-08-11 14:44 UTC (permalink / raw)
  To: Andrew Morton, KAMEZAWA Hiroyuki, nishimura, KOSAKI Motohiro,
	menage, Prarit Bhargava, andi.kleen, Pavel Emelianov, lizf
  Cc: linux-kernel, linux-mm


Enhancement: Remove the overhead of root based resource counter accounting

From: Balbir Singh <balbir@linux.vnet.ibm.com>

This patch reduces the resource counter overhead (mostly spinlock)
associated with the root cgroup. This is a part of the several
patches to reduce mem cgroup overhead. I had posted other
approaches earlier (including using percpu counters). Those
patches will be a natural addition and will be added iteratively
on top of these.
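
For context, every charge and uncharge currently funnels through the
res_counter spinlock, which all CPUs contend on for the root cgroup. In
rough shape (a simplified sketch of the locking cost, not the exact
res_counter code):

	/* per-page cost being avoided for root: a shared, contended lock */
	spin_lock_irqsave(&counter->lock, flags);
	if (counter->usage + PAGE_SIZE > counter->limit)
		ret = -ENOMEM;	/* over limit, caller must reclaim */
	else
		counter->usage += PAGE_SIZE;
	spin_unlock_irqrestore(&counter->lock, flags);

Since the root cgroup no longer enforces a limit, it can skip this lock
entirely.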

The patch stops resource counter accounting for the root cgroup.
The data for display is derived from the statistics we maintain
via mem_cgroup_charge_statistics (which is more scalable).

The test results I see on a 24-way system show that

1. The lock contention disappears from /proc/lock_stats
2. The results of the test are comparable to running with
   cgroup_disable=memory.

Please test/review.

---

 mm/memcontrol.c |   89 +++++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 70 insertions(+), 19 deletions(-)


diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 48a38e1..6d82f8d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -70,6 +70,7 @@ enum mem_cgroup_stat_index {
 	MEM_CGROUP_STAT_PGPGIN_COUNT,	/* # of pages paged in */
 	MEM_CGROUP_STAT_PGPGOUT_COUNT,	/* # of pages paged out */
 	MEM_CGROUP_STAT_EVENTS,	/* sum of pagein + pageout for internal use */
+	MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
 
 	MEM_CGROUP_STAT_NSTATS,
 };
@@ -478,6 +479,19 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
 	return mz;
 }
 
+static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
+					 bool charge)
+{
+	int val = (charge)? 1 : -1;
+	struct mem_cgroup_stat *stat = &mem->stat;
+	struct mem_cgroup_stat_cpu *cpustat;
+	int cpu = get_cpu();
+
+	cpustat = &stat->cpustat[cpu];
+	__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val);
+	put_cpu();
+}
+
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
 					 struct page_cgroup *pc,
 					 bool charge)
@@ -1281,9 +1295,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	VM_BUG_ON(css_is_removed(&mem->css));
 
 	while (1) {
-		int ret;
+		int ret = 0;
 		unsigned long flags = 0;
 
+		if (mem_cgroup_is_root(mem))
+			goto done;
 		ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res,
 						&soft_fail_res);
 		if (likely(!ret)) {
@@ -1343,6 +1359,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 		if (mem_cgroup_soft_limit_check(mem_over_soft_limit))
 			mem_cgroup_update_tree(mem_over_soft_limit, page);
 	}
+done:
 	return 0;
 nomem:
 	css_put(&mem->css);
@@ -1415,9 +1432,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 	lock_page_cgroup(pc);
 	if (unlikely(PageCgroupUsed(pc))) {
 		unlock_page_cgroup(pc);
-		res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
-		if (do_swap_account)
-			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+		if (!mem_cgroup_is_root(mem)) {
+			res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+			if (do_swap_account)
+				res_counter_uncharge(&mem->memsw, PAGE_SIZE,
+							NULL);
+		}
 		css_put(&mem->css);
 		return;
 	}
@@ -1494,7 +1514,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 	if (pc->mem_cgroup != from)
 		goto out;
 
-	res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
+	if (!mem_cgroup_is_root(from))
+		res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
 	mem_cgroup_charge_statistics(from, pc, false);
 
 	page = pc->page;
@@ -1513,7 +1534,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 						1);
 	}
 
-	if (do_swap_account)
+	if (do_swap_account && !mem_cgroup_is_root(from))
 		res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL);
 	css_put(&from->css);
 
@@ -1584,9 +1605,11 @@ uncharge:
 	/* drop extra refcnt by try_charge() */
 	css_put(&parent->css);
 	/* uncharge if move fails */
-	res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
-	if (do_swap_account)
-		res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
+	if (!mem_cgroup_is_root(parent)) {
+		res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
+		if (do_swap_account)
+			res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
+	}
 	return ret;
 }
 
@@ -1775,7 +1798,10 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 			 * This recorded memcg can be obsolete one. So, avoid
 			 * calling css_tryget
 			 */
-			res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+			if (!mem_cgroup_is_root(memcg))
+				res_counter_uncharge(&memcg->memsw, PAGE_SIZE,
+							NULL);
+			mem_cgroup_swap_statistics(memcg, false);
 			mem_cgroup_put(memcg);
 		}
 		rcu_read_unlock();
@@ -1800,9 +1826,11 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
 		return;
 	if (!mem)
 		return;
-	res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
-	if (do_swap_account)
-		res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+	if (!mem_cgroup_is_root(mem)) {
+		res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+		if (do_swap_account)
+			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+	}
 	css_put(&mem->css);
 }
 
@@ -1855,9 +1883,14 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		break;
 	}
 
-	res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
-	if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
-		res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+	if (!mem_cgroup_is_root(mem)) {
+		res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
+		if (do_swap_account &&
+				(ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
+			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+	}
+	if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT && mem_cgroup_is_root(mem))
+		mem_cgroup_swap_statistics(mem, true);
 	mem_cgroup_charge_statistics(mem, pc, false);
 
 	ClearPageCgroupUsed(pc);
@@ -1948,7 +1981,9 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
 		 * We uncharge this because swap is freed.
 		 * This memcg can be obsolete one. We avoid calling css_tryget
 		 */
-		res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+		if (!mem_cgroup_is_root(memcg))
+			res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+		mem_cgroup_swap_statistics(memcg, false);
 		mem_cgroup_put(memcg);
 	}
 	rcu_read_unlock();
@@ -2461,10 +2496,26 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
 	name = MEMFILE_ATTR(cft->private);
 	switch (type) {
 	case _MEM:
-		val = res_counter_read_u64(&mem->res, name);
+		if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
+			val = mem_cgroup_read_stat(&mem->stat,
+					MEM_CGROUP_STAT_CACHE);
+			val += mem_cgroup_read_stat(&mem->stat,
+					MEM_CGROUP_STAT_RSS);
+			val <<= PAGE_SHIFT;
+		} else
+			val = res_counter_read_u64(&mem->res, name);
 		break;
 	case _MEMSWAP:
-		val = res_counter_read_u64(&mem->memsw, name);
+		if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
+			val = mem_cgroup_read_stat(&mem->stat,
+					MEM_CGROUP_STAT_CACHE);
+			val += mem_cgroup_read_stat(&mem->stat,
+					MEM_CGROUP_STAT_RSS);
+			val += mem_cgroup_read_stat(&mem->stat,
+					MEM_CGROUP_STAT_SWAPOUT);
+			val <<= PAGE_SHIFT;
+		} else
+			val = res_counter_read_u64(&mem->memsw, name);
 		break;
 	default:
 		BUG();

-- 
	Balbir


* Re: Help Resource Counters Scale better (v4)
  2009-08-11 14:44 Help Resource Counters Scale better (v4) Balbir Singh
@ 2009-08-11 14:54 ` Prarit Bhargava
  2009-08-11 15:00   ` Balbir Singh
                     ` (2 more replies)
  2009-08-11 16:52 ` Balbir Singh
  2009-08-11 23:31 ` Andrew Morton
  2 siblings, 3 replies; 20+ messages in thread
From: Prarit Bhargava @ 2009-08-11 14:54 UTC (permalink / raw)
  To: balbir
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, nishimura, KOSAKI Motohiro,
	menage, andi.kleen, Pavel Emelianov, lizf, linux-kernel,
	linux-mm



Balbir Singh wrote:
> Enhancement: Remove the overhead of root based resource counter accounting
>
>
>   

<snip>
> Please test/review.
>
>   
FWIW ...

On a 64p/32G system running 2.6.31-git2-rc5, with RESOURCE_COUNTERS off, 
"time make -j64" results in

real    4m54.972s
user    90m13.456s
sys     50m19.711s

On the same system, running 2.6.31-git2-rc5, with RESOURCE_COUNTERS on,
plus Balbir's "Help Resource Counters Scale Better (v3)" patch, and this 
patch, results in

real    4m18.607s
user    84m58.943s
sys     50m52.682s

P.


* Re: Help Resource Counters Scale better (v4)
  2009-08-11 14:54 ` Prarit Bhargava
@ 2009-08-11 15:00   ` Balbir Singh
  2009-08-11 15:11     ` Prarit Bhargava
  2009-08-11 15:14   ` Prarit Bhargava
  2009-08-11 15:20   ` Andi Kleen
  2 siblings, 1 reply; 20+ messages in thread
From: Balbir Singh @ 2009-08-11 15:00 UTC (permalink / raw)
  To: Prarit Bhargava
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, nishimura, KOSAKI Motohiro,
	menage, andi.kleen, Pavel Emelianov, lizf, linux-kernel,
	linux-mm

* Prarit Bhargava <prarit@redhat.com> [2009-08-11 10:54:50]:

>
>
> Balbir Singh wrote:
>> Enhancement: Remove the overhead of root based resource counter accounting
>>
>>
>>   
>
> <snip>
>> Please test/review.
>>
>>   
> FWIW ...
>

Thanks for the testing!

> On a 64p/32G system running 2.6.31-git2-rc5, with RESOURCE_COUNTERS off,  
> "time make -j64" results in
>
> real    4m54.972s
> user    90m13.456s
> sys     50m19.711s
>
> On the same system, running 2.6.31-git2-rc5, with RESOURCE_COUNTERS on,
> plus Balbir's "Help Resource Counters Scale Better (v3)" patch, and this  
                                                     ^^^
                                        you mean (v4) right?
> patch, results in
>
> real    4m18.607s
> user    84m58.943s
> sys     50m52.682s
>

Without the patch, and with RESOURCE_COUNTERS on, do you see a big overhead? I'd
assume so; I am seeing it on my 24-way box that I have access to.

-- 
	Balbir


* Re: Help Resource Counters Scale better (v4)
  2009-08-11 15:00   ` Balbir Singh
@ 2009-08-11 15:11     ` Prarit Bhargava
  0 siblings, 0 replies; 20+ messages in thread
From: Prarit Bhargava @ 2009-08-11 15:11 UTC (permalink / raw)
  To: balbir
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, nishimura, KOSAKI Motohiro,
	menage, andi.kleen, Pavel Emelianov, lizf, linux-kernel,
	linux-mm


>>     
>
> Without the patch and RESOURCE_COUNTERS do you see a big overhead. I'd
> assume so, I am seeing it on my 24 way box that I have access to.
>   

I see a *huge* overhead:

real 27m8.988s
user 87m24.916s
sys 382m6.037s

P.


* Re: Help Resource Counters Scale better (v4)
  2009-08-11 14:54 ` Prarit Bhargava
  2009-08-11 15:00   ` Balbir Singh
@ 2009-08-11 15:14   ` Prarit Bhargava
  2009-08-11 15:20   ` Andi Kleen
  2 siblings, 0 replies; 20+ messages in thread
From: Prarit Bhargava @ 2009-08-11 15:14 UTC (permalink / raw)
  To: balbir
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, nishimura, KOSAKI Motohiro,
	menage, andi.kleen, Pavel Emelianov, lizf, linux-kernel,
	linux-mm



Prarit Bhargava wrote:
>
>
> Balbir Singh wrote:
>> Enhancement: Remove the overhead of root based resource counter 
>> accounting
>>
>>
>>   
>
> <snip>
>> Please test/review.
>>
>>   
> FWIW ...
>
> On a 64p/32G system running 2.6.31-git2-rc5, with RESOURCE_COUNTERS 
> off, "time make -j64" results in
>
> real    4m54.972s
> user    90m13.456s
> sys     50m19.711s
>
> On the same system, running 2.6.31-git2-rc5, with RESOURCE_COUNTERS on,
> plus Balbir's "Help Resource Counters Scale Better (v3)" patch, and 
> this patch, results in

Oops, sorry Balbir. 

I meant to write:

On the same system, running 2.6.31-git2-rc5, with RESOURCE_COUNTERS on, 
plus Balbir's "Help Resource Counters Scale Better (v4)" patch results 
in ...

Sorry for the typo,

P.


* Re: Help Resource Counters Scale better (v4)
  2009-08-11 14:54 ` Prarit Bhargava
  2009-08-11 15:00   ` Balbir Singh
  2009-08-11 15:14   ` Prarit Bhargava
@ 2009-08-11 15:20   ` Andi Kleen
  2009-08-11 15:21     ` Prarit Bhargava
  2 siblings, 1 reply; 20+ messages in thread
From: Andi Kleen @ 2009-08-11 15:20 UTC (permalink / raw)
  To: Prarit Bhargava
  Cc: balbir, Andrew Morton, KAMEZAWA Hiroyuki, nishimura,
	KOSAKI Motohiro, menage, andi.kleen, Pavel Emelianov, lizf,
	linux-kernel, linux-mm

Prarit Bhargava <prarit@redhat.com> writes:
>
> On a 64p/32G system running 2.6.31-git2-rc5, with RESOURCE_COUNTERS

This is CONFIG_RESOURCE_COUNTERS off at compile time right?

> off, "time make -j64" results in
>
> real    4m54.972s
> user    90m13.456s
> sys     50m19.711s
>
> On the same system, running 2.6.31-git2-rc5, with RESOURCE_COUNTERS on,
> plus Balbir's "Help Resource Counters Scale Better (v3)" patch, and
> this patch, results in
>
> real    4m18.607s
> user    84m58.943s
> sys     50m52.682s

Hmm, so resource counters on with the patch is faster than
CONFIG_RESOURCE_COUNTERS compiled out in real time? That seems
counterintuitive. At best I would expect the patch to break even, but
not be actually faster.

Is the compilation stable over multiple runs?

Still it looks like the patch is clearly needed.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.


* Re: Help Resource Counters Scale better (v4)
  2009-08-11 15:20   ` Andi Kleen
@ 2009-08-11 15:21     ` Prarit Bhargava
  0 siblings, 0 replies; 20+ messages in thread
From: Prarit Bhargava @ 2009-08-11 15:21 UTC (permalink / raw)
  To: Andi Kleen
  Cc: balbir, Andrew Morton, KAMEZAWA Hiroyuki, nishimura,
	KOSAKI Motohiro, menage, andi.kleen, Pavel Emelianov, lizf,
	linux-kernel, linux-mm



Andi Kleen wrote:
> Prarit Bhargava <prarit@redhat.com> writes:
>   
>> On a 64p/32G system running 2.6.31-git2-rc5, with RESOURCE_COUNTERS
>>     
>
> This is CONFIG_RESOURCE_COUNTERS off at compile time right?
>
>   
>> off, "time make -j64" results in
>>     

^^^ yes.  It is off.

>> real    4m54.972s
>> user    90m13.456s
>> sys     50m19.711s
>>
>> On the same system, running 2.6.31-git2-rc5, with RESOURCE_COUNTERS on,
>> plus Balbir's "Help Resource Counters Scale Better (v3)" patch, and
>> this patch, results in
>>
>> real    4m18.607s
>> user    84m58.943s
>> sys     50m52.682s
>>     
>
> Hmm, so resource counters on with the patch is faster than
> CONFIG_RESOURCE_COUNTERS compiled out in real time? That seems
> counterintuitive. At best I would expect the patch to break even, but
> not be actually faster.
>   

That is only after one run.  I'm doing 10 reboot and build runs right 
now and will average them out.  I'll ping back here with results.

>
> Still it looks like the patch is clearly needed.
>
>   

P.


* Re: Help Resource Counters Scale better (v4)
  2009-08-11 14:44 Help Resource Counters Scale better (v4) Balbir Singh
  2009-08-11 14:54 ` Prarit Bhargava
@ 2009-08-11 16:52 ` Balbir Singh
  2009-08-11 23:31 ` Andrew Morton
  2 siblings, 0 replies; 20+ messages in thread
From: Balbir Singh @ 2009-08-11 16:52 UTC (permalink / raw)
  To: Andrew Morton, KAMEZAWA Hiroyuki, nishimura, KOSAKI Motohiro,
	menage, Prarit Bhargava, andi.kleen, Pavel Emelianov, lizf
  Cc: linux-kernel, linux-mm

* Balbir Singh <balbir@linux.vnet.ibm.com> [2009-08-11 20:14:05]:

Here is a version of the patch, with the checkpatch.pl warnings fixed.
I forgot to refresh my changes :(


From: Balbir Singh <balbir@linux.vnet.ibm.com>

Enhancement: Remove the overhead of root based resource counter accounting

This patch reduces the resource counter overhead (mostly spinlock)
associated with the root cgroup. This is a part of the several
patches to reduce mem cgroup overhead. I had posted other
approaches earlier (including using percpu counters). Those
patches will be a natural addition and will be added iteratively
on top of these.

The patch stops resource counter accounting for the root cgroup.
The data for display is derived from the statistics we maintain
via mem_cgroup_charge_statistics (which is more scalable).

The test results I see on a 24-way system show that

1. The lock contention disappears from /proc/lock_stats
2. The results of the test are comparable to running with
  cgroup_disable=memory.

Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---

 mm/memcontrol.c |   89 +++++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 70 insertions(+), 19 deletions(-)


diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 48a38e1..4f51885 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -70,6 +70,7 @@ enum mem_cgroup_stat_index {
 	MEM_CGROUP_STAT_PGPGIN_COUNT,	/* # of pages paged in */
 	MEM_CGROUP_STAT_PGPGOUT_COUNT,	/* # of pages paged out */
 	MEM_CGROUP_STAT_EVENTS,	/* sum of pagein + pageout for internal use */
+	MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
 
 	MEM_CGROUP_STAT_NSTATS,
 };
@@ -478,6 +479,19 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
 	return mz;
 }
 
+static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
+					 bool charge)
+{
+	int val = (charge) ? 1 : -1;
+	struct mem_cgroup_stat *stat = &mem->stat;
+	struct mem_cgroup_stat_cpu *cpustat;
+	int cpu = get_cpu();
+
+	cpustat = &stat->cpustat[cpu];
+	__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val);
+	put_cpu();
+}
+
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
 					 struct page_cgroup *pc,
 					 bool charge)
@@ -1281,9 +1295,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	VM_BUG_ON(css_is_removed(&mem->css));
 
 	while (1) {
-		int ret;
+		int ret = 0;
 		unsigned long flags = 0;
 
+		if (mem_cgroup_is_root(mem))
+			goto done;
 		ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res,
 						&soft_fail_res);
 		if (likely(!ret)) {
@@ -1343,6 +1359,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 		if (mem_cgroup_soft_limit_check(mem_over_soft_limit))
 			mem_cgroup_update_tree(mem_over_soft_limit, page);
 	}
+done:
 	return 0;
 nomem:
 	css_put(&mem->css);
@@ -1415,9 +1432,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 	lock_page_cgroup(pc);
 	if (unlikely(PageCgroupUsed(pc))) {
 		unlock_page_cgroup(pc);
-		res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
-		if (do_swap_account)
-			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+		if (!mem_cgroup_is_root(mem)) {
+			res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+			if (do_swap_account)
+				res_counter_uncharge(&mem->memsw, PAGE_SIZE,
+							NULL);
+		}
 		css_put(&mem->css);
 		return;
 	}
@@ -1494,7 +1514,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 	if (pc->mem_cgroup != from)
 		goto out;
 
-	res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
+	if (!mem_cgroup_is_root(from))
+		res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
 	mem_cgroup_charge_statistics(from, pc, false);
 
 	page = pc->page;
@@ -1513,7 +1534,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 						1);
 	}
 
-	if (do_swap_account)
+	if (do_swap_account && !mem_cgroup_is_root(from))
 		res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL);
 	css_put(&from->css);
 
@@ -1584,9 +1605,11 @@ uncharge:
 	/* drop extra refcnt by try_charge() */
 	css_put(&parent->css);
 	/* uncharge if move fails */
-	res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
-	if (do_swap_account)
-		res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
+	if (!mem_cgroup_is_root(parent)) {
+		res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
+		if (do_swap_account)
+			res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
+	}
 	return ret;
 }
 
@@ -1775,7 +1798,10 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 			 * This recorded memcg can be obsolete one. So, avoid
 			 * calling css_tryget
 			 */
-			res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+			if (!mem_cgroup_is_root(memcg))
+				res_counter_uncharge(&memcg->memsw, PAGE_SIZE,
+							NULL);
+			mem_cgroup_swap_statistics(memcg, false);
 			mem_cgroup_put(memcg);
 		}
 		rcu_read_unlock();
@@ -1800,9 +1826,11 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
 		return;
 	if (!mem)
 		return;
-	res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
-	if (do_swap_account)
-		res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+	if (!mem_cgroup_is_root(mem)) {
+		res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+		if (do_swap_account)
+			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+	}
 	css_put(&mem->css);
 }
 
@@ -1855,9 +1883,14 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		break;
 	}
 
-	res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
-	if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
-		res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+	if (!mem_cgroup_is_root(mem)) {
+		res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
+		if (do_swap_account &&
+				(ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
+			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+	}
+	if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT && mem_cgroup_is_root(mem))
+		mem_cgroup_swap_statistics(mem, true);
 	mem_cgroup_charge_statistics(mem, pc, false);
 
 	ClearPageCgroupUsed(pc);
@@ -1948,7 +1981,9 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
 		 * We uncharge this because swap is freed.
 		 * This memcg can be obsolete one. We avoid calling css_tryget
 		 */
-		res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+		if (!mem_cgroup_is_root(memcg))
+			res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+		mem_cgroup_swap_statistics(memcg, false);
 		mem_cgroup_put(memcg);
 	}
 	rcu_read_unlock();
@@ -2461,10 +2496,26 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
 	name = MEMFILE_ATTR(cft->private);
 	switch (type) {
 	case _MEM:
-		val = res_counter_read_u64(&mem->res, name);
+		if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
+			val = mem_cgroup_read_stat(&mem->stat,
+					MEM_CGROUP_STAT_CACHE);
+			val += mem_cgroup_read_stat(&mem->stat,
+					MEM_CGROUP_STAT_RSS);
+			val <<= PAGE_SHIFT;
+		} else
+			val = res_counter_read_u64(&mem->res, name);
 		break;
 	case _MEMSWAP:
-		val = res_counter_read_u64(&mem->memsw, name);
+		if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
+			val = mem_cgroup_read_stat(&mem->stat,
+					MEM_CGROUP_STAT_CACHE);
+			val += mem_cgroup_read_stat(&mem->stat,
+					MEM_CGROUP_STAT_RSS);
+			val += mem_cgroup_read_stat(&mem->stat,
+					MEM_CGROUP_STAT_SWAPOUT);
+			val <<= PAGE_SHIFT;
+		} else
+			val = res_counter_read_u64(&mem->memsw, name);
 		break;
 	default:
 		BUG();

-- 
	Balbir


* Re: Help Resource Counters Scale better (v4)
  2009-08-11 14:44 Help Resource Counters Scale better (v4) Balbir Singh
  2009-08-11 14:54 ` Prarit Bhargava
  2009-08-11 16:52 ` Balbir Singh
@ 2009-08-11 23:31 ` Andrew Morton
  2009-08-12  2:34   ` Daisuke Nishimura
                     ` (3 more replies)
  2 siblings, 4 replies; 20+ messages in thread
From: Andrew Morton @ 2009-08-11 23:31 UTC (permalink / raw)
  To: balbir
  Cc: kamezawa.hiroyu, nishimura, kosaki.motohiro, menage, prarit,
	andi.kleen, xemul, lizf, linux-kernel, linux-mm

On Tue, 11 Aug 2009 20:14:05 +0530
Balbir Singh <balbir@linux.vnet.ibm.com> wrote:

> Enhancement: Remove the overhead of root based resource counter accounting
> 
> From: Balbir Singh <balbir@linux.vnet.ibm.com>
> 
> This patch reduces the resource counter overhead (mostly spinlock)
> associated with the root cgroup. This is a part of the several
> patches to reduce mem cgroup overhead. I had posted other
> approaches earlier (including using percpu counters). Those
> patches will be a natural addition and will be added iteratively
> on top of these.
> 
> The patch stops resource counter accounting for the root cgroup.
> The data for display is derived from the statisitcs we maintain
> via mem_cgroup_charge_statistics (which is more scalable).
> 
> The tests results I see on a 24 way show that
> 
> 1. The lock contention disappears from /proc/lock_stats
> 2. The results of the test are comparable to running with
>    cgroup_disable=memory.
> 
> Please test/review.

I don't get it.

The patch appears to skip accounting altogether for the root memcgroup
and then adds some accounting back in for swap.  Or something like
that.  How come?  Do we actually not need the root memcgroup
accounting?

IOW, the changelog sucks ;)

Is this an alternative approach to using percpu_counters, or do we do
both or do we choose one or the other?  res_counter_charge() really is
quite sucky.

The patch didn't have a signoff.

It would be nice to finalise those performance testing results and
include them in the new, improved patch description.


* Re: Help Resource Counters Scale better (v4)
  2009-08-11 23:31 ` Andrew Morton
@ 2009-08-12  2:34   ` Daisuke Nishimura
  2009-08-12  3:56   ` Balbir Singh
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 20+ messages in thread
From: Daisuke Nishimura @ 2009-08-12  2:34 UTC (permalink / raw)
  To: Andrew Morton
  Cc: balbir, kamezawa.hiroyu, kosaki.motohiro, menage, prarit,
	andi.kleen, xemul, lizf, nishimura, linux-kernel, linux-mm

On Tue, 11 Aug 2009 16:31:59 -0700, Andrew Morton <akpm@linux-foundation.org> wrote:
> On Tue, 11 Aug 2009 20:14:05 +0530
> Balbir Singh <balbir@linux.vnet.ibm.com> wrote:
> 
> > Enhancement: Remove the overhead of root based resource counter accounting
> > 
> > From: Balbir Singh <balbir@linux.vnet.ibm.com>
> > 
> > This patch reduces the resource counter overhead (mostly spinlock)
> > associated with the root cgroup. This is a part of the several
> > patches to reduce mem cgroup overhead. I had posted other
> > approaches earlier (including using percpu counters). Those
> > patches will be a natural addition and will be added iteratively
> > on top of these.
> > 
> > The patch stops resource counter accounting for the root cgroup.
> > The data for display is derived from the statisitcs we maintain
> > via mem_cgroup_charge_statistics (which is more scalable).
> > 
> > The tests results I see on a 24 way show that
> > 
> > 1. The lock contention disappears from /proc/lock_stats
> > 2. The results of the test are comparable to running with
> >    cgroup_disable=memory.
> > 
> > Please test/review.
> 
> I don't get it.
> 
> The patch apepars to skip accounting altogether for the root memcgroup
> and then adds some accounting back in for swap.  Or something like
> that.  How come?  Do we actually not need the root memcgroup
> accounting?
> 
IIUC, this patch doesn't remove the root memcgroup accounting; it just changes
the counter used for root memcgroup accounting from res_counter to cpustat[cpu],
to reduce the lock contention on res_counter, especially on a big platform.
Using res_counter (lock, check limit, charge) for the root memcgroup would be
overkill because the root memcgroup has no limit now (by
memcg-remove-the-overhead-associated-with-the-root-cgroup.patch).
And MEM_CGROUP_STAT_SWAPOUT is needed to show memsw.usage_in_bytes of the root
memcgroup; we didn't have a cpustat[cpu] counter for swap accounting until now.
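
(For reference, the per-cpu counters are only summed up at read time;
roughly the shape of mem_cgroup_read_stat() in this tree, shown as a
sketch:

	/* writers touch only their own cpustat[cpu] entry, no shared lock;
	 * a reader sums over all CPUs when the value is displayed */
	static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
					enum mem_cgroup_stat_index idx)
	{
		int cpu;
		s64 ret = 0;

		for_each_possible_cpu(cpu)
			ret += stat->cpustat[cpu].count[idx];
		return ret;
	}

So the charge path scales with the number of CPUs, and the summation cost
is paid only when usage_in_bytes is actually read.)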

> IOW, the changelog sucks ;)
> 
> Is this an alternative approach to using percpu_counters, or do we do
> both or do we choose one or the other?  res_counter_charge() really is
> quite sucky.
> 
> The patch didn't have a signoff.
> 
> It would be nice to finalise those performance testing results and
> include them in the new, improved patch description.
> 
agreed.


Thanks,
Daisuke Nishimura.


* Re: Help Resource Counters Scale better (v4)
  2009-08-11 23:31 ` Andrew Morton
  2009-08-12  2:34   ` Daisuke Nishimura
@ 2009-08-12  3:56   ` Balbir Singh
  2009-08-12  3:57   ` Balbir Singh
  2009-08-12  4:57   ` [PATCH] Help Resource Counters Scale better (v4.1) Balbir Singh
  3 siblings, 0 replies; 20+ messages in thread
From: Balbir Singh @ 2009-08-12  3:56 UTC (permalink / raw)
  To: Andrew Morton
  Cc: kamezawa.hiroyu, nishimura, kosaki.motohiro, menage, prarit,
	andi.kleen, xemul, lizf, linux-kernel, linux-mm

* Andrew Morton <akpm@linux-foundation.org> [2009-08-11 16:31:59]:

> On Tue, 11 Aug 2009 20:14:05 +0530
> Balbir Singh <balbir@linux.vnet.ibm.com> wrote:
> 
> > Enhancement: Remove the overhead of root based resource counter accounting
> > 
> > From: Balbir Singh <balbir@linux.vnet.ibm.com>
> > 
> > This patch reduces the resource counter overhead (mostly spinlock)
> > associated with the root cgroup. This is a part of the several
> > patches to reduce mem cgroup overhead. I had posted other
> > approaches earlier (including using percpu counters). Those
> > patches will be a natural addition and will be added iteratively
> > on top of these.
> > 
> > The patch stops resource counter accounting for the root cgroup.
> > The data for display is derived from the statisitcs we maintain
> > via mem_cgroup_charge_statistics (which is more scalable).
> > 
> > The tests results I see on a 24 way show that
> > 
> > 1. The lock contention disappears from /proc/lock_stats
> > 2. The results of the test are comparable to running with
> >    cgroup_disable=memory.
> > 
> > Please test/review.
> 
> I don't get it.
> 
> The patch apepars to skip accounting altogether for the root memcgroup
> and then adds some accounting back in for swap.  Or something like
> that.  How come?  Do we actually not need the root memcgroup
> accounting?
>

The changelog mentions that the statistics are derived. As Daisuke-san
mentioned, the SWAP accounting is there for memsw: we can derive
memory.usage_in_bytes from the RSS+Cache fields in the memory.stat
accounting, but for memsw.usage_in_bytes we also need the SWAP accounting.
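
Concretely, for the root cgroup the read side becomes a sum of the per-cpu
statistics; a minimal sketch, mirroring the _MEM branch of the
mem_cgroup_read() change in the patch:

	/* root memory.usage_in_bytes, derived from the stats */
	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_CACHE);
	val += mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
	val <<= PAGE_SHIFT;	/* the counters are in pages, usage is in bytes */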

 
> IOW, the changelog sucks ;)
> 
> Is this an alternative approach to using percpu_counters, or do we do
> both or do we choose one or the other?  res_counter_charge() really is
> quite sucky.
> 
> The patch didn't have a signoff.
> 
> It would be nice to finalise those performance testing results and
> include them in the new, improved patch description.
> 

I'll submit a new patch with better changelog, checkpatch.pl fixes and
test results.


-- 
	Balbir


* Re: Help Resource Counters Scale better (v4)
  2009-08-11 23:31 ` Andrew Morton
  2009-08-12  2:34   ` Daisuke Nishimura
  2009-08-12  3:56   ` Balbir Singh
@ 2009-08-12  3:57   ` Balbir Singh
  2009-08-12  4:57   ` [PATCH] Help Resource Counters Scale better (v4.1) Balbir Singh
  3 siblings, 0 replies; 20+ messages in thread
From: Balbir Singh @ 2009-08-12  3:57 UTC (permalink / raw)
  To: Andrew Morton
  Cc: kamezawa.hiroyu, nishimura, kosaki.motohiro, menage, prarit,
	andi.kleen, xemul, lizf, linux-kernel, linux-mm

* Andrew Morton <akpm@linux-foundation.org> [2009-08-11 16:31:59]:

> Is this an alternative approach to using percpu_counters, or do we do
> both or do we choose one or the other?  res_counter_charge() really is
> quite sucky.
>

This is an alternative approach. I'll still do the percpu counter
patches, but once the overhead for root is gone, most users not using
this functionality will not see any major overhead.

-- 
	Balbir


* [PATCH] Help Resource Counters Scale better (v4.1)
  2009-08-11 23:31 ` Andrew Morton
                     ` (2 preceding siblings ...)
  2009-08-12  3:57   ` Balbir Singh
@ 2009-08-12  4:57   ` Balbir Singh
  2009-08-12 10:53     ` Prarit Bhargava
                       ` (2 more replies)
  3 siblings, 3 replies; 20+ messages in thread
From: Balbir Singh @ 2009-08-12  4:57 UTC (permalink / raw)
  To: Andrew Morton
  Cc: kamezawa.hiroyu, nishimura, kosaki.motohiro, menage, prarit,
	andi.kleen, xemul, lizf, linux-kernel, linux-mm

Hi, Andrew,

Does this look better? Could you please replace the older patch with
this one?

1. I did a quick compile test
2. Ran scripts/checkpatch.pl



From: Balbir Singh <balbir@linux.vnet.ibm.com>

Enhancement: Remove the overhead of root based resource counter accounting

This patch reduces the resource counter overhead (mostly spinlock)
associated with the root cgroup. This is a part of the several
patches to reduce mem cgroup overhead. I had posted other
approaches earlier (including using percpu counters). Those
patches will be a natural addition and will be added iteratively
on top of these.

The patch stops resource counter accounting for the root cgroup.
The data for display is derived from the statistics we maintain
via mem_cgroup_charge_statistics (which is more scalable).
What happens today is that we do double accounting, once using
res_counter_charge() and once using mem_cgroup_charge_statistics().
For the root, since we don't implement limits any more, we don't
need to track every charge via res_counter_charge(), check
for the limit being exceeded and reclaim.

The main mem->res usage_in_bytes can be derived by summing
the cache and rss usage data from the memory statistics
(MEM_CGROUP_STAT_RSS and MEM_CGROUP_STAT_CACHE). However, for
memsw->res usage_in_bytes, we need additional data about
swapped-out memory. This patch adds a MEM_CGROUP_STAT_SWAPOUT
counter and uses it along with MEM_CGROUP_STAT_RSS and
MEM_CGROUP_STAT_CACHE to derive the memsw data.
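
For the root cgroup the memsw read side then becomes, roughly (a sketch
of the _MEMSWAP branch of the mem_cgroup_read() change below):

	/* root memsw.usage_in_bytes, derived from the per-cpu stats */
	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_CACHE);
	val += mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
	val += mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_SWAPOUT);
	val <<= PAGE_SHIFT;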

The test results I see on a 24-way system show that

1. The lock contention disappears from /proc/lock_stats
2. The results of the test are comparable to running with
  cgroup_disable=memory.

Here is a sample of my program runs

Without Patch

 Performance counter stats for '/home/balbir/parallel_pagefault':

 7192804.124144  task-clock-msecs         #     23.937 CPUs
         424691  context-switches         #      0.000 M/sec
            267  CPU-migrations           #      0.000 M/sec
       28498113  page-faults              #      0.004 M/sec
  5826093739340  cycles                   #    809.989 M/sec
   408883496292  instructions             #      0.070 IPC
     7057079452  cache-references         #      0.981 M/sec
     3036086243  cache-misses             #      0.422 M/sec

  300.485365680  seconds time elapsed

With cgroup_disable=memory

 Performance counter stats for '/home/balbir/parallel_pagefault':

 7182183.546587  task-clock-msecs         #     23.915 CPUs
         425458  context-switches         #      0.000 M/sec
            203  CPU-migrations           #      0.000 M/sec
       92545093  page-faults              #      0.013 M/sec
  6034363609986  cycles                   #    840.185 M/sec
   437204346785  instructions             #      0.072 IPC
     6636073192  cache-references         #      0.924 M/sec
     2358117732  cache-misses             #      0.328 M/sec

  300.320905827  seconds time elapsed

With this patch applied

 Performance counter stats for '/home/balbir/parallel_pagefault':

 7191619.223977  task-clock-msecs         #     23.955 CPUs
         422579  context-switches         #      0.000 M/sec
             88  CPU-migrations           #      0.000 M/sec
       91946060  page-faults              #      0.013 M/sec
  5957054385619  cycles                   #    828.333 M/sec
  1058117350365  instructions             #      0.178 IPC
     9161776218  cache-references         #      1.274 M/sec
     1920494280  cache-misses             #      0.267 M/sec

  300.218764862  seconds time elapsed


Data from Prarit (kernel compile with make -j64 on a 64
CPU/32G machine)

For a single run

Without patch

real 27m8.988s
user 87m24.916s
sys 382m6.037s

With patch

real    4m18.607s
user    84m58.943s
sys     50m52.682s


With config turned off

real    4m54.972s
user    90m13.456s
sys     50m19.711s

NOTE: The data looks counterintuitive due to the increased performance
with the patch, even over the config being turned off. We probably need
more runs, but so far all testing has shown that the patches definitely
help.


Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---

 mm/memcontrol.c |   89 +++++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 70 insertions(+), 19 deletions(-)


diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 48a38e1..4f51885 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -70,6 +70,7 @@ enum mem_cgroup_stat_index {
 	MEM_CGROUP_STAT_PGPGIN_COUNT,	/* # of pages paged in */
 	MEM_CGROUP_STAT_PGPGOUT_COUNT,	/* # of pages paged out */
 	MEM_CGROUP_STAT_EVENTS,	/* sum of pagein + pageout for internal use */
+	MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
 
 	MEM_CGROUP_STAT_NSTATS,
 };
@@ -478,6 +479,19 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
 	return mz;
 }
 
+static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
+					 bool charge)
+{
+	int val = (charge) ? 1 : -1;
+	struct mem_cgroup_stat *stat = &mem->stat;
+	struct mem_cgroup_stat_cpu *cpustat;
+	int cpu = get_cpu();
+
+	cpustat = &stat->cpustat[cpu];
+	__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val);
+	put_cpu();
+}
+
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
 					 struct page_cgroup *pc,
 					 bool charge)
@@ -1281,9 +1295,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	VM_BUG_ON(css_is_removed(&mem->css));
 
 	while (1) {
-		int ret;
+		int ret = 0;
 		unsigned long flags = 0;
 
+		if (mem_cgroup_is_root(mem))
+			goto done;
 		ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res,
 						&soft_fail_res);
 		if (likely(!ret)) {
@@ -1343,6 +1359,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 		if (mem_cgroup_soft_limit_check(mem_over_soft_limit))
 			mem_cgroup_update_tree(mem_over_soft_limit, page);
 	}
+done:
 	return 0;
 nomem:
 	css_put(&mem->css);
@@ -1415,9 +1432,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 	lock_page_cgroup(pc);
 	if (unlikely(PageCgroupUsed(pc))) {
 		unlock_page_cgroup(pc);
-		res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
-		if (do_swap_account)
-			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+		if (!mem_cgroup_is_root(mem)) {
+			res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+			if (do_swap_account)
+				res_counter_uncharge(&mem->memsw, PAGE_SIZE,
+							NULL);
+		}
 		css_put(&mem->css);
 		return;
 	}
@@ -1494,7 +1514,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 	if (pc->mem_cgroup != from)
 		goto out;
 
-	res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
+	if (!mem_cgroup_is_root(from))
+		res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
 	mem_cgroup_charge_statistics(from, pc, false);
 
 	page = pc->page;
@@ -1513,7 +1534,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 						1);
 	}
 
-	if (do_swap_account)
+	if (do_swap_account && !mem_cgroup_is_root(from))
 		res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL);
 	css_put(&from->css);
 
@@ -1584,9 +1605,11 @@ uncharge:
 	/* drop extra refcnt by try_charge() */
 	css_put(&parent->css);
 	/* uncharge if move fails */
-	res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
-	if (do_swap_account)
-		res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
+	if (!mem_cgroup_is_root(parent)) {
+		res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
+		if (do_swap_account)
+			res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
+	}
 	return ret;
 }
 
@@ -1775,7 +1798,10 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 			 * This recorded memcg can be obsolete one. So, avoid
 			 * calling css_tryget
 			 */
-			res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+			if (!mem_cgroup_is_root(memcg))
+				res_counter_uncharge(&memcg->memsw, PAGE_SIZE,
+							NULL);
+			mem_cgroup_swap_statistics(memcg, false);
 			mem_cgroup_put(memcg);
 		}
 		rcu_read_unlock();
@@ -1800,9 +1826,11 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
 		return;
 	if (!mem)
 		return;
-	res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
-	if (do_swap_account)
-		res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+	if (!mem_cgroup_is_root(mem)) {
+		res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+		if (do_swap_account)
+			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+	}
 	css_put(&mem->css);
 }
 
@@ -1855,9 +1883,14 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		break;
 	}
 
-	res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
-	if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
-		res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+	if (!mem_cgroup_is_root(mem)) {
+		res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
+		if (do_swap_account &&
+				(ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
+			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+	}
+	if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT && mem_cgroup_is_root(mem))
+		mem_cgroup_swap_statistics(mem, true);
 	mem_cgroup_charge_statistics(mem, pc, false);
 
 	ClearPageCgroupUsed(pc);
@@ -1948,7 +1981,9 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
 		 * We uncharge this because swap is freed.
 		 * This memcg can be obsolete one. We avoid calling css_tryget
 		 */
-		res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+		if (!mem_cgroup_is_root(memcg))
+			res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+		mem_cgroup_swap_statistics(memcg, false);
 		mem_cgroup_put(memcg);
 	}
 	rcu_read_unlock();
@@ -2461,10 +2496,26 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
 	name = MEMFILE_ATTR(cft->private);
 	switch (type) {
 	case _MEM:
-		val = res_counter_read_u64(&mem->res, name);
+		if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
+			val = mem_cgroup_read_stat(&mem->stat,
+					MEM_CGROUP_STAT_CACHE);
+			val += mem_cgroup_read_stat(&mem->stat,
+					MEM_CGROUP_STAT_RSS);
+			val <<= PAGE_SHIFT;
+		} else
+			val = res_counter_read_u64(&mem->res, name);
 		break;
 	case _MEMSWAP:
-		val = res_counter_read_u64(&mem->memsw, name);
+		if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
+			val = mem_cgroup_read_stat(&mem->stat,
+					MEM_CGROUP_STAT_CACHE);
+			val += mem_cgroup_read_stat(&mem->stat,
+					MEM_CGROUP_STAT_RSS);
+			val += mem_cgroup_read_stat(&mem->stat,
+					MEM_CGROUP_STAT_SWAPOUT);
+			val <<= PAGE_SHIFT;
+		} else
+			val = res_counter_read_u64(&mem->memsw, name);
 		break;
 	default:
 		BUG();

-- 
	Balbir


* Re: [PATCH] Help Resource Counters Scale better (v4.1)
  2009-08-12  4:57   ` [PATCH] Help Resource Counters Scale better (v4.1) Balbir Singh
@ 2009-08-12 10:53     ` Prarit Bhargava
  2009-08-12 16:19       ` KAMEZAWA Hiroyuki
  2009-08-12 16:28     ` KAMEZAWA Hiroyuki
  2009-08-13  1:03     ` Daisuke Nishimura
  2 siblings, 1 reply; 20+ messages in thread
From: Prarit Bhargava @ 2009-08-12 10:53 UTC (permalink / raw)
  To: balbir
  Cc: Andrew Morton, kamezawa.hiroyu, nishimura, kosaki.motohiro,
	menage, andi.kleen, xemul, lizf, linux-kernel, linux-mm



Balbir Singh wrote:
> Hi, Andrew,
>
> Does this look better, could you please replace the older patch with
> this one.
>
> 1. I did a quick compile test
> 2. Ran scripts/checkpatch.pl
>
>
>   

Andi Kleen suggested I use kernbench to profile the kernel.

2.6.31-rc5-git2 w/ CONFIG_RESOURCE_COUNTERS on

Tue Aug 11 13:45:14 EDT 2009
2.6.31-rc5-git2-resources
Average Half load -j 32 Run (std deviation):
Elapsed Time 622.588 (119.243)
User Time 4820.8 (962.286)
System Time 9807.63 (2669.55)
Percent CPU 2324 (167.236)
Context Switches 2009606 (368703)
Sleeps 1.24949e+06 (118210)

Average Optimal load -j 256 Run (std deviation):
Elapsed Time 770.97 (90.8685)
User Time 5068.42 (750.933)
System Time 21499.8 (12822.3)
Percent CPU 3660 (1425.28)
Context Switches 2.86467e+06 (971764)
Sleeps 1.32784e+06 (129048)

Average Maximal load -j Run (std deviation):
Elapsed Time 757.018 (22.8371)
User Time 4958.85 (644.65)
System Time 24916.5 (11454.3)
Percent CPU 4046.93 (1279.6)
Context Switches 3.04894e+06 (826687)
Sleeps 1.26053e+06 (146073)


2.6.31-rc5-git2 w/ CONFIG_RESOURCE_COUNTERS off

Tue Aug 11 17:58:58 EDT 2009
2.6.31-rc5-git2-no-resources
Average Half load -j 32 Run (std deviation):
Elapsed Time 280.176 (21.1131)
User Time 3558.51 (389.488)
System Time 2393.87 (142.692)
Percent CPU 2122.6 (50.5104)
Context Switches 1.20474e+06 (131112)
Sleeps 1062507 (59366.3)

Average Optimal load -j 256 Run (std deviation):
Elapsed Time 223.192 (42.7007)
User Time 4243.19 (967.575)
System Time 2649.57 (344.462)
Percent CPU 2845.5 (856.217)
Context Switches 1.52187e+06 (391821)
Sleeps 1.28862e+06 (274222)

Average Maximal load -j Run (std deviation):
Elapsed Time 216.942 (45.4824)
User Time 3860.46 (966.452)
System Time 2782.17 (344.154)
Percent CPU 2862.47 (720.904)
Context Switches 1.43379e+06 (341021)
Sleeps 1184325 (269392)

2.6.31-rc5-git2 w/ CONFIG_RESOURCE_COUNTERS on + patch

Tue Aug 11 20:58:31 EDT 2009
2.6.31-rc5-git2-mem-patch
Average Half load -j 32 Run (std deviation):
Elapsed Time 285.788 (18.577)
User Time 3483.14 (346.56)
System Time 2426.37 (132.015)
Percent CPU 2066.8 (80.3754)
Context Switches 1.16588e+06 (134701)
Sleeps 1048810 (59891.2)

Average Optimal load -j 256 Run (std deviation):
Elapsed Time 239.81 (14.0759)
User Time 3797.7 (422.118)
System Time 2622.74 (225.361)
Percent CPU 2480.9 (446.735)
Context Switches 1.37301e+06 (238886)
Sleeps 1195957 (161659)

Average Maximal load -j Run (std deviation):
Elapsed Time 203.884 (8.59151)
User Time 3578.02 (482.79)
System Time 2759.9 (273.03)
Percent CPU 2663.53 (450.476)
Context Switches 1.33907e+06 (199658)
Sleeps 1119205 (172089)


... The odd thing is that the run with the patch still takes less time than
the run with CONFIG_RESOURCE_COUNTERS off.  It was so odd that I
double-checked that I had actually built in RESOURCE_COUNTERS and had applied
the patch, both of which I had done.

P.


* Re: [PATCH] Help Resource Counters Scale better (v4.1)
  2009-08-12 10:53     ` Prarit Bhargava
@ 2009-08-12 16:19       ` KAMEZAWA Hiroyuki
  0 siblings, 0 replies; 20+ messages in thread
From: KAMEZAWA Hiroyuki @ 2009-08-12 16:19 UTC (permalink / raw)
  To: Prarit Bhargava
  Cc: balbir, Andrew Morton, kamezawa.hiroyu, nishimura,
	kosaki.motohiro, menage, andi.kleen, xemul, lizf, linux-kernel,
	linux-mm

Prarit Bhargava wrote:
>
>
> Balbir Singh wrote:
>> Hi, Andrew,
>>
>> Does this look better, could you please replace the older patch with
>> this one.
>>
>> 1. I did a quick compile test
>> 2. Ran scripts/checkpatch.pl
>>
>>
>>
>
> Andi Kleen suggested I use kernbench to profile the kernel.
>
> 2.6.31-rc5-git2 w/ CONFIG_RESOURCE_COUNTERS on
>
> Tue Aug 11 13:45:14 EDT 2009
> 2.6.31-rc5-git2-resources
> Average Half load -j 32 Run (std deviation):
> Elapsed Time 622.588 (119.243)
> User Time 4820.8 (962.286)
> System Time 9807.63 (2669.55)
> Percent CPU 2324 (167.236)
> Context Switches 2009606 (368703)
> Sleeps 1.24949e+06 (118210)
>
> Average Optimal load -j 256 Run (std deviation):
> Elapsed Time 770.97 (90.8685)
> User Time 5068.42 (750.933)
> System Time 21499.8 (12822.3)
> Percent CPU 3660 (1425.28)
> Context Switches 2.86467e+06 (971764)
> Sleeps 1.32784e+06 (129048)
>
> Average Maximal load -j Run (std deviation):
> Elapsed Time 757.018 (22.8371)
> User Time 4958.85 (644.65)
> System Time 24916.5 (11454.3)
> Percent CPU 4046.93 (1279.6)
> Context Switches 3.04894e+06 (826687)
> Sleeps 1.26053e+06 (146073)
>
>
> 2.6.31-rc5-git2 w/ CONFIG_RESOURCE_COUNTERS off
>
> Tue Aug 11 17:58:58 EDT 2009
> 2.6.31-rc5-git2-no-resources
> Average Half load -j 32 Run (std deviation):
> Elapsed Time 280.176 (21.1131)
> User Time 3558.51 (389.488)
> System Time 2393.87 (142.692)
> Percent CPU 2122.6 (50.5104)
> Context Switches 1.20474e+06 (131112)
> Sleeps 1062507 (59366.3)
>
> Average Optimal load -j 256 Run (std deviation):
> Elapsed Time 223.192 (42.7007)
> User Time 4243.19 (967.575)
> System Time 2649.57 (344.462)
> Percent CPU 2845.5 (856.217)
> Context Switches 1.52187e+06 (391821)
> Sleeps 1.28862e+06 (274222)
>
> Average Maximal load -j Run (std deviation):
> Elapsed Time 216.942 (45.4824)
> User Time 3860.46 (966.452)
> System Time 2782.17 (344.154)
> Percent CPU 2862.47 (720.904)
> Context Switches 1.43379e+06 (341021)
> Sleeps 1184325 (269392)
>
> 2.6.31-rc5-git2 w/ CONFIG_RESOURCE_COUNTERS on + patch
>
> Tue Aug 11 20:58:31 EDT 2009
> 2.6.31-rc5-git2-mem-patch
> Average Half load -j 32 Run (std deviation):
> Elapsed Time 285.788 (18.577)
> User Time 3483.14 (346.56)
> System Time 2426.37 (132.015)
> Percent CPU 2066.8 (80.3754)
> Context Switches 1.16588e+06 (134701)
> Sleeps 1048810 (59891.2)
>
> Average Optimal load -j 256 Run (std deviation):
> Elapsed Time 239.81 (14.0759)
> User Time 3797.7 (422.118)
> System Time 2622.74 (225.361)
> Percent CPU 2480.9 (446.735)
> Context Switches 1.37301e+06 (238886)
> Sleeps 1195957 (161659)
>
> Average Maximal load -j Run (std deviation):
> Elapsed Time 203.884 (8.59151)
> User Time 3578.02 (482.79)
> System Time 2759.9 (273.03)
> Percent CPU 2663.53 (450.476)
> Context Switches 1.33907e+06 (199658)
> Sleeps 1119205 (172089)
>
>
> ... The odd thing is that the run with the patch is still less than the
> run with CONFIG_RESOURCE_COUNTERS off.  It was so odd that I double
> checked that I actually built in RESOURCE_COUNTERS and had applied the
> patch, both of which I had done.
>
Hmm, CONFIG_RESOURCE_COUNTER actually means CONFIG_MEM_CGROUP ?


Then, more requests from me (if you can).
Apply the patch and set the config ON, then please check your bench in the
following cases:

 1. Boot with cgroup_disable=memory (will have the same effect as
    config=off).

    Boot without the above option... and
    2. Run your bench without mounting memcg.
    3. Run your bench with memcg... as...
       #mount -tcgroup none /cgroups -omemory
       #mkdir /cgroups/test
       #echo $$ > /cgroups/test/tasks
       #kernbench

Thanks,
-Kame






> P.
>



* Re: [PATCH] Help Resource Counters Scale better (v4.1)
  2009-08-12  4:57   ` [PATCH] Help Resource Counters Scale better (v4.1) Balbir Singh
  2009-08-12 10:53     ` Prarit Bhargava
@ 2009-08-12 16:28     ` KAMEZAWA Hiroyuki
  2009-08-12 17:19       ` Balbir Singh
  2009-08-13  1:03     ` Daisuke Nishimura
  2 siblings, 1 reply; 20+ messages in thread
From: KAMEZAWA Hiroyuki @ 2009-08-12 16:28 UTC (permalink / raw)
  To: balbir
  Cc: Andrew Morton, kamezawa.hiroyu, nishimura, kosaki.motohiro,
	menage, prarit, andi.kleen, xemul, lizf, linux-kernel, linux-mm

Balbir Singh wrote:
> Hi, Andrew,
>
> Does this look better, could you please replace the older patch with
> this one.
>
> 1. I did a quick compile test
> 2. Ran scripts/checkpatch.pl
>

In general, this seems reasonable to me as a quick hack for the root cgroup.
Thank you.

Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

Finally, we'll have to do some rework for a light-weight res_counter,
but yes, that will take some amount of time.
My only concern is an accounting leak, but if there is some leak, it's a bug
in the current code, not in this patch.

And... hmm... I would prefer the following style to the open-coded checks:
==
int mem_counter_charge(struct mem_cgroup *mem)
{
	if (mem_cgroup_is_root(mem))
		return 0;	/* always succeeds for root */
	return res_counter_charge(....);
}
==
But maybe that's a nitpick.
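
For illustration, the uncharge side could get the same treatment, and the
open-coded checks in the patch would then collapse into single calls. A
sketch, assuming such wrappers exist (mem_counter_uncharge is a
hypothetical name, not something in the patch):
==
/* hypothetical wrapper: the root cgroup is never accounted in res_counter */
static void mem_counter_uncharge(struct mem_cgroup *mem, struct res_counter *cnt)
{
	if (mem_cgroup_is_root(mem))
		return;
	res_counter_uncharge(cnt, PAGE_SIZE, NULL);
}

	/* a call site such as mem_cgroup_cancel_charge_swapin() becomes: */
	mem_counter_uncharge(mem, &mem->res);
	if (do_swap_account)
		mem_counter_uncharge(mem, &mem->memsw);
==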

Thanks,
-Kame


>
>
> From: Balbir Singh <balbir@linux.vnet.ibm.com>
>
> Enhancement: Remove the overhead of root based resource counter accounting
>
> This patch reduces the resource counter overhead (mostly spinlock)
> associated with the root cgroup. This is a part of the several
> patches to reduce mem cgroup overhead. I had posted other
> approaches earlier (including using percpu counters). Those
> patches will be a natural addition and will be added iteratively
> on top of these.
>
> The patch stops resource counter accounting for the root cgroup.
> The data for display is derived from the statisitcs we maintain
> via mem_cgroup_charge_statistics (which is more scalable).
> What happens today is that, we do double accounting, once using
> res_counter_charge() and once using memory_cgroup_charge_statistics().
> For the root, since we don't implement limits any more, we don't
> need to track every charge via res_counter_charge() and check
> for limit being exceeded and reclaim.
>
> The main mem->res usage_in_bytes can be derived by summing
> the cache and rss usage data from memory statistics
> (MEM_CGRUP_STAT_RSS and MEM_CGROUP_STAT_CACHE). However, for
> memsw->res usage_in_bytes, we need additional data about
> swapped out memory. This patch adds a MEM_CGROUP_STAT_SWAPOUT
> and uses that along with MEM_CGROUP_STAT_RSS and MEM_CGROUP_STAT_CACHE
> to derive the memsw data.
>
> The tests results I see on a 24 way show that
>
> 1. The lock contention disappears from /proc/lock_stats
> 2. The results of the test are comparable to running with
>   cgroup_disable=memory.
>
> Here is a sample of my program runs
>
> Without Patch
>
>  Performance counter stats for '/home/balbir/parallel_pagefault':
>
>  7192804.124144  task-clock-msecs         #     23.937 CPUs
>          424691  context-switches         #      0.000 M/sec
>             267  CPU-migrations           #      0.000 M/sec
>        28498113  page-faults              #      0.004 M/sec
>   5826093739340  cycles                   #    809.989 M/sec
>    408883496292  instructions             #      0.070 IPC
>      7057079452  cache-references         #      0.981 M/sec
>      3036086243  cache-misses             #      0.422 M/sec
>
>   300.485365680  seconds time elapsed
>
> With cgroup_disable=memory
>
>  Performance counter stats for '/home/balbir/parallel_pagefault':
>
>  7182183.546587  task-clock-msecs         #     23.915 CPUs
>          425458  context-switches         #      0.000 M/sec
>             203  CPU-migrations           #      0.000 M/sec
>        92545093  page-faults              #      0.013 M/sec
>   6034363609986  cycles                   #    840.185 M/sec
>    437204346785  instructions             #      0.072 IPC
>      6636073192  cache-references         #      0.924 M/sec
>      2358117732  cache-misses             #      0.328 M/sec
>
>   300.320905827  seconds time elapsed
>
> With this patch applied
>
>  Performance counter stats for '/home/balbir/parallel_pagefault':
>
>  7191619.223977  task-clock-msecs         #     23.955 CPUs
>          422579  context-switches         #      0.000 M/sec
>              88  CPU-migrations           #      0.000 M/sec
>        91946060  page-faults              #      0.013 M/sec
>   5957054385619  cycles                   #    828.333 M/sec
>   1058117350365  instructions             #      0.178 IPC
>      9161776218  cache-references         #      1.274 M/sec
>      1920494280  cache-misses             #      0.267 M/sec
>
>   300.218764862  seconds time elapsed
>
>
> Data from Prarit (kernel compile with make -j64 on a 64
> CPU/32G machine)
>
> For a single run
>
> Without patch
>
> real 27m8.988s
> user 87m24.916s
> sys 382m6.037s
>
> With patch
>
> real    4m18.607s
> user    84m58.943s
> sys     50m52.682s
>
>
> With config turned off
>
> real    4m54.972s
> user    90m13.456s
> sys     50m19.711s
>
> NOTE: The data looks counterintuitive because performance with the patch
> is higher even than with the config turned off. We probably need more
> runs, but so far all testing has shown that the patches definitely help.
>
>
> Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
> ---
>
>  mm/memcontrol.c |   89 +++++++++++++++++++++++++++++++++++++++++++------------
>  1 files changed, 70 insertions(+), 19 deletions(-)
>
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 48a38e1..4f51885 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -70,6 +70,7 @@ enum mem_cgroup_stat_index {
>  	MEM_CGROUP_STAT_PGPGIN_COUNT,	/* # of pages paged in */
>  	MEM_CGROUP_STAT_PGPGOUT_COUNT,	/* # of pages paged out */
>  	MEM_CGROUP_STAT_EVENTS,	/* sum of pagein + pageout for internal use */
> +	MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
>
>  	MEM_CGROUP_STAT_NSTATS,
>  };
> @@ -478,6 +479,19 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
>  	return mz;
>  }
>
> +static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
> +					 bool charge)
> +{
> +	int val = (charge) ? 1 : -1;
> +	struct mem_cgroup_stat *stat = &mem->stat;
> +	struct mem_cgroup_stat_cpu *cpustat;
> +	int cpu = get_cpu();
> +
> +	cpustat = &stat->cpustat[cpu];
> +	__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val);
> +	put_cpu();
> +}
> +
>  static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
>  					 struct page_cgroup *pc,
>  					 bool charge)
> @@ -1281,9 +1295,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
>  	VM_BUG_ON(css_is_removed(&mem->css));
>
>  	while (1) {
> -		int ret;
> +		int ret = 0;
>  		unsigned long flags = 0;
>
> +		if (mem_cgroup_is_root(mem))
> +			goto done;
>  		ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res,
>  						&soft_fail_res);
>  		if (likely(!ret)) {
> @@ -1343,6 +1359,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
>  		if (mem_cgroup_soft_limit_check(mem_over_soft_limit))
>  			mem_cgroup_update_tree(mem_over_soft_limit, page);
>  	}
> +done:
>  	return 0;
>  nomem:
>  	css_put(&mem->css);
> @@ -1415,9 +1432,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
>  	lock_page_cgroup(pc);
>  	if (unlikely(PageCgroupUsed(pc))) {
>  		unlock_page_cgroup(pc);
> -		res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
> -		if (do_swap_account)
> -			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
> +		if (!mem_cgroup_is_root(mem)) {
> +			res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
> +			if (do_swap_account)
> +				res_counter_uncharge(&mem->memsw, PAGE_SIZE,
> +							NULL);
> +		}
>  		css_put(&mem->css);
>  		return;
>  	}
> @@ -1494,7 +1514,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
>  	if (pc->mem_cgroup != from)
>  		goto out;
>
> -	res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
> +	if (!mem_cgroup_is_root(from))
> +		res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
>  	mem_cgroup_charge_statistics(from, pc, false);
>
>  	page = pc->page;
> @@ -1513,7 +1534,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
>  						1);
>  	}
>
> -	if (do_swap_account)
> +	if (do_swap_account && !mem_cgroup_is_root(from))
>  		res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL);
>  	css_put(&from->css);
>
> @@ -1584,9 +1605,11 @@ uncharge:
>  	/* drop extra refcnt by try_charge() */
>  	css_put(&parent->css);
>  	/* uncharge if move fails */
> -	res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
> -	if (do_swap_account)
> -		res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
> +	if (!mem_cgroup_is_root(parent)) {
> +		res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
> +		if (do_swap_account)
> +			res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
> +	}
>  	return ret;
>  }
>
> @@ -1775,7 +1798,10 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
>  			 * This recorded memcg can be obsolete one. So, avoid
>  			 * calling css_tryget
>  			 */
> -			res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
> +			if (!mem_cgroup_is_root(memcg))
> +				res_counter_uncharge(&memcg->memsw, PAGE_SIZE,
> +							NULL);
> +			mem_cgroup_swap_statistics(memcg, false);
>  			mem_cgroup_put(memcg);
>  		}
>  		rcu_read_unlock();
> @@ -1800,9 +1826,11 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
>  		return;
>  	if (!mem)
>  		return;
> -	res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
> -	if (do_swap_account)
> -		res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
> +	if (!mem_cgroup_is_root(mem)) {
> +		res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
> +		if (do_swap_account)
> +			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
> +	}
>  	css_put(&mem->css);
>  }
>
> @@ -1855,9 +1883,14 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
>  		break;
>  	}
>
> -	res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
> -	if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
> -		res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
> +	if (!mem_cgroup_is_root(mem)) {
> +		res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
> +		if (do_swap_account &&
> +				(ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
> +			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
> +	}
> +	if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT && mem_cgroup_is_root(mem))
> +		mem_cgroup_swap_statistics(mem, true);
>  	mem_cgroup_charge_statistics(mem, pc, false);
>
>  	ClearPageCgroupUsed(pc);
> @@ -1948,7 +1981,9 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
>  		 * We uncharge this because swap is freed.
>  		 * This memcg can be obsolete one. We avoid calling css_tryget
>  		 */
> -		res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
> +		if (!mem_cgroup_is_root(memcg))
> +			res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
> +		mem_cgroup_swap_statistics(memcg, false);
>  		mem_cgroup_put(memcg);
>  	}
>  	rcu_read_unlock();
> @@ -2461,10 +2496,26 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
>  	name = MEMFILE_ATTR(cft->private);
>  	switch (type) {
>  	case _MEM:
> -		val = res_counter_read_u64(&mem->res, name);
> +		if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
> +			val = mem_cgroup_read_stat(&mem->stat,
> +					MEM_CGROUP_STAT_CACHE);
> +			val += mem_cgroup_read_stat(&mem->stat,
> +					MEM_CGROUP_STAT_RSS);
> +			val <<= PAGE_SHIFT;
> +		} else
> +			val = res_counter_read_u64(&mem->res, name);
>  		break;
>  	case _MEMSWAP:
> -		val = res_counter_read_u64(&mem->memsw, name);
> +		if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
> +			val = mem_cgroup_read_stat(&mem->stat,
> +					MEM_CGROUP_STAT_CACHE);
> +			val += mem_cgroup_read_stat(&mem->stat,
> +					MEM_CGROUP_STAT_RSS);
> +			val += mem_cgroup_read_stat(&mem->stat,
> +					MEM_CGROUP_STAT_SWAPOUT);
> +			val <<= PAGE_SHIFT;
> +		} else
> +			val = res_counter_read_u64(&mem->memsw, name);
>  		break;
>  	default:
>  		BUG();
>
> --
> 	Balbir
>



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] Help Resource Counters Scale better (v4.1)
  2009-08-12 16:28     ` KAMEZAWA Hiroyuki
@ 2009-08-12 17:19       ` Balbir Singh
  0 siblings, 0 replies; 20+ messages in thread
From: Balbir Singh @ 2009-08-12 17:19 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Andrew Morton, nishimura, kosaki.motohiro, menage, prarit,
	andi.kleen, xemul, lizf, linux-kernel, linux-mm

* KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> [2009-08-13 01:28:57]:

> Balbir Singh wrote:
> > Hi, Andrew,
> >
> > Does this look better, could you please replace the older patch with
> > this one.
> >
> > 1. I did a quick compile test
> > 2. Ran scripts/checkpatch.pl
> >
> 
> In general, seems reasonable to me as quick hack for root cgroup.
> thank you.
> 
> Reviewed-by: KAMEAZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
>

Thanks, yes, we still need to do the percpu counter work, but this
will give us breathing space to do it correctly and define strict and
non-strict accounting.
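
To make that direction concrete, here is a minimal sketch using the generic
percpu_counter API (none of this is from the posted patch; the names are
illustrative, and a real version would keep one counter per memcg and per
statistic):
==
#include <linux/percpu_counter.h>

/* Hypothetical non-strict usage counter: cheap to update, fuzzy to read. */
static struct percpu_counter memcg_usage;

static int __init memcg_usage_init(void)
{
	return percpu_counter_init(&memcg_usage, 0);
}

static void memcg_usage_charge(long nr_pages)
{
	/* No shared lock on the fast path; per-cpu deltas are batched. */
	percpu_counter_add(&memcg_usage, nr_pages);
}

static u64 memcg_usage_read(void)
{
	/* Approximate read; percpu_counter_sum() would be the strict variant. */
	return (u64)percpu_counter_read_positive(&memcg_usage) << PAGE_SHIFT;
}
==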
 
> Finally, we'll have to do some rework for light-weight res_counter.
> But yes, it will take some amount of time.
> My only concern is account leak, but, if some leak, it's current code's
> bug, not this patch.
> 

Yeah.. we'll need to check for that.

> And..hmm...I like following style other than open-coded.
> ==
> int mem_coutner_charge(struct mem_cgroup *mem)
> {
>       if (mem_cgroup_is_root(mem))
>                return 0; // always success
>       return res_counter_charge(....)
> }
> ==
> But maybe nitpick.
> 

Yes, we can refactor for simplification, but we'll need some exceptions.
I'll do that as an add-on patch.
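
For concreteness, a slightly fleshed-out version of Kame's sketch, with the
argument list mirroring the res_counter_charge() call sites in the patch
above (the helper name is illustrative only, not from the posted patch):
==
/*
 * Sketch only: fold the root-cgroup special case into one helper so the
 * charge sites need no open-coded mem_cgroup_is_root() checks.
 */
static int mem_cgroup_res_charge(struct mem_cgroup *mem,
				 struct res_counter *counter,
				 unsigned long val,
				 struct res_counter **fail_at,
				 struct res_counter **soft_fail_at)
{
	if (mem_cgroup_is_root(mem))
		return 0;	/* root is not accounted; always succeeds */
	return res_counter_charge(counter, val, fail_at, soft_fail_at);
}
==
An uncharge-side twin would follow the same pattern around
res_counter_uncharge().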



-- 
	Balbir


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] Help Resource Counters Scale better (v4.1)
  2009-08-12  4:57   ` [PATCH] Help Resource Counters Scale better (v4.1) Balbir Singh
  2009-08-12 10:53     ` Prarit Bhargava
  2009-08-12 16:28     ` KAMEZAWA Hiroyuki
@ 2009-08-13  1:03     ` Daisuke Nishimura
  2009-08-13  3:33       ` Balbir Singh
  2 siblings, 1 reply; 20+ messages in thread
From: Daisuke Nishimura @ 2009-08-13  1:03 UTC (permalink / raw)
  To: balbir
  Cc: Andrew Morton, kamezawa.hiroyu, kosaki.motohiro, menage, prarit,
	andi.kleen, xemul, lizf, nishimura, linux-kernel, linux-mm

> @@ -1855,9 +1883,14 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
>  		break;
>  	}
>  
> -	res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
> -	if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
> -		res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
> +	if (!mem_cgroup_is_root(mem)) {
> +		res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
> +		if (do_swap_account &&
> +				(ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
> +			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
> +	}
> +	if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT && mem_cgroup_is_root(mem))
> +		mem_cgroup_swap_statistics(mem, true);
I think the mem_cgroup_is_root(mem) check is unnecessary here.
Otherwise, MEM_CGROUP_STAT_SWAPOUT of groups other than the root memcgroup
won't be counted properly.


> @@ -2461,10 +2496,26 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
>  	name = MEMFILE_ATTR(cft->private);
>  	switch (type) {
>  	case _MEM:
> -		val = res_counter_read_u64(&mem->res, name);
> +		if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
> +			val = mem_cgroup_read_stat(&mem->stat,
> +					MEM_CGROUP_STAT_CACHE);
> +			val += mem_cgroup_read_stat(&mem->stat,
> +					MEM_CGROUP_STAT_RSS);
> +			val <<= PAGE_SHIFT;
> +		} else
> +			val = res_counter_read_u64(&mem->res, name);
>  		break;
>  	case _MEMSWAP:
> -		val = res_counter_read_u64(&mem->memsw, name);
> +		if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
> +			val = mem_cgroup_read_stat(&mem->stat,
> +					MEM_CGROUP_STAT_CACHE);
> +			val += mem_cgroup_read_stat(&mem->stat,
> +					MEM_CGROUP_STAT_RSS);
> +			val += mem_cgroup_read_stat(&mem->stat,
> +					MEM_CGROUP_STAT_SWAPOUT);
> +			val <<= PAGE_SHIFT;
> +		} else
> +			val = res_counter_read_u64(&mem->memsw, name);
>  		break;
>  	default:
>  		BUG();
Considering the use_hierarchy==1 case in the root memcgroup, shouldn't we use
mem_cgroup_walk_tree() here to sum up all the children's usage?
*.usage_in_bytes currently shows the sum of all the children's usage when
use_hierarchy==1.


Thanks,
Daisuke Nishimura.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] Help Resource Counters Scale better (v4.1)
  2009-08-13  1:03     ` Daisuke Nishimura
@ 2009-08-13  3:33       ` Balbir Singh
  2009-08-13  5:08         ` Daisuke Nishimura
  0 siblings, 1 reply; 20+ messages in thread
From: Balbir Singh @ 2009-08-13  3:33 UTC (permalink / raw)
  To: Daisuke Nishimura
  Cc: Andrew Morton, kamezawa.hiroyu, kosaki.motohiro, menage, prarit,
	andi.kleen, xemul, lizf, linux-kernel, linux-mm

* nishimura@mxp.nes.nec.co.jp <nishimura@mxp.nes.nec.co.jp> [2009-08-13 10:03:50]:

> > @@ -1855,9 +1883,14 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
> >  		break;
> >  	}
> >  
> > -	res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
> > -	if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
> > -		res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
> > +	if (!mem_cgroup_is_root(mem)) {
> > +		res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
> > +		if (do_swap_account &&
> > +				(ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
> > +			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
> > +	}
> > +	if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT && mem_cgroup_is_root(mem))
> > +		mem_cgroup_swap_statistics(mem, true);
> I think mem_cgroup_is_root(mem) would be unnecessary here.
> Otherwise, MEM_CGROUP_STAT_SWAPOUT of groups except root memcgroup wouldn't
> be counted properly.
> 

I think you have a valid point, but it will not impact us currently
since we use SWAPOUT only for root accounting.

> 
> > @@ -2461,10 +2496,26 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
> >  	name = MEMFILE_ATTR(cft->private);
> >  	switch (type) {
> >  	case _MEM:
> > -		val = res_counter_read_u64(&mem->res, name);
> > +		if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
> > +			val = mem_cgroup_read_stat(&mem->stat,
> > +					MEM_CGROUP_STAT_CACHE);
> > +			val += mem_cgroup_read_stat(&mem->stat,
> > +					MEM_CGROUP_STAT_RSS);
> > +			val <<= PAGE_SHIFT;
> > +		} else
> > +			val = res_counter_read_u64(&mem->res, name);
> >  		break;
> >  	case _MEMSWAP:
> > -		val = res_counter_read_u64(&mem->memsw, name);
> > +		if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
> > +			val = mem_cgroup_read_stat(&mem->stat,
> > +					MEM_CGROUP_STAT_CACHE);
> > +			val += mem_cgroup_read_stat(&mem->stat,
> > +					MEM_CGROUP_STAT_RSS);
> > +			val += mem_cgroup_read_stat(&mem->stat,
> > +					MEM_CGROUP_STAT_SWAPOUT);
> > +			val <<= PAGE_SHIFT;
> > +		} else
> > +			val = res_counter_read_u64(&mem->memsw, name);
> >  		break;
> >  	default:
> >  		BUG();
> Considering use_hierarchy==1 case in the root memcgroup, shouldn't we use
> mem_cgroup_walk_tree() here to sum up all the children's usage ?
> *.usage_in_bytes show sum of all the children's usage now if use_hierarchy==1.

If memory.use_hierarchy=1, we should use the total (hierarchical) stats,
right. Let me send out a newer version for review.
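
As a rough sketch of what that could look like (assuming mem_cgroup_walk_tree()
in this tree takes an opaque data pointer plus a per-memcg callback; the names
below are illustrative, not from the posted patch), the RES_USAGE branches
would then sum CACHE + RSS (and SWAPOUT for memsw) over the hierarchy instead
of reading only the root's local stats:
==
struct mem_cgroup_idx_data {
	s64 val;
	enum mem_cgroup_stat_index idx;
};

static int mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data)
{
	struct mem_cgroup_idx_data *d = data;

	d->val += mem_cgroup_read_stat(&mem->stat, d->idx);
	return 0;	/* keep walking */
}

/* Sum one statistic over the whole hierarchy rooted at @mem. */
static s64 mem_cgroup_recursive_stat(struct mem_cgroup *mem,
				     enum mem_cgroup_stat_index idx)
{
	struct mem_cgroup_idx_data d = { .val = 0, .idx = idx };

	mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat);
	return d.val;
}
==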


-- 
	Balbir


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] Help Resource Counters Scale better (v4.1)
  2009-08-13  3:33       ` Balbir Singh
@ 2009-08-13  5:08         ` Daisuke Nishimura
  0 siblings, 0 replies; 20+ messages in thread
From: Daisuke Nishimura @ 2009-08-13  5:08 UTC (permalink / raw)
  To: balbir
  Cc: Andrew Morton, kamezawa.hiroyu, kosaki.motohiro, menage, prarit,
	andi.kleen, xemul, lizf, nishimura, linux-kernel, linux-mm

On Thu, 13 Aug 2009 09:03:35 +0530, Balbir Singh <balbir@linux.vnet.ibm.com> wrote:
> * nishimura@mxp.nes.nec.co.jp <nishimura@mxp.nes.nec.co.jp> [2009-08-13 10:03:50]:
> 
> > > @@ -1855,9 +1883,14 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
> > >  		break;
> > >  	}
> > >  
> > > -	res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
> > > -	if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
> > > -		res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
> > > +	if (!mem_cgroup_is_root(mem)) {
> > > +		res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
> > > +		if (do_swap_account &&
> > > +				(ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
> > > +			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
> > > +	}
> > > +	if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT && mem_cgroup_is_root(mem))
> > > +		mem_cgroup_swap_statistics(mem, true);
> > I think mem_cgroup_is_root(mem) would be unnecessary here.
> > Otherwise, MEM_CGROUP_STAT_SWAPOUT of groups except root memcgroup wouldn't
> > be counted properly.
> > 
> 
> I think you have a valid point, but it will not impact us currently
> since we use SWAPOUT only for root accounting.
> 
> > 
> > > @@ -2461,10 +2496,26 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
> > >  	name = MEMFILE_ATTR(cft->private);
> > >  	switch (type) {
> > >  	case _MEM:
> > > -		val = res_counter_read_u64(&mem->res, name);
> > > +		if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
> > > +			val = mem_cgroup_read_stat(&mem->stat,
> > > +					MEM_CGROUP_STAT_CACHE);
> > > +			val += mem_cgroup_read_stat(&mem->stat,
> > > +					MEM_CGROUP_STAT_RSS);
> > > +			val <<= PAGE_SHIFT;
> > > +		} else
> > > +			val = res_counter_read_u64(&mem->res, name);
> > >  		break;
> > >  	case _MEMSWAP:
> > > -		val = res_counter_read_u64(&mem->memsw, name);
> > > +		if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
> > > +			val = mem_cgroup_read_stat(&mem->stat,
> > > +					MEM_CGROUP_STAT_CACHE);
> > > +			val += mem_cgroup_read_stat(&mem->stat,
> > > +					MEM_CGROUP_STAT_RSS);
> > > +			val += mem_cgroup_read_stat(&mem->stat,
> > > +					MEM_CGROUP_STAT_SWAPOUT);
> > > +			val <<= PAGE_SHIFT;
> > > +		} else
> > > +			val = res_counter_read_u64(&mem->memsw, name);
> > >  		break;
> > >  	default:
> > >  		BUG();
> > Considering use_hierarchy==1 case in the root memcgroup, shouldn't we use
> > mem_cgroup_walk_tree() here to sum up all the children's usage ?
> > *.usage_in_bytes show sum of all the children's usage now if use_hierarchy==1.
> 
> If memory.use_hiearchy=1, we should use total_stats..right. Let me
> send out a newer version for review.
> 
BTW, I don't think the title is a good fit for this patch.
It doesn't actually make res_counter scale any better :)

Of course, I think the patch using percpu_counter for scalability is also important.


Thanks,
Daisuke Nishimura.


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2009-08-13  5:11 UTC | newest]

Thread overview: 20+ messages
2009-08-11 14:44 Help Resource Counters Scale better (v4) Balbir Singh
2009-08-11 14:54 ` Prarit Bhargava
2009-08-11 15:00   ` Balbir Singh
2009-08-11 15:11     ` Prarit Bhargava
2009-08-11 15:14   ` Prarit Bhargava
2009-08-11 15:20   ` Andi Kleen
2009-08-11 15:21     ` Prarit Bhargava
2009-08-11 16:52 ` Balbir Singh
2009-08-11 23:31 ` Andrew Morton
2009-08-12  2:34   ` Daisuke Nishimura
2009-08-12  3:56   ` Balbir Singh
2009-08-12  3:57   ` Balbir Singh
2009-08-12  4:57   ` [PATCH] Help Resource Counters Scale better (v4.1) Balbir Singh
2009-08-12 10:53     ` Prarit Bhargava
2009-08-12 16:19       ` KAMEZAWA Hiroyuki
2009-08-12 16:28     ` KAMEZAWA Hiroyuki
2009-08-12 17:19       ` Balbir Singh
2009-08-13  1:03     ` Daisuke Nishimura
2009-08-13  3:33       ` Balbir Singh
2009-08-13  5:08         ` Daisuke Nishimura
