From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
"balbir@linux.vnet.ibm.com" <balbir@linux.vnet.ibm.com>,
"nishimura@mxp.nes.nec.co.jp" <nishimura@mxp.nes.nec.co.jp>
Subject: [RFC][PATCH 6/11] memcg: clean up percpu statistics
Date: Fri, 18 Sep 2009 17:57:24 +0900 [thread overview]
Message-ID: <20090918175724.e40c421a.kamezawa.hiroyu@jp.fujitsu.com> (raw)
In-Reply-To: <20090918174757.672f1e8e.kamezawa.hiroyu@jp.fujitsu.com>
mem_cgroup has per-cpu statistics counters. When they were first
implemented, the users were few and a very low-level interface was enough.
These days there are more users, so this patch cleans up the accessors
and adds more precise commentary.

The diff looks big, but mostly because code is moved: the percpu stat
access functions are relocated below the definition of struct mem_cgroup,
so that the new wrappers can take a struct mem_cgroup * directly.
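For example, an open-coded per-cpu update like the old body of
mem_cgroup_swap_statistics(),

	struct mem_cgroup_stat *stat = &mem->stat;
	struct mem_cgroup_stat_cpu *cpustat;
	int cpu = get_cpu();

	cpustat = &stat->cpustat[cpu];
	__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val);
	put_cpu();

collapses into a single call to the new wrapper, which does the
get_cpu()/put_cpu() dance internally:

	mem_cgroup_stat_add_local(mem, MEM_CGROUP_STAT_SWAPOUT, val);

Similarly, mem_cgroup_read_stat() now takes a struct mem_cgroup * rather
than a struct mem_cgroup_stat *, so callers no longer touch the internal
stat structure directly.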
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
mm/memcontrol.c | 224 +++++++++++++++++++++++++++++++-------------------------
1 file changed, 125 insertions(+), 99 deletions(-)
Index: mmotm-2.6.31-Sep17/mm/memcontrol.c
===================================================================
--- mmotm-2.6.31-Sep17.orig/mm/memcontrol.c
+++ mmotm-2.6.31-Sep17/mm/memcontrol.c
@@ -59,7 +59,7 @@ static DEFINE_MUTEX(memcg_tasklist); /*
#define SOFTLIMIT_EVENTS_THRESH (1000)
/*
- * Statistics for memory cgroup.
+ * Statistics for memory cgroup, accounted per cpu.
*/
enum mem_cgroup_stat_index {
/*
@@ -84,48 +84,6 @@ struct mem_cgroup_stat {
struct mem_cgroup_stat_cpu cpustat[0];
};
-static inline void
-__mem_cgroup_stat_reset_safe(struct mem_cgroup_stat_cpu *stat,
- enum mem_cgroup_stat_index idx)
-{
- stat->count[idx] = 0;
-}
-
-static inline s64
-__mem_cgroup_stat_read_local(struct mem_cgroup_stat_cpu *stat,
- enum mem_cgroup_stat_index idx)
-{
- return stat->count[idx];
-}
-
-/*
- * For accounting under irq disable, no need for increment preempt count.
- */
-static inline void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat_cpu *stat,
- enum mem_cgroup_stat_index idx, int val)
-{
- stat->count[idx] += val;
-}
-
-static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
- enum mem_cgroup_stat_index idx)
-{
- int cpu;
- s64 ret = 0;
- for_each_possible_cpu(cpu)
- ret += stat->cpustat[cpu].count[idx];
- return ret;
-}
-
-static s64 mem_cgroup_local_usage(struct mem_cgroup_stat *stat)
-{
- s64 ret;
-
- ret = mem_cgroup_read_stat(stat, MEM_CGROUP_STAT_CACHE);
- ret += mem_cgroup_read_stat(stat, MEM_CGROUP_STAT_RSS);
- return ret;
-}
-
/*
* per-zone information in memory controller.
*/
@@ -278,6 +236,101 @@ static void mem_cgroup_put(struct mem_cg
static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
static void drain_all_stock_async(void);
+/*
+ * Functions for accessing cpu-local statistics. Modifications must be
+ * done with preemption disabled. The __mem_cgroup_xxx variants are low-level.
+ */
+static inline void
+__mem_cgroup_stat_reset_local(struct mem_cgroup_stat_cpu *stat,
+ enum mem_cgroup_stat_index idx)
+{
+ stat->count[idx] = 0;
+}
+
+static inline void
+mem_cgroup_stat_reset_local(struct mem_cgroup *mem,
+ enum mem_cgroup_stat_index idx)
+{
+ struct mem_cgroup_stat *stat = &mem->stat;
+ struct mem_cgroup_stat_cpu *cstat;
+ int cpu = get_cpu();
+
+ cstat = &stat->cpustat[cpu];
+ __mem_cgroup_stat_reset_local(cstat, idx);
+ put_cpu();
+}
+
+static inline s64
+__mem_cgroup_stat_read_local(struct mem_cgroup_stat_cpu *stat,
+ enum mem_cgroup_stat_index idx)
+{
+ return stat->count[idx];
+}
+
+static inline s64
+mem_cgroup_stat_read_local(struct mem_cgroup *mem,
+ enum mem_cgroup_stat_index idx)
+{
+ struct mem_cgroup_stat *stat = &mem->stat;
+ struct mem_cgroup_stat_cpu *cstat;
+ int cpu = get_cpu();
+ s64 val;
+
+ cstat = &stat->cpustat[cpu];
+ val = __mem_cgroup_stat_read_local(cstat, idx);
+ put_cpu();
+ return val;
+}
+
+static inline void __mem_cgroup_stat_add_local(struct mem_cgroup_stat_cpu *stat,
+ enum mem_cgroup_stat_index idx, int val)
+{
+ stat->count[idx] += val;
+}
+
+static inline void
+mem_cgroup_stat_add_local(struct mem_cgroup *mem,
+ enum mem_cgroup_stat_index idx, int val)
+{
+ struct mem_cgroup_stat *stat = &mem->stat;
+ struct mem_cgroup_stat_cpu *cstat;
+ int cpu = get_cpu();
+
+ cstat = &stat->cpustat[cpu];
+ __mem_cgroup_stat_add_local(cstat, idx, val);
+ put_cpu();
+}
+
+/*
+ * Read the sum of a counter across all percpu statistics.
+ * This can be slow on big machines.
+ */
+static s64 mem_cgroup_read_stat(struct mem_cgroup *mem,
+ enum mem_cgroup_stat_index idx)
+{
+ int cpu;
+ s64 ret = 0;
+ struct mem_cgroup_stat *stat = &mem->stat;
+
+ for_each_possible_cpu(cpu)
+ ret += stat->cpustat[cpu].count[idx];
+ return ret;
+}
+/*
+ * When mem_cgroup is used with hierarchy inheritance enabled, cgroup-local
+ * memory usage is given only by the sum of the percpu statistics. This
+ * function returns cgroup-local memory usage even under hierarchy.
+ */
+static s64 mem_cgroup_local_usage(struct mem_cgroup *mem)
+{
+ s64 ret;
+
+ ret = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE);
+ ret += mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS);
+ return ret;
+}
+
+
static struct mem_cgroup_per_zone *
mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
{
@@ -370,18 +423,13 @@ mem_cgroup_remove_exceeded(struct mem_cg
static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem)
{
bool ret = false;
- int cpu;
s64 val;
- struct mem_cgroup_stat_cpu *cpustat;
- cpu = get_cpu();
- cpustat = &mem->stat.cpustat[cpu];
- val = __mem_cgroup_stat_read_local(cpustat, MEM_CGROUP_STAT_EVENTS);
+ val = mem_cgroup_stat_read_local(mem, MEM_CGROUP_STAT_EVENTS);
if (unlikely(val > SOFTLIMIT_EVENTS_THRESH)) {
- __mem_cgroup_stat_reset_safe(cpustat, MEM_CGROUP_STAT_EVENTS);
+ mem_cgroup_stat_reset_local(mem, MEM_CGROUP_STAT_EVENTS);
ret = true;
}
- put_cpu();
return ret;
}
@@ -480,13 +528,7 @@ static void mem_cgroup_swap_statistics(s
bool charge)
{
int val = (charge) ? 1 : -1;
- struct mem_cgroup_stat *stat = &mem->stat;
- struct mem_cgroup_stat_cpu *cpustat;
- int cpu = get_cpu();
-
- cpustat = &stat->cpustat[cpu];
- __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val);
- put_cpu();
+ mem_cgroup_stat_add_local(mem, MEM_CGROUP_STAT_SWAPOUT, val);
}
static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
@@ -495,22 +537,22 @@ static void mem_cgroup_charge_statistics
{
int val = (charge) ? 1 : -1;
struct mem_cgroup_stat *stat = &mem->stat;
- struct mem_cgroup_stat_cpu *cpustat;
+ struct mem_cgroup_stat_cpu *cstat;
int cpu = get_cpu();
-
- cpustat = &stat->cpustat[cpu];
+ /* For speed, the percpu access is open-coded here */
+ cstat = &stat->cpustat[cpu];
if (PageCgroupCache(pc))
- __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val);
+ __mem_cgroup_stat_add_local(cstat, MEM_CGROUP_STAT_CACHE, val);
else
- __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_RSS, val);
+ __mem_cgroup_stat_add_local(cstat, MEM_CGROUP_STAT_RSS, val);
if (charge)
- __mem_cgroup_stat_add_safe(cpustat,
+ __mem_cgroup_stat_add_local(cstat,
MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
else
- __mem_cgroup_stat_add_safe(cpustat,
+ __mem_cgroup_stat_add_local(cstat,
MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
- __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_EVENTS, 1);
+ __mem_cgroup_stat_add_local(cstat, MEM_CGROUP_STAT_EVENTS, 1);
put_cpu();
}
@@ -1163,7 +1205,11 @@ static int mem_cgroup_hierarchical_recla
}
}
}
- if (!mem_cgroup_local_usage(&victim->stat)) {
+ /*
+ * mem->res can include children cgroups' memory usage. What we
+ * need to check here is the local usage.
+ */
+ if (!mem_cgroup_local_usage(victim)) {
/* this cgroup's local usage == 0 */
css_put(&victim->css);
continue;
@@ -1229,9 +1275,6 @@ static void record_last_oom(struct mem_c
void mem_cgroup_update_mapped_file_stat(struct page *page, int val)
{
struct mem_cgroup *mem;
- struct mem_cgroup_stat *stat;
- struct mem_cgroup_stat_cpu *cpustat;
- int cpu;
struct page_cgroup *pc;
if (!page_is_file_cache(page))
@@ -1249,14 +1292,7 @@ void mem_cgroup_update_mapped_file_stat(
if (!PageCgroupUsed(pc))
goto done;
- /*
- * Preemption is already disabled, we don't need get_cpu()
- */
- cpu = smp_processor_id();
- stat = &mem->stat;
- cpustat = &stat->cpustat[cpu];
-
- __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, val);
+ mem_cgroup_stat_add_local(mem, MEM_CGROUP_STAT_MAPPED_FILE, val);
done:
unlock_page_cgroup(pc);
}
@@ -1607,9 +1643,6 @@ static int mem_cgroup_move_account(struc
int nid, zid;
int ret = -EBUSY;
struct page *page;
- int cpu;
- struct mem_cgroup_stat *stat;
- struct mem_cgroup_stat_cpu *cpustat;
VM_BUG_ON(from == to);
VM_BUG_ON(PageLRU(pc->page));
@@ -1634,18 +1667,11 @@ static int mem_cgroup_move_account(struc
page = pc->page;
if (page_is_file_cache(page) && page_mapped(page)) {
- cpu = smp_processor_id();
- /* Update mapped_file data for mem_cgroup "from" */
- stat = &from->stat;
- cpustat = &stat->cpustat[cpu];
- __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE,
- -1);
-
- /* Update mapped_file data for mem_cgroup "to" */
- stat = &to->stat;
- cpustat = &stat->cpustat[cpu];
- __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE,
- 1);
+ /* decrement mapped_file data for mem_cgroup "from" */
+ mem_cgroup_stat_add_local(from,
+ MEM_CGROUP_STAT_MAPPED_FILE, -1);
+ /* increment mapped_file data for mem_cgroup "to" */
+ mem_cgroup_stat_add_local(to, MEM_CGROUP_STAT_MAPPED_FILE, 1);
}
if (do_swap_account && !mem_cgroup_is_root(from))
@@ -2685,7 +2711,7 @@ static int
mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data)
{
struct mem_cgroup_idx_data *d = data;
- d->val += mem_cgroup_read_stat(&mem->stat, d->idx);
+ d->val += mem_cgroup_read_stat(mem, d->idx);
return 0;
}
@@ -2890,18 +2916,18 @@ static int mem_cgroup_get_local_stat(str
s64 val;
/* per cpu stat */
- val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_CACHE);
+ val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE);
s->stat[MCS_CACHE] += val * PAGE_SIZE;
- val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
+ val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS);
s->stat[MCS_RSS] += val * PAGE_SIZE;
- val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_MAPPED_FILE);
+ val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_MAPPED_FILE);
s->stat[MCS_MAPPED_FILE] += val * PAGE_SIZE;
- val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT);
+ val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_PGPGIN_COUNT);
s->stat[MCS_PGPGIN] += val;
- val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT);
+ val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_PGPGOUT_COUNT);
s->stat[MCS_PGPGOUT] += val;
if (do_swap_account) {
- val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_SWAPOUT);
+ val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT);
s->stat[MCS_SWAP] += val * PAGE_SIZE;
}
--