From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
"balbir@linux.vnet.ibm.com" <balbir@linux.vnet.ibm.com>,
"nishimura@mxp.nes.nec.co.jp" <nishimura@mxp.nes.nec.co.jp>
Subject: [RFC][PATCH 10/11][mmotm] memcg: clean up percpu and more commentary for soft limit
Date: Fri, 18 Sep 2009 18:04:19 +0900 [thread overview]
Message-ID: <20090918180419.fc511373.kamezawa.hiroyu@jp.fujitsu.com> (raw)
In-Reply-To: <20090918174757.672f1e8e.kamezawa.hiroyu@jp.fujitsu.com>
yes, should be separated into 2 patches...
==
This patch does
- adds some commentary on the softlimit code.
- moves per-cpu statistics code right after the percpu stat functions.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
mm/memcontrol.c | 161 +++++++++++++++++++++++++++++++++-----------------------
1 file changed, 97 insertions(+), 64 deletions(-)
Index: mmotm-2.6.31-Sep17/mm/memcontrol.c
===================================================================
--- mmotm-2.6.31-Sep17.orig/mm/memcontrol.c
+++ mmotm-2.6.31-Sep17/mm/memcontrol.c
@@ -56,7 +56,7 @@ static int really_do_swap_account __init
#endif
static DEFINE_MUTEX(memcg_tasklist); /* can be hold under cgroup_mutex */
-#define SOFTLIMIT_EVENTS_THRESH (1000)
+
/*
* Statistics for memory cgroup. accounted per cpu.
@@ -118,8 +118,9 @@ struct mem_cgroup_lru_info {
};
/*
- * Cgroups above their limits are maintained in a RB-Tree, independent of
- * their hierarchy representation
+ * Cgroups above their soft-limits are maintained in a RB-Tree, independent of
+ * their hierarchy representation. This RB-tree is system-wide but maintained
+ * per zone.
*/
struct mem_cgroup_tree_per_zone {
@@ -415,6 +416,70 @@ static s64 mem_cgroup_local_usage(struct
}
+static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
+ bool charge)
+{
+ int val = (charge) ? 1 : -1;
+ mem_cgroup_stat_add_local(mem, MEM_CGROUP_STAT_SWAPOUT, val);
+}
+
+static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
+ struct page_cgroup *pc,
+ bool charge)
+{
+ int val = (charge) ? 1 : -1;
+ struct mem_cgroup_stat *stat = &mem->stat;
+ struct mem_cgroup_stat_cpu *cstat;
+ int cpu = get_cpu();
+ /* for fast access, we use open-coded manner */
+ cstat = &stat->cpustat[cpu];
+ if (PageCgroupCache(pc))
+ __mem_cgroup_stat_add_local(cstat, MEM_CGROUP_STAT_CACHE, val);
+ else
+ __mem_cgroup_stat_add_local(cstat, MEM_CGROUP_STAT_RSS, val);
+
+ if (charge)
+ __mem_cgroup_stat_add_local(cstat,
+ MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
+ else
+ __mem_cgroup_stat_add_local(cstat,
+ MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
+ __mem_cgroup_stat_add_local(cstat, MEM_CGROUP_STAT_EVENTS, 1);
+ put_cpu();
+}
+
+/*
+ * Currently used to update mapped file statistics, but the routine can be
+ * generalized to update other statistics as well.
+ */
+void mem_cgroup_update_mapped_file_stat(struct page *page, int val)
+{
+ struct mem_cgroup *mem;
+ struct page_cgroup *pc;
+
+ if (!page_is_file_cache(page))
+ return;
+
+ pc = lookup_page_cgroup(page);
+ if (unlikely(!pc))
+ return;
+
+ lock_page_cgroup(pc);
+ mem = pc->mem_cgroup;
+ if (!mem)
+ goto done;
+
+ if (!PageCgroupUsed(pc))
+ goto done;
+
+ mem_cgroup_stat_add_local(mem, MEM_CGROUP_STAT_MAPPED_FILE, val);
+done:
+ unlock_page_cgroup(pc);
+}
+
+/*
+ * For per-zone statistics.
+ */
static struct mem_cgroup_per_zone *
mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
{
@@ -460,6 +525,17 @@ static unsigned long mem_cgroup_get_zone
return total;
}
+/*
+ * The following functions manage the per-zone memcg softlimit RB-tree.
+ * The tree is system-wide but maintained per zone.
+ */
+
+/*
+ * Soft limit uses percpu event counter for status check instead of checking
+ * status at every charge/uncharge.
+ */
+#define SOFTLIMIT_EVENTS_THRESH (1000)
+
static struct mem_cgroup_tree_per_zone *
soft_limit_tree_node_zone(int nid, int zid)
{
@@ -472,9 +548,14 @@ soft_limit_tree_from_page(struct page *p
int nid = page_to_nid(page);
int zid = page_zonenum(page);
- return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+ return soft_limit_tree_node_zone(nid, zid);
}
+/*
+ * Insert memcg's per-zone struct onto the softlimit RB-tree. The mz
+ * must not already be on the tree when this is called.
+ * The tree lock (mctz->lock) must be held by the caller.
+ */
static void
__mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
struct mem_cgroup_per_zone *mz,
@@ -530,6 +611,10 @@ mem_cgroup_remove_exceeded(struct mem_cg
spin_unlock(&mctz->lock);
}
+/*
+ * Check the per-cpu EVENT COUNTER. If it's over the threshold, we check
+ * how much memory usage exceeds the soft limit and update the tree.
+ */
static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem)
{
bool ret = false;
@@ -543,6 +628,11 @@ static bool mem_cgroup_soft_limit_check(
return ret;
}
+/*
+ * This function updates soft-limit RB-tree by checking "excess" of
+ * memcgs. When hierarchy is used, all ancestors have to be updated, too.
+ */
+
static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
{
unsigned long long excess;
@@ -598,6 +688,9 @@ static inline unsigned long mem_cgroup_g
return res_counter_soft_limit_excess(&mem->res) >> PAGE_SHIFT;
}
+/*
+ * Check RB-tree of a zone and find a memcg which has the largest "excess"
+ */
static struct mem_cgroup_per_zone *
__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
{
@@ -634,38 +727,6 @@ mem_cgroup_largest_soft_limit_node(struc
return mz;
}
-static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
- bool charge)
-{
- int val = (charge) ? 1 : -1;
- mem_cgroup_stat_add_local(mem, MEM_CGROUP_STAT_SWAPOUT, val);
-}
-
-static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
- struct page_cgroup *pc,
- bool charge)
-{
- int val = (charge) ? 1 : -1;
- struct mem_cgroup_stat *stat = &mem->stat;
- struct mem_cgroup_stat_cpu *cstat;
- int cpu = get_cpu();
- /* for fast access, we use open-coded manner */
- cstat = &stat->cpustat[cpu];
- if (PageCgroupCache(pc))
- __mem_cgroup_stat_add_local(cstat, MEM_CGROUP_STAT_CACHE, val);
- else
- __mem_cgroup_stat_add_local(cstat, MEM_CGROUP_STAT_RSS, val);
-
- if (charge)
- __mem_cgroup_stat_add_local(cstat,
- MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
- else
- __mem_cgroup_stat_add_local(cstat,
- MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
- __mem_cgroup_stat_add_local(cstat, MEM_CGROUP_STAT_EVENTS, 1);
- put_cpu();
-}
-
/*
* Call callback function against all cgroup under hierarchy tree.
@@ -1305,34 +1366,6 @@ static void record_last_oom(struct mem_c
mem_cgroup_walk_tree(mem, NULL, record_last_oom_cb);
}
-/*
- * Currently used to update mapped file statistics, but the routine can be
- * generalized to update other statistics as well.
- */
-void mem_cgroup_update_mapped_file_stat(struct page *page, int val)
-{
- struct mem_cgroup *mem;
- struct page_cgroup *pc;
-
- if (!page_is_file_cache(page))
- return;
-
- pc = lookup_page_cgroup(page);
- if (unlikely(!pc))
- return;
-
- lock_page_cgroup(pc);
- mem = pc->mem_cgroup;
- if (!mem)
- goto done;
-
- if (!PageCgroupUsed(pc))
- goto done;
-
- mem_cgroup_stat_add_local(mem, MEM_CGROUP_STAT_MAPPED_FILE, val);
-done:
- unlock_page_cgroup(pc);
-}
#define CHARGE_SIZE (64 * PAGE_SIZE)
struct memcg_stock_pcp {
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2009-09-18 9:06 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-09-09 8:39 [RFC][PATCH 0/4][mmotm] memcg: reduce lock contention v3 KAMEZAWA Hiroyuki
2009-09-09 8:41 ` [RFC][PATCH 1/4][mmotm] memcg: soft limit clean up KAMEZAWA Hiroyuki
[not found] ` <661de9470909090410t160454a2k658c980b92d11612@mail.gmail.com>
2009-09-10 0:10 ` KAMEZAWA Hiroyuki
2009-09-09 8:41 ` [RFC][PATCH 2/4][mmotm] clean up charge path of softlimit KAMEZAWA Hiroyuki
2009-09-09 8:44 ` [RFC][PATCH 3/4][mmotm] memcg: batched uncharge KAMEZAWA Hiroyuki
2009-09-09 8:45 ` [RFC][PATCH 4/4][mmotm] memcg: coalescing charge KAMEZAWA Hiroyuki
2009-09-12 4:58 ` Daisuke Nishimura
2009-09-15 0:09 ` KAMEZAWA Hiroyuki
2009-09-09 20:30 ` [RFC][PATCH 0/4][mmotm] memcg: reduce lock contention v3 Balbir Singh
2009-09-10 0:20 ` KAMEZAWA Hiroyuki
2009-09-10 5:18 ` Balbir Singh
2009-09-18 8:47 ` [RFC][PATCH 0/11][mmotm] memcg: patch dump (Sep/18) KAMEZAWA Hiroyuki
2009-09-18 8:50 ` [RFC][PATCH 1/11] memcg: clean up softlimit uncharge KAMEZAWA Hiroyuki
2009-09-18 8:52 ` [RFC][PATCH 2/11]memcg: reduce res_counter_soft_limit_excess KAMEZAWA Hiroyuki
2009-09-18 8:53 ` [RFC][PATCH 3/11] memcg: coalescing uncharge KAMEZAWA Hiroyuki
2009-09-18 8:54 ` [RFC][PATCH 4/11] memcg: coalescing charge KAMEZAWA Hiroyuki
2009-09-18 8:55 ` [RFC][PATCH 5/11] memcg: clean up cancel charge KAMEZAWA Hiroyuki
2009-09-18 8:57 ` [RFC][PATCH 6/11] memcg: cleaun up percpu statistics KAMEZAWA Hiroyuki
2009-09-18 8:58 ` [RFC][PATCH 7/11] memcg: rename from_cont to from_cgroup KAMEZAWA Hiroyuki
2009-09-18 9:00 ` [RFC][PATCH 8/11]memcg: remove unused macro and adds commentary KAMEZAWA Hiroyuki
2009-09-18 9:01 ` [RFC][PATCH 9/11]memcg: clean up zonestat funcs KAMEZAWA Hiroyuki
2009-09-18 9:04 ` KAMEZAWA Hiroyuki [this message]
2009-09-18 9:06 ` [RFC][PATCH 11/11][mmotm] memcg: more commentary and clean up KAMEZAWA Hiroyuki
2009-09-18 10:37 ` [RFC][PATCH 0/11][mmotm] memcg: patch dump (Sep/18) Daisuke Nishimura
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090918180419.fc511373.kamezawa.hiroyu@jp.fujitsu.com \
--to=kamezawa.hiroyu@jp.fujitsu.com \
--cc=balbir@linux.vnet.ibm.com \
--cc=linux-mm@kvack.org \
--cc=nishimura@mxp.nes.nec.co.jp \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox