From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
"balbir@linux.vnet.ibm.com" <balbir@linux.vnet.ibm.com>,
"nishimura@mxp.nes.nec.co.jp" <nishimura@mxp.nes.nec.co.jp>
Subject: [RFC][PATCH 10/11][mmotm] memcg: clean up percpu and more commentary for soft limit
Date: Fri, 18 Sep 2009 18:04:19 +0900 [thread overview]
Message-ID: <20090918180419.fc511373.kamezawa.hiroyu@jp.fujitsu.com> (raw)
In-Reply-To: <20090918174757.672f1e8e.kamezawa.hiroyu@jp.fujitsu.com>
yes, should be separated into 2 patches...
==
This patch does
- adds some commentary on the softlimit code.
- moves per-cpu statistics code right after the percpu stat functions.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
mm/memcontrol.c | 161 +++++++++++++++++++++++++++++++++-----------------------
1 file changed, 97 insertions(+), 64 deletions(-)
Index: mmotm-2.6.31-Sep17/mm/memcontrol.c
===================================================================
--- mmotm-2.6.31-Sep17.orig/mm/memcontrol.c
+++ mmotm-2.6.31-Sep17/mm/memcontrol.c
@@ -56,7 +56,7 @@ static int really_do_swap_account __init
#endif
static DEFINE_MUTEX(memcg_tasklist); /* can be hold under cgroup_mutex */
-#define SOFTLIMIT_EVENTS_THRESH (1000)
+
/*
* Statistics for memory cgroup. accounted per cpu.
@@ -118,8 +118,9 @@ struct mem_cgroup_lru_info {
};
/*
- * Cgroups above their limits are maintained in a RB-Tree, independent of
- * their hierarchy representation
+ * Cgroups above their soft-limits are maintained in a RB-Tree, independent of
+ * their hierarchy representation. This RB-tree is system-wide but maintained
+ * per zone.
*/
struct mem_cgroup_tree_per_zone {
@@ -415,6 +416,70 @@ static s64 mem_cgroup_local_usage(struct
}
+static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
+ bool charge)
+{
+ int val = (charge) ? 1 : -1;
+ mem_cgroup_stat_add_local(mem, MEM_CGROUP_STAT_SWAPOUT, val);
+}
+
+static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
+ struct page_cgroup *pc,
+ bool charge)
+{
+ int val = (charge) ? 1 : -1;
+ struct mem_cgroup_stat *stat = &mem->stat;
+ struct mem_cgroup_stat_cpu *cstat;
+ int cpu = get_cpu();
+ /* for fast access, we use open-coded manner */
+ cstat = &stat->cpustat[cpu];
+ if (PageCgroupCache(pc))
+ __mem_cgroup_stat_add_local(cstat, MEM_CGROUP_STAT_CACHE, val);
+ else
+ __mem_cgroup_stat_add_local(cstat, MEM_CGROUP_STAT_RSS, val);
+
+ if (charge)
+ __mem_cgroup_stat_add_local(cstat,
+ MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
+ else
+ __mem_cgroup_stat_add_local(cstat,
+ MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
+ __mem_cgroup_stat_add_local(cstat, MEM_CGROUP_STAT_EVENTS, 1);
+ put_cpu();
+}
+
+/*
+ * Currently used to update mapped file statistics, but the routine can be
+ * generalized to update other statistics as well.
+ */
+void mem_cgroup_update_mapped_file_stat(struct page *page, int val)
+{
+ struct mem_cgroup *mem;
+ struct page_cgroup *pc;
+
+ if (!page_is_file_cache(page))
+ return;
+
+ pc = lookup_page_cgroup(page);
+ if (unlikely(!pc))
+ return;
+
+ lock_page_cgroup(pc);
+ mem = pc->mem_cgroup;
+ if (!mem)
+ goto done;
+
+ if (!PageCgroupUsed(pc))
+ goto done;
+
+ mem_cgroup_stat_add_local(mem, MEM_CGROUP_STAT_MAPPED_FILE, val);
+done:
+ unlock_page_cgroup(pc);
+}
+
+/*
+ * For per-zone statistics.
+ */
static struct mem_cgroup_per_zone *
mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
{
@@ -460,6 +525,17 @@ static unsigned long mem_cgroup_get_zone
return total;
}
+/*
+ * The following functions manage the per-zone memcg softlimit RB-tree.
+ * The tree is system-wide but maintained per zone.
+ */
+
+/*
+ * Soft limit uses percpu event counter for status check instead of checking
+ * status at every charge/uncharge.
+ */
+#define SOFTLIMIT_EVENTS_THRESH (1000)
+
static struct mem_cgroup_tree_per_zone *
soft_limit_tree_node_zone(int nid, int zid)
{
@@ -472,9 +548,14 @@ soft_limit_tree_from_page(struct page *p
int nid = page_to_nid(page);
int zid = page_zonenum(page);
- return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+ return soft_limit_tree_node_zone(nid, zid);
}
+/*
+ * Insert memcg's per-zone struct onto the softlimit RB-tree. The mz
+ * must not already be on the tree when this is called.
+ * The tree lock (mctz->lock) must be held by the caller.
+ */
static void
__mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
struct mem_cgroup_per_zone *mz,
@@ -530,6 +611,10 @@ mem_cgroup_remove_exceeded(struct mem_cg
spin_unlock(&mctz->lock);
}
+/*
+ * Check the per-cpu EVENT COUNTER. If it's over the threshold, we check
+ * how much memory usage exceeds the soft limit and update the tree.
+ */
static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem)
{
bool ret = false;
@@ -543,6 +628,11 @@ static bool mem_cgroup_soft_limit_check(
return ret;
}
+/*
+ * This function updates soft-limit RB-tree by checking "excess" of
+ * memcgs. When hierarchy is used, all ancestors have to be updated, too.
+ */
+
static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
{
unsigned long long excess;
@@ -598,6 +688,9 @@ static inline unsigned long mem_cgroup_g
return res_counter_soft_limit_excess(&mem->res) >> PAGE_SHIFT;
}
+/*
+ * Check RB-tree of a zone and find a memcg which has the largest "excess"
+ */
static struct mem_cgroup_per_zone *
__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
{
@@ -634,38 +727,6 @@ mem_cgroup_largest_soft_limit_node(struc
return mz;
}
-static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
- bool charge)
-{
- int val = (charge) ? 1 : -1;
- mem_cgroup_stat_add_local(mem, MEM_CGROUP_STAT_SWAPOUT, val);
-}
-
-static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
- struct page_cgroup *pc,
- bool charge)
-{
- int val = (charge) ? 1 : -1;
- struct mem_cgroup_stat *stat = &mem->stat;
- struct mem_cgroup_stat_cpu *cstat;
- int cpu = get_cpu();
- /* for fast access, we use open-coded manner */
- cstat = &stat->cpustat[cpu];
- if (PageCgroupCache(pc))
- __mem_cgroup_stat_add_local(cstat, MEM_CGROUP_STAT_CACHE, val);
- else
- __mem_cgroup_stat_add_local(cstat, MEM_CGROUP_STAT_RSS, val);
-
- if (charge)
- __mem_cgroup_stat_add_local(cstat,
- MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
- else
- __mem_cgroup_stat_add_local(cstat,
- MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
- __mem_cgroup_stat_add_local(cstat, MEM_CGROUP_STAT_EVENTS, 1);
- put_cpu();
-}
-
/*
* Call callback function against all cgroup under hierarchy tree.
@@ -1305,34 +1366,6 @@ static void record_last_oom(struct mem_c
mem_cgroup_walk_tree(mem, NULL, record_last_oom_cb);
}
-/*
- * Currently used to update mapped file statistics, but the routine can be
- * generalized to update other statistics as well.
- */
-void mem_cgroup_update_mapped_file_stat(struct page *page, int val)
-{
- struct mem_cgroup *mem;
- struct page_cgroup *pc;
-
- if (!page_is_file_cache(page))
- return;
-
- pc = lookup_page_cgroup(page);
- if (unlikely(!pc))
- return;
-
- lock_page_cgroup(pc);
- mem = pc->mem_cgroup;
- if (!mem)
- goto done;
-
- if (!PageCgroupUsed(pc))
- goto done;
-
- mem_cgroup_stat_add_local(mem, MEM_CGROUP_STAT_MAPPED_FILE, val);
-done:
- unlock_page_cgroup(pc);
-}
#define CHARGE_SIZE (64 * PAGE_SIZE)
struct memcg_stock_pcp {
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2009-09-18 9:06 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-09-09 8:39 [RFC][PATCH 0/4][mmotm] memcg: reduce lock contention v3 KAMEZAWA Hiroyuki
2009-09-09 8:41 ` [RFC][PATCH 1/4][mmotm] memcg: soft limit clean up KAMEZAWA Hiroyuki
[not found] ` <661de9470909090410t160454a2k658c980b92d11612@mail.gmail.com>
2009-09-10 0:10 ` KAMEZAWA Hiroyuki
2009-09-09 8:41 ` [RFC][PATCH 2/4][mmotm] clean up charge path of softlimit KAMEZAWA Hiroyuki
2009-09-09 8:44 ` [RFC][PATCH 3/4][mmotm] memcg: batched uncharge KAMEZAWA Hiroyuki
2009-09-09 8:45 ` [RFC][PATCH 4/4][mmotm] memcg: coalescing charge KAMEZAWA Hiroyuki
2009-09-12 4:58 ` Daisuke Nishimura
2009-09-15 0:09 ` KAMEZAWA Hiroyuki
2009-09-09 20:30 ` [RFC][PATCH 0/4][mmotm] memcg: reduce lock contention v3 Balbir Singh
2009-09-10 0:20 ` KAMEZAWA Hiroyuki
2009-09-10 5:18 ` Balbir Singh
2009-09-18 8:47 ` [RFC][PATCH 0/11][mmotm] memcg: patch dump (Sep/18) KAMEZAWA Hiroyuki
2009-09-18 8:50 ` [RFC][PATCH 1/11] memcg: clean up softlimit uncharge KAMEZAWA Hiroyuki
2009-09-18 8:52 ` [RFC][PATCH 2/11]memcg: reduce res_counter_soft_limit_excess KAMEZAWA Hiroyuki
2009-09-18 8:53 ` [RFC][PATCH 3/11] memcg: coalescing uncharge KAMEZAWA Hiroyuki
2009-09-18 8:54 ` [RFC][PATCH 4/11] memcg: coalescing charge KAMEZAWA Hiroyuki
2009-09-18 8:55 ` [RFC][PATCH 5/11] memcg: clean up cancel charge KAMEZAWA Hiroyuki
2009-09-18 8:57 ` [RFC][PATCH 6/11] memcg: cleaun up percpu statistics KAMEZAWA Hiroyuki
2009-09-18 8:58 ` [RFC][PATCH 7/11] memcg: rename from_cont to from_cgroup KAMEZAWA Hiroyuki
2009-09-18 9:00 ` [RFC][PATCH 8/11]memcg: remove unused macro and adds commentary KAMEZAWA Hiroyuki
2009-09-18 9:01 ` [RFC][PATCH 9/11]memcg: clean up zonestat funcs KAMEZAWA Hiroyuki
2009-09-18 9:04 ` KAMEZAWA Hiroyuki [this message]
2009-09-18 9:06 ` [RFC][PATCH 11/11][mmotm] memcg: more commentary and clean up KAMEZAWA Hiroyuki
2009-09-18 10:37 ` [RFC][PATCH 0/11][mmotm] memcg: patch dump (Sep/18) Daisuke Nishimura
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090918180419.fc511373.kamezawa.hiroyu@jp.fujitsu.com \
--to=kamezawa.hiroyu@jp.fujitsu.com \
--cc=balbir@linux.vnet.ibm.com \
--cc=linux-mm@kvack.org \
--cc=nishimura@mxp.nes.nec.co.jp \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox