From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
"balbir@linux.vnet.ibm.com" <balbir@linux.vnet.ibm.com>,
"nishimura@mxp.nes.nec.co.jp" <nishimura@mxp.nes.nec.co.jp>,
"xemul@openvz.org" <xemul@openvz.org>,
Andrew Morton <akpm@linux-foundation.org>,
LKML <linux-kernel@vger.kernel.org>,
Dave Hansen <haveblue@us.ibm.com>,
ryov@valinux.co.jp
Subject: [PATCH 11/12] memcg add to LRU in lazy
Date: Thu, 25 Sep 2008 15:35:06 +0900 [thread overview]
Message-ID: <20080925153506.4afae77a.kamezawa.hiroyu@jp.fujitsu.com> (raw)
In-Reply-To: <20080925151124.25898d22.kamezawa.hiroyu@jp.fujitsu.com>
Delay add_to_lru() and do it in a batched manner, like pagevec.
To do that, this patch uses 2 flags: PCG_USED and PCG_LRU.
If PCG_LRU is set, the page is on the LRU and it is safe to access the LRU
via page_cgroup (under some lock).
To avoid races, this patch uses TestSetPageCgroupUsed()
and checks the PCG_USED bit and the PCG_LRU bit in the add/free vector code.
With this, lock_page_cgroup() in mem_cgroup_charge() is removed.
(I don't want to call lock_page_cgroup() under mz->lru_lock in the
add/free vector core logic, so the TestSetPageCgroupUsed() logic is added.
This TestSet is an easy way to avoid unnecessary nesting of locks.)
Changelog: v3 -> v5.
- removed css_get/put per page_cgroup struct.
Now, the new force_empty checks whether there is any page_cgroup on the memcg,
so we don't need to be afraid of a leak.
Changelog: v2 -> v3
- added TRANSIT flag and removed lock from core logic.
Changelog: v1 -> v2:
- renamed the function from use_page_cgroup() to set_page_cgroup_lru().
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
include/linux/page_cgroup.h | 10 +++
mm/memcontrol.c | 121 +++++++++++++++++++++++++++++++-------------
2 files changed, 96 insertions(+), 35 deletions(-)
Index: mmotm-2.6.27-rc7+/include/linux/page_cgroup.h
===================================================================
--- mmotm-2.6.27-rc7+.orig/include/linux/page_cgroup.h
+++ mmotm-2.6.27-rc7+/include/linux/page_cgroup.h
@@ -24,6 +24,7 @@ enum {
PCG_LOCK, /* page cgroup is locked */
PCG_CACHE, /* charged as cache */
PCG_USED, /* this object is in use. */
+ PCG_LRU, /* this is on LRU */
/* flags for LRU placement */
PCG_ACTIVE, /* page is active in this cgroup */
PCG_FILE, /* page is file system backed */
@@ -42,11 +43,20 @@ static inline void SetPageCgroup##uname(
static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \
{ clear_bit(PCG_##lname, &pc->flags); }
+#define TESTSETPCGFLAG(uname, lname)\
+static inline int TestSetPageCgroup##uname(struct page_cgroup *pc) \
+ { return test_and_set_bit(PCG_##lname, &pc->flags); }
+
/* Cache flag is set only once (at allocation) */
TESTPCGFLAG(Cache, CACHE)
TESTPCGFLAG(Used, USED)
CLEARPCGFLAG(Used, USED)
+TESTSETPCGFLAG(Used, USED)
+
+TESTPCGFLAG(LRU, LRU)
+SETPCGFLAG(LRU, LRU)
+CLEARPCGFLAG(LRU, LRU)
/* LRU management flags (from global-lru definition) */
TESTPCGFLAG(File, FILE)
Index: mmotm-2.6.27-rc7+/mm/memcontrol.c
===================================================================
--- mmotm-2.6.27-rc7+.orig/mm/memcontrol.c
+++ mmotm-2.6.27-rc7+/mm/memcontrol.c
@@ -150,9 +150,9 @@ enum charge_type {
static const unsigned long
pcg_default_flags[NR_CHARGE_TYPE] = {
- (1 << PCG_CACHE) | (1 << PCG_FILE) | (1 << PCG_USED) | (1 << PCG_LOCK),
- (1 << PCG_ACTIVE) | (1 << PCG_LOCK) | (1 << PCG_USED),
- (1 << PCG_ACTIVE) | (1 << PCG_CACHE) | (1 << PCG_USED)| (1 << PCG_LOCK),
+ (1 << PCG_CACHE) | (1 << PCG_FILE) | (1 << PCG_USED),
+ (1 << PCG_ACTIVE) | (1 << PCG_USED),
+ (1 << PCG_ACTIVE) | (1 << PCG_CACHE) | (1 << PCG_USED),
0,
};
@@ -195,7 +195,6 @@ page_cgroup_zoneinfo(struct page_cgroup
struct mem_cgroup *mem = pc->mem_cgroup;
int nid = page_cgroup_nid(pc);
int zid = page_cgroup_zid(pc);
-
return mem_cgroup_zoneinfo(mem, nid, zid);
}
@@ -343,7 +342,7 @@ void mem_cgroup_move_lists(struct page *
if (!trylock_page_cgroup(pc))
return;
- if (PageCgroupUsed(pc)) {
+ if (PageCgroupUsed(pc) && PageCgroupLRU(pc)) {
mem = pc->mem_cgroup;
mz = page_cgroup_zoneinfo(pc);
spin_lock_irqsave(&mz->lru_lock, flags);
@@ -502,6 +501,9 @@ int mem_cgroup_move_account(struct page
from_mz = mem_cgroup_zoneinfo(from, nid, zid);
to_mz = mem_cgroup_zoneinfo(to, nid, zid);
+ if (!PageCgroupLRU(pc))
+ return ret;
+
if (res_counter_charge(&to->res, PAGE_SIZE)) {
/* Now, we assume no_limit...no failure here. */
return ret;
@@ -518,10 +520,8 @@ int mem_cgroup_move_account(struct page
if (spin_trylock(&to_mz->lru_lock)) {
__mem_cgroup_remove_list(from_mz, pc);
- css_put(&from->css);
res_counter_uncharge(&from->res, PAGE_SIZE);
pc->mem_cgroup = to;
- css_get(&to->css);
__mem_cgroup_add_list(to_mz, pc);
ret = 0;
spin_unlock(&to_mz->lru_lock);
@@ -542,6 +542,7 @@ struct memcg_percpu_vec {
struct page_cgroup *vec[MEMCG_PCPVEC_SIZE];
};
static DEFINE_PER_CPU(struct memcg_percpu_vec, memcg_free_vec);
+static DEFINE_PER_CPU(struct memcg_percpu_vec, memcg_add_vec);
static void
__release_page_cgroup(struct memcg_percpu_vec *mpv)
@@ -557,7 +558,6 @@ __release_page_cgroup(struct memcg_percp
prev_mz = NULL;
for (i = nr - 1; i >= 0; i--) {
pc = mpv->vec[i];
- VM_BUG_ON(PageCgroupUsed(pc));
mz = page_cgroup_zoneinfo(pc);
if (prev_mz != mz) {
if (prev_mz)
@@ -565,9 +565,10 @@ __release_page_cgroup(struct memcg_percp
prev_mz = mz;
spin_lock(&mz->lru_lock);
}
- __mem_cgroup_remove_list(mz, pc);
- css_put(&pc->mem_cgroup->css);
- pc->mem_cgroup = NULL;
+ if (!PageCgroupUsed(pc) && PageCgroupLRU(pc)) {
+ __mem_cgroup_remove_list(mz, pc);
+ ClearPageCgroupLRU(pc);
+ }
}
if (prev_mz)
spin_unlock(&prev_mz->lru_lock);
@@ -576,10 +577,43 @@ __release_page_cgroup(struct memcg_percp
}
static void
+__set_page_cgroup_lru(struct memcg_percpu_vec *mpv)
+{
+ unsigned long flags;
+ struct mem_cgroup_per_zone *mz, *prev_mz;
+ struct page_cgroup *pc;
+ int i, nr;
+
+ local_irq_save(flags);
+ nr = mpv->nr;
+ mpv->nr = 0;
+ prev_mz = NULL;
+
+ for (i = nr - 1; i >= 0; i--) {
+ pc = mpv->vec[i];
+ mz = page_cgroup_zoneinfo(pc);
+ if (prev_mz != mz) {
+ if (prev_mz)
+ spin_unlock(&prev_mz->lru_lock);
+ prev_mz = mz;
+ spin_lock(&mz->lru_lock);
+ }
+ if (PageCgroupUsed(pc) && !PageCgroupLRU(pc)) {
+ SetPageCgroupLRU(pc);
+ __mem_cgroup_add_list(mz, pc);
+ }
+ }
+
+ if (prev_mz)
+ spin_unlock(&prev_mz->lru_lock);
+ local_irq_restore(flags);
+
+}
+
+static void
release_page_cgroup(struct page_cgroup *pc)
{
struct memcg_percpu_vec *mpv;
-
mpv = &get_cpu_var(memcg_free_vec);
mpv->vec[mpv->nr++] = pc;
if (mpv->nr >= mpv->limit)
@@ -587,11 +621,25 @@ release_page_cgroup(struct page_cgroup *
put_cpu_var(memcg_free_vec);
}
+static void
+set_page_cgroup_lru(struct page_cgroup *pc)
+{
+ struct memcg_percpu_vec *mpv;
+
+ mpv = &get_cpu_var(memcg_add_vec);
+ mpv->vec[mpv->nr++] = pc;
+ if (mpv->nr >= mpv->limit)
+ __set_page_cgroup_lru(mpv);
+ put_cpu_var(memcg_add_vec);
+}
+
static void page_cgroup_start_cache_cpu(int cpu)
{
struct memcg_percpu_vec *mpv;
mpv = &per_cpu(memcg_free_vec, cpu);
mpv->limit = MEMCG_PCPVEC_SIZE;
+ mpv = &per_cpu(memcg_add_vec, cpu);
+ mpv->limit = MEMCG_PCPVEC_SIZE;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -600,6 +648,8 @@ static void page_cgroup_stop_cache_cpu(i
struct memcg_percpu_vec *mpv;
mpv = &per_cpu(memcg_free_vec, cpu);
mpv->limit = 0;
+ mpv = &per_cpu(memcg_add_vec, cpu);
+ mpv->limit = 0;
}
#endif
@@ -613,6 +663,9 @@ static DEFINE_MUTEX(memcg_force_drain_mu
static void drain_page_cgroup_local(struct work_struct *work)
{
struct memcg_percpu_vec *mpv;
+ mpv = &get_cpu_var(memcg_add_vec);
+ __set_page_cgroup_lru(mpv);
+ put_cpu_var(mpv);
mpv = &get_cpu_var(memcg_free_vec);
__release_page_cgroup(mpv);
put_cpu_var(mpv);
@@ -679,14 +732,9 @@ static int mem_cgroup_charge_common(stru
rcu_read_unlock();
return 0;
}
- /*
- * For every charge from the cgroup, increment reference count
- */
- css_get(&mem->css);
rcu_read_unlock();
} else {
mem = memcg;
- css_get(&memcg->css);
}
while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) {
@@ -713,33 +761,36 @@ static int mem_cgroup_charge_common(stru
}
preempt_disable();
- lock_page_cgroup(pc);
- if (unlikely(PageCgroupUsed(pc))) {
- unlock_page_cgroup(pc);
+ if (TestSetPageCgroupUsed(pc)) {
res_counter_uncharge(&mem->res, PAGE_SIZE);
- css_put(&mem->css);
preempt_enable();
goto done;
}
- pc->mem_cgroup = mem;
/*
- * If a page is accounted as a page cache, insert to inactive list.
- * If anon, insert to active list.
- */
- pc->flags = pcg_default_flags[ctype];
-
- mz = page_cgroup_zoneinfo(pc);
+ * page cgroup is *unused* now....but....
+ * We can assume old mem_cgroup's metadata is still available
+ * because pc is not on stale LRU after force_empty() is called.
+ */
+ if (likely(!PageCgroupLRU(pc)))
+ pc->flags = pcg_default_flags[ctype];
+ else {
+ mz = page_cgroup_zoneinfo(pc);
+ spin_lock_irqsave(&mz->lru_lock, flags);
+ if (PageCgroupLRU(pc)) {
+ __mem_cgroup_remove_list(mz, pc);
+ ClearPageCgroupLRU(pc);
+ }
+ pc->flags = pcg_default_flags[ctype];
+ spin_unlock_irqrestore(&mz->lru_lock, flags);
+ }
- spin_lock_irqsave(&mz->lru_lock, flags);
- __mem_cgroup_add_list(mz, pc);
- spin_unlock_irqrestore(&mz->lru_lock, flags);
- unlock_page_cgroup(pc);
+ pc->mem_cgroup = mem;
+ set_page_cgroup_lru(pc);
preempt_enable();
done:
return 0;
out:
- css_put(&mem->css);
return -ENOMEM;
}
@@ -830,12 +881,12 @@ __mem_cgroup_uncharge_common(struct page
return;
}
ClearPageCgroupUsed(pc);
+ mem = pc->mem_cgroup;
unlock_page_cgroup(pc);
preempt_enable();
+ res_counter_uncharge(&mem->res, PAGE_SIZE);
- mem = pc->mem_cgroup;
release_page_cgroup(pc);
- res_counter_uncharge(&mem->res, PAGE_SIZE);
return;
}
next prev parent reply other threads:[~2008-09-25 6:35 UTC|newest]
Thread overview: 69+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-09-25 6:11 [PATCH 0/12] memcg updates v5 KAMEZAWA Hiroyuki
2008-09-25 6:13 ` [PATCH 1/12] memcg avoid accounting special mappings not on LRU KAMEZAWA Hiroyuki
2008-09-26 8:25 ` Balbir Singh
2008-09-26 9:17 ` KAMEZAWA Hiroyuki
2008-09-26 9:32 ` Balbir Singh
2008-09-26 9:55 ` KAMEZAWA Hiroyuki
2008-09-25 6:14 ` [PATCH 2/12] memcg move charege() call to swapped-in page under lock_page() KAMEZAWA Hiroyuki
2008-09-26 8:36 ` Balbir Singh
2008-09-26 9:18 ` KAMEZAWA Hiroyuki
2008-09-25 6:15 ` [PATCH 3/12] memcg make root cgroup unlimited KAMEZAWA Hiroyuki
2008-09-26 8:41 ` Balbir Singh
2008-09-26 9:21 ` KAMEZAWA Hiroyuki
2008-09-26 9:29 ` Balbir Singh
2008-09-26 9:59 ` KAMEZAWA Hiroyuki
2008-09-25 6:16 ` [PATCH 4/12] memcg make page->mapping NULL before calling uncharge KAMEZAWA Hiroyuki
2008-09-26 9:47 ` Balbir Singh
2008-09-26 10:07 ` KAMEZAWA Hiroyuki
2008-09-25 6:17 ` [PATCH 5/12] memcg make page_cgroup->flags atomic KAMEZAWA Hiroyuki
2008-09-27 6:58 ` Balbir Singh
2008-09-25 6:18 ` [PATCH 6/12] memcg optimize percpu stat KAMEZAWA Hiroyuki
2008-09-26 9:53 ` Balbir Singh
2008-09-25 6:27 ` [PATCH 7/12] memcg add function to move account KAMEZAWA Hiroyuki
2008-09-26 7:30 ` Daisuke Nishimura
2008-09-26 9:24 ` KAMEZAWA Hiroyuki
2008-09-27 7:56 ` Balbir Singh
2008-09-27 8:35 ` kamezawa.hiroyu
2008-09-25 6:29 ` [PATCH 8/12] memcg rewrite force empty to move account to root KAMEZAWA Hiroyuki
2008-09-25 6:32 ` [PATCH 9/12] memcg allocate all page_cgroup at boot KAMEZAWA Hiroyuki
2008-09-25 18:40 ` Dave Hansen
2008-09-26 1:17 ` KAMEZAWA Hiroyuki
2008-09-26 1:22 ` KAMEZAWA Hiroyuki
2008-09-26 1:00 ` Daisuke Nishimura
2008-09-26 1:43 ` KAMEZAWA Hiroyuki
2008-09-26 2:05 ` KAMEZAWA Hiroyuki
2008-09-26 5:54 ` Daisuke Nishimura
2008-09-26 6:54 ` KAMEZAWA Hiroyuki
2008-09-27 3:47 ` KAMEZAWA Hiroyuki
2008-09-27 3:25 ` KAMEZAWA Hiroyuki
2008-09-26 2:21 ` [PATCH(fixed) " KAMEZAWA Hiroyuki
2008-09-26 2:25 ` [PATCH(fixed) 10/12] free page cgroup from LRU in lazy KAMEZAWA Hiroyuki
2008-09-26 2:28 ` [PATCH(fixed) 11/12] free page cgroup from LRU in add KAMEZAWA Hiroyuki
2008-10-01 4:03 ` [PATCH 9/12] memcg allocate all page_cgroup at boot Balbir Singh
2008-10-01 5:07 ` KAMEZAWA Hiroyuki
2008-10-01 5:30 ` Balbir Singh
2008-10-01 5:41 ` KAMEZAWA Hiroyuki
2008-10-01 6:12 ` KAMEZAWA Hiroyuki
2008-10-01 6:26 ` Balbir Singh
2008-10-01 5:32 ` KAMEZAWA Hiroyuki
2008-10-01 5:59 ` Balbir Singh
2008-10-01 6:17 ` KAMEZAWA Hiroyuki
2008-09-25 6:33 ` [PATCH 10/12] memcg free page_cgroup from LRU in lazy KAMEZAWA Hiroyuki
2008-09-25 6:35 ` KAMEZAWA Hiroyuki [this message]
2008-09-25 6:36 ` [PATCH 12/12] memcg: fix race at charging swap-in KAMEZAWA Hiroyuki
2008-09-26 2:32 ` [PATCH 0/12] memcg updates v5 Daisuke Nishimura
2008-09-26 2:58 ` KAMEZAWA Hiroyuki
2008-09-26 3:04 ` KAMEZAWA Hiroyuki
2008-09-26 3:00 ` Daisuke Nishimura
2008-09-26 4:05 ` KAMEZAWA Hiroyuki
2008-09-26 5:24 ` Daisuke Nishimura
2008-09-26 9:28 ` KAMEZAWA Hiroyuki
2008-09-26 10:43 ` KAMEZAWA Hiroyuki
2008-09-27 2:53 ` KAMEZAWA Hiroyuki
2008-09-26 8:18 ` Balbir Singh
2008-09-26 9:22 ` KAMEZAWA Hiroyuki
2008-09-26 9:31 ` Balbir Singh
2008-09-26 10:36 ` KAMEZAWA Hiroyuki
2008-09-27 3:19 ` KAMEZAWA Hiroyuki
2008-09-29 3:02 ` Balbir Singh
2008-09-29 3:27 ` KAMEZAWA Hiroyuki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080925153506.4afae77a.kamezawa.hiroyu@jp.fujitsu.com \
--to=kamezawa.hiroyu@jp.fujitsu.com \
--cc=akpm@linux-foundation.org \
--cc=balbir@linux.vnet.ibm.com \
--cc=haveblue@us.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=nishimura@mxp.nes.nec.co.jp \
--cc=ryov@valinux.co.jp \
--cc=xemul@openvz.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox