linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: balbir@linux.vnet.ibm.com, "xemul@openvz.org" <xemul@openvz.org>,
	"hugh@veritas.com" <hugh@veritas.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	menage@google.com, Dave Hansen <haveblue@us.ibm.com>,
	"nickpiggin@yahoo.com.au" <nickpiggin@yahoo.com.au>
Subject: [RFC] [PATCH 12/9] lazy lru add via per cpu vector for memcg.
Date: Tue, 16 Sep 2008 21:21:03 +0900	[thread overview]
Message-ID: <20080916212103.200934bb.kamezawa.hiroyu@jp.fujitsu.com> (raw)
In-Reply-To: <20080916211355.277b625d.kamezawa.hiroyu@jp.fujitsu.com>

Delay add_to_lru() and do it in batches.

To support delaying, a PCG_LRU flag is added. If PCG_LRU is set, the page is
on the LRU and uncharge() has to remove it from the LRU. If not, the page has
not been added to the LRU.

To avoid races, all flags are modified under lock_page_cgroup().

The lazy-add logic reuses the lazy-free logic.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

---
 include/linux/page_cgroup.h |    4 +
 mm/memcontrol.c             |   91 ++++++++++++++++++++++++++++++++++++++------
 2 files changed, 84 insertions(+), 11 deletions(-)

Index: mmtom-2.6.27-rc5+/include/linux/page_cgroup.h
===================================================================
--- mmtom-2.6.27-rc5+.orig/include/linux/page_cgroup.h
+++ mmtom-2.6.27-rc5+/include/linux/page_cgroup.h
@@ -23,6 +23,7 @@ enum {
 	PCG_LOCK,  /* page cgroup is locked */
 	PCG_CACHE, /* charged as cache */
 	PCG_USED, /* this object is in use. */
+	PCG_LRU, /* this is on LRU */
 	/* flags for LRU placement */
 	PCG_ACTIVE, /* page is active in this cgroup */
 	PCG_FILE, /* page is file system backed */
@@ -57,6 +58,9 @@ TESTPCGFLAG(Used, USED)
 __SETPCGFLAG(Used, USED)
 __CLEARPCGFLAG(Used, USED)
 
+TESTPCGFLAG(LRU, LRU)
+SETPCGFLAG(LRU, LRU)
+
 /* LRU management flags (from global-lru definition) */
 TESTPCGFLAG(File, FILE)
 SETPCGFLAG(File, FILE)
Index: mmtom-2.6.27-rc5+/mm/memcontrol.c
===================================================================
--- mmtom-2.6.27-rc5+.orig/mm/memcontrol.c
+++ mmtom-2.6.27-rc5+/mm/memcontrol.c
@@ -348,7 +348,7 @@ void mem_cgroup_move_lists(struct page *
 	if (!trylock_page_cgroup(pc))
 		return;
 
-	if (PageCgroupUsed(pc)) {
+	if (PageCgroupUsed(pc) && PageCgroupLRU(pc)) {
 		mem = pc->mem_cgroup;
 		mz = page_cgroup_zoneinfo(pc);
 		spin_lock_irqsave(&mz->lru_lock, flags);
@@ -508,6 +508,9 @@ int mem_cgroup_move_account(struct page 
 	from_mz =  mem_cgroup_zoneinfo(from, nid, zid);
 	to_mz =  mem_cgroup_zoneinfo(to, nid, zid);
 
+	if (!PageCgroupLRU(pc))
+		return ret;
+
 	if (res_counter_charge(&to->res, PAGE_SIZE)) {
 		/* Now, we assume no_limit...no failure here. */
 		return ret;
@@ -550,6 +553,7 @@ struct memcg_percpu_vec {
 	struct page_cgroup *vec[MEMCG_PCPVEC_SIZE];
 };
 DEFINE_PER_CPU(struct memcg_percpu_vec, memcg_free_vec);
+DEFINE_PER_CPU(struct memcg_percpu_vec, memcg_add_vec);
 
 static void
 __release_page_cgroup(struct memcg_percpu_vec *mpv)
@@ -580,6 +584,40 @@ __release_page_cgroup(struct memcg_percp
 }
 
 static void
+__use_page_cgroup(struct memcg_percpu_vec *mpv)
+{
+	unsigned long flags;
+	struct mem_cgroup_per_zone *mz;
+	struct mem_cgroup *owner;
+	struct page_cgroup *pc;
+	struct page *freed[MEMCG_PCPVEC_SIZE];
+	int i, nr, freed_num;
+
+	mz = mpv->hot_mz;
+	owner = mpv->hot_memcg;
+	spin_lock_irqsave(&mz->lru_lock, flags);
+	nr = mpv->nr;
+	mpv->nr = 0;
+	freed_num = 0;
+	for (i = nr - 1; i >= 0; i--) {
+		pc = mpv->vec[i];
+		lock_page_cgroup(pc);
+		if (likely(PageCgroupUsed(pc))) {
+			__mem_cgroup_add_list(mz, pc);
+			SetPageCgroupLRU(pc);
+		} else {
+			css_put(&owner->css);
+			freed[freed_num++] = pc->page;
+			pc->mem_cgroup = NULL;
+		}
+		unlock_page_cgroup(pc);
+	}
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
+	while (freed_num--)
+		put_page(freed[freed_num]);
+}
+
+static void
 release_page_cgroup(struct mem_cgroup_per_zone *mz,struct page_cgroup *pc)
 {
 	struct memcg_percpu_vec *mpv;
@@ -597,11 +635,30 @@ release_page_cgroup(struct mem_cgroup_pe
 	put_cpu_var(memcg_free_vec);
 }
 
+static void
+use_page_cgroup(struct mem_cgroup_per_zone *mz, struct page_cgroup *pc)
+{
+	struct memcg_percpu_vec *mpv;
+	mpv = &get_cpu_var(memcg_add_vec);
+	if (mpv->hot_mz != mz) {
+		if (mpv->nr > 0)
+			__use_page_cgroup(mpv);
+		mpv->hot_mz = mz;
+		mpv->hot_memcg = pc->mem_cgroup;
+	}
+	mpv->vec[mpv->nr++] = pc;
+	if (mpv->nr >= mpv->limit)
+		__use_page_cgroup(mpv);
+	put_cpu_var(memcg_add_vec);
+}
+
 static void page_cgroup_start_cache_cpu(int cpu)
 {
 	struct memcg_percpu_vec *mpv;
 	mpv = &per_cpu(memcg_free_vec, cpu);
 	mpv->limit = MEMCG_PCPVEC_SIZE;
+	mpv = &per_cpu(memcg_add_vec, cpu);
+	mpv->limit = MEMCG_PCPVEC_SIZE;
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -610,6 +667,8 @@ static void page_cgroup_stop_cache_cpu(i
 	struct memcg_percpu_vec *mpv;
 	mpv = &per_cpu(memcg_free_vec, cpu);
 	mpv->limit = 0;
+	mpv = &per_cpu(memcg_add_vec, cpu);
+	mpv->limit = 0;
 }
 #endif
 
@@ -623,6 +682,9 @@ static DEFINE_MUTEX(memcg_force_drain_mu
 static void drain_page_cgroup_local(struct work_struct *work)
 {
 	struct memcg_percpu_vec *mpv;
+	mpv = &get_cpu_var(memcg_add_vec);
+	__use_page_cgroup(mpv);
+	put_cpu_var(mpv);
 	mpv = &get_cpu_var(memcg_free_vec);
 	__release_page_cgroup(mpv);
 	put_cpu_var(mpv);
@@ -668,7 +730,6 @@ static int mem_cgroup_charge_common(stru
 	struct page_cgroup *pc;
 	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 	struct mem_cgroup_per_zone *mz;
-	unsigned long flags;
 
 	/* avoid case in boot sequence */
 	if (unlikely(PageReserved(page)))
@@ -753,9 +814,7 @@ static int mem_cgroup_charge_common(stru
 	unlock_page_cgroup(pc);
 
 	mz = page_cgroup_zoneinfo(pc);
-	spin_lock_irqsave(&mz->lru_lock, flags);
-	__mem_cgroup_add_list(mz, pc);
-	spin_unlock_irqrestore(&mz->lru_lock, flags);
+	use_page_cgroup(mz, pc);
 	preempt_enable();
 
 done:
@@ -830,23 +889,33 @@ __mem_cgroup_uncharge_common(struct page
 	struct mem_cgroup *mem;
 	struct mem_cgroup_per_zone *mz;
 	unsigned long pfn = page_to_pfn(page);
+	int need_to_release;
 
 	if (!under_mem_cgroup(page))
 		return;
 	pc = lookup_page_cgroup(pfn);
-	if (unlikely(!pc || !PageCgroupUsed(pc)))
+	if (unlikely(!pc))
 		return;
-
 	preempt_disable();
+
 	lock_page_cgroup(pc);
+
+	if (unlikely(!PageCgroupUsed(pc))) {
+		unlock_page_cgroup(pc);
+		preempt_enable();
+		return;
+	}
+
+	need_to_release = PageCgroupLRU(pc);
+	mem = pc->mem_cgroup;
 	__ClearPageCgroupUsed(pc);
 	unlock_page_cgroup(pc);
 	preempt_enable();
 
-	mem = pc->mem_cgroup;
-	mz = page_cgroup_zoneinfo(pc);
-
-	release_page_cgroup(mz, pc);
+	if (likely(need_to_release)) {
+		mz = page_cgroup_zoneinfo(pc);
+		release_page_cgroup(mz, pc);
+	}
 	res_counter_uncharge(&mem->res, PAGE_SIZE);
 
 	return;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2008-09-16 12:21 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-11 11:08 [RFC] [PATCH 0/9] remove page_cgroup pointer (with some enhancements) KAMEZAWA Hiroyuki
2008-09-11 11:11 ` [RFC] [PATCH 1/9] memcg:make root no limit KAMEZAWA Hiroyuki
2008-09-11 11:13 ` [RFC] [PATCH 2/9] memcg: atomic page_cgroup flags KAMEZAWA Hiroyuki
2008-09-11 11:14 ` [RFC] [PATCH 3/9] memcg: move_account between groups KAMEZAWA Hiroyuki
2008-09-12  4:36   ` KAMEZAWA Hiroyuki
2008-09-11 11:16 ` [RFC] [PATCH 4/9] memcg: new force empty KAMEZAWA Hiroyuki
2008-09-11 11:17 ` [RFC] [PATCH 5/9] memcg: set mapping null before uncharge KAMEZAWA Hiroyuki
2008-09-11 11:18 ` [RFC] [PATCH 6/9] memcg: optimize stat KAMEZAWA Hiroyuki
2008-09-11 11:20 ` [RFC] [PATCH 7/9] memcg: charge likely success KAMEZAWA Hiroyuki
2008-09-11 11:22 ` [RFC] [PATCH 8/9] memcg: remove page_cgroup pointer from memmap KAMEZAWA Hiroyuki
2008-09-11 14:00   ` Nick Piggin
2008-09-11 14:38   ` kamezawa.hiroyu
2008-09-11 15:01   ` kamezawa.hiroyu
2008-09-12 16:12   ` Balbir Singh
2008-09-12 16:19     ` Dave Hansen
2008-09-12 16:23       ` Dave Hansen
2008-09-16 12:13     ` memcg: lazy_lru (was Re: [RFC] [PATCH 8/9] memcg: remove page_cgroup pointer from memmap) KAMEZAWA Hiroyuki
2008-09-16 12:17       ` [RFC][PATCH 10/9] get/put page at charge/uncharge KAMEZAWA Hiroyuki
2008-09-16 12:19       ` [RFC][PATCH 11/9] lazy lru free vector for memcg KAMEZAWA Hiroyuki
2008-09-16 12:23         ` Pavel Emelyanov
2008-09-16 13:02         ` kamezawa.hiroyu
2008-09-16 12:21       ` KAMEZAWA Hiroyuki [this message]
2008-09-11 11:24 ` [RFC] [PATCH 9/9] memcg: percpu page cgroup lookup cache KAMEZAWA Hiroyuki
2008-09-11 11:31   ` Nick Piggin
2008-09-11 12:49   ` kamezawa.hiroyu
2008-09-12  9:35 ` [RFC] [PATCH 0/9] remove page_cgroup pointer (with some enhancements) KAMEZAWA Hiroyuki
2008-09-12 10:18   ` KAMEZAWA Hiroyuki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080916212103.200934bb.kamezawa.hiroyu@jp.fujitsu.com \
    --to=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=haveblue@us.ibm.com \
    --cc=hugh@veritas.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=menage@google.com \
    --cc=nickpiggin@yahoo.com.au \
    --cc=xemul@openvz.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox