From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
"akpm@linux-foundation.org" <akpm@linux-foundation.org>,
"nishimura@mxp.nes.nec.co.jp" <nishimura@mxp.nes.nec.co.jp>,
"balbir@linux.vnet.ibm.com" <balbir@linux.vnet.ibm.com>,
"hannes@cmpxchg.org" <hannes@cmpxchg.org>
Subject: [PATCH 2/4] memcg: fix USED bit handling at uncharge in THP
Date: Tue, 18 Jan 2011 11:40:49 +0900 [thread overview]
Message-ID: <20110118114049.5ffdf5da.kamezawa.hiroyu@jp.fujitsu.com> (raw)
In-Reply-To: <20110118113528.fd24928f.kamezawa.hiroyu@jp.fujitsu.com>
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Now, under THP:
at charge:
- PageCgroupUsed bit is set to all page_cgroup on a hugepage.
....set to 512 pages.
at uncharge
- PageCgroupUsed bit is unset on the head page.
So, some pages will remain with "Used" bit.
This patch fixes that Used bit is set only to the head page.
Used bits for tail pages will be set at splitting if necessary.
This patch adds this lock order:
compound_lock() -> page_cgroup_move_lock().
Changelog: v2->v3
- added "don't copy PCG_ACCT_LRU"
Changelog:
- fixed some styles.
- removed BUG_ON()
- fixed mem_cgroup_update_page_stat() v.s. splitting races.
To avoid races, PCG_MOVE_LOCK is used.
- copy all required flags at splitting. (ACCT_LRU etc..should be copied)
- add a dummy function for compile.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/memcontrol.h | 8 +++
mm/huge_memory.c | 2
mm/memcontrol.c | 91 +++++++++++++++++++++++++--------------------
3 files changed, 61 insertions(+), 40 deletions(-)
Index: mmotm-0107/mm/memcontrol.c
===================================================================
--- mmotm-0107.orig/mm/memcontrol.c
+++ mmotm-0107/mm/memcontrol.c
@@ -1614,7 +1614,7 @@ void mem_cgroup_update_page_stat(struct
if (unlikely(!mem || !PageCgroupUsed(pc)))
goto out;
/* pc->mem_cgroup is unstable ? */
- if (unlikely(mem_cgroup_stealed(mem))) {
+ if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) {
/* take a lock against to access pc->mem_cgroup */
move_lock_page_cgroup(pc, &flags);
need_unlock = true;
@@ -2083,14 +2083,27 @@ struct mem_cgroup *try_get_mem_cgroup_fr
return mem;
}
-/*
- * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be
- * USED state. If already USED, uncharge and return.
- */
-static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
- struct page_cgroup *pc,
- enum charge_type ctype)
+static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
+ struct page_cgroup *pc,
+ enum charge_type ctype,
+ int page_size)
{
+ int nr_pages = page_size >> PAGE_SHIFT;
+
+ /* try_charge() can return NULL to *memcg, taking care of it. */
+ if (!mem)
+ return;
+
+ lock_page_cgroup(pc);
+ if (unlikely(PageCgroupUsed(pc))) {
+ unlock_page_cgroup(pc);
+ mem_cgroup_cancel_charge(mem, page_size);
+ return;
+ }
+ /*
+ * we don't need page_cgroup_lock about tail pages, becase they are not
+ * accessed by any other context at this point.
+ */
pc->mem_cgroup = mem;
/*
* We access a page_cgroup asynchronously without lock_page_cgroup().
@@ -2114,35 +2127,7 @@ static void ____mem_cgroup_commit_charge
break;
}
- mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), 1);
-}
-
-static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
- struct page_cgroup *pc,
- enum charge_type ctype,
- int page_size)
-{
- int i;
- int count = page_size >> PAGE_SHIFT;
-
- /* try_charge() can return NULL to *memcg, taking care of it. */
- if (!mem)
- return;
-
- lock_page_cgroup(pc);
- if (unlikely(PageCgroupUsed(pc))) {
- unlock_page_cgroup(pc);
- mem_cgroup_cancel_charge(mem, page_size);
- return;
- }
-
- /*
- * we don't need page_cgroup_lock about tail pages, becase they are not
- * accessed by any other context at this point.
- */
- for (i = 0; i < count; i++)
- ____mem_cgroup_commit_charge(mem, pc + i, ctype);
-
+ mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages);
unlock_page_cgroup(pc);
/*
* "charge_statistics" updated event counter. Then, check it.
@@ -2152,6 +2137,34 @@ static void __mem_cgroup_commit_charge(s
memcg_check_events(mem, pc->page);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
+ (1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
+/*
+ * Because tail pages are not marked as "used", set it. We're under
+ * zone->lru_lock, 'splitting on pmd' and compund_lock.
+ */
+void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
+{
+ struct page_cgroup *head_pc = lookup_page_cgroup(head);
+ struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
+ unsigned long flags;
+
+ /*
+ * We have no races witch charge/uncharge but will have races with
+ * page state accounting.
+ */
+ move_lock_page_cgroup(head_pc, &flags);
+
+ tail_pc->mem_cgroup = head_pc->mem_cgroup;
+ smp_wmb(); /* see __commit_charge() */
+ /* we don't need to copy all flags...*/
+ tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+ move_unlock_page_cgroup(head_pc, &flags);
+}
+#endif
+
/**
* __mem_cgroup_move_account - move account of the page
* @pc: page_cgroup of the page.
@@ -2545,7 +2558,6 @@ direct_uncharge:
static struct mem_cgroup *
__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
{
- int i;
int count;
struct page_cgroup *pc;
struct mem_cgroup *mem = NULL;
@@ -2595,8 +2607,7 @@ __mem_cgroup_uncharge_common(struct page
break;
}
- for (i = 0; i < count; i++)
- mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -1);
+ mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -count);
ClearPageCgroupUsed(pc);
/*
Index: mmotm-0107/include/linux/memcontrol.h
===================================================================
--- mmotm-0107.orig/include/linux/memcontrol.h
+++ mmotm-0107/include/linux/memcontrol.h
@@ -146,6 +146,10 @@ unsigned long mem_cgroup_soft_limit_recl
gfp_t gfp_mask);
u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail);
+#endif
+
#else /* CONFIG_CGROUP_MEM_RES_CTLR */
struct mem_cgroup;
@@ -335,6 +339,10 @@ u64 mem_cgroup_get_limit(struct mem_cgro
return 0;
}
+static inline mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
+{
+}
+
#endif /* CONFIG_CGROUP_MEM_CONT */
#endif /* _LINUX_MEMCONTROL_H */
Index: mmotm-0107/mm/huge_memory.c
===================================================================
--- mmotm-0107.orig/mm/huge_memory.c
+++ mmotm-0107/mm/huge_memory.c
@@ -1203,6 +1203,8 @@ static void __split_huge_page_refcount(s
BUG_ON(!PageDirty(page_tail));
BUG_ON(!PageSwapBacked(page_tail));
+ mem_cgroup_split_huge_fixup(page, page_tail);
+
lru_add_page_tail(zone, page, page_tail);
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2011-01-18 2:46 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-01-18 2:35 [PATCH 0/4] fix THP and memcg issues v3 KAMEZAWA Hiroyuki
2011-01-18 2:36 ` [PATCH 1/4] memcg: modify accounting function for supporting THP better KAMEZAWA Hiroyuki
2011-01-18 2:40 ` KAMEZAWA Hiroyuki [this message]
2011-01-19 12:10 ` [PATCH 2/4] memcg: fix USED bit handling at uncharge in THP Johannes Weiner
2011-01-20 0:55 ` KAMEZAWA Hiroyuki
2011-01-18 2:42 ` [PATCH 3/4] memcg: fix LRU accounting with THP KAMEZAWA Hiroyuki
2011-01-18 2:43 ` [PATCH 4/4] memcg: fix rmdir, force_empty " KAMEZAWA Hiroyuki
2011-01-20 13:41 ` Johannes Weiner
2011-01-20 23:39 ` KAMEZAWA Hiroyuki
2011-01-20 23:41 ` KAMEZAWA Hiroyuki
2011-01-21 0:21 ` nishimura
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20110118114049.5ffdf5da.kamezawa.hiroyu@jp.fujitsu.com \
--to=kamezawa.hiroyu@jp.fujitsu.com \
--cc=akpm@linux-foundation.org \
--cc=balbir@linux.vnet.ibm.com \
--cc=hannes@cmpxchg.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=nishimura@mxp.nes.nec.co.jp \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox