From: yamamoto@valinux.co.jp (YAMAMOTO Takashi)
To: kamezawa.hiroyu@jp.fujitsu.com
Cc: linux-mm@kvack.org, menage@google.com,
containers@lists.linux-foundation.org,
linux-kernel@vger.kernel.org, a.p.zijlstra@chello.nl
Subject: Re: [PATCH][RFC] dirty balancing for cgroups
Date: Wed, 6 Aug 2008 17:20:46 +0900 (JST) [thread overview]
Message-ID: <20080806082046.349BE5A5F@siro.lan> (raw)
In-Reply-To: Your message of "Fri, 11 Jul 2008 17:52:13 +0900" <20080711175213.dc69f068.kamezawa.hiroyu@jp.fujitsu.com>
hi,
> On Fri, 11 Jul 2008 17:34:46 +0900 (JST)
> yamamoto@valinux.co.jp (YAMAMOTO Takashi) wrote:
>
> > hi,
> >
> > > > my patch penalizes heavy-writer cgroups as task_dirty_limit does
> > > > for heavy-writer tasks. i don't think that it's necessary to be
> > > > tied to the memory subsystem because i merely want to group writers.
> > > >
> > > Hmm, maybe what I need is different from this ;)
> > > Does not seem to be a help for memory reclaim under memcg.
> >
> > to implement what you need, i think that we need to keep track of
> > the numbers of dirty-pages in each memory cgroups as a first step.
> > do you agree?
> >
> yes, I think so, now.
>
> may be not difficult but will add extra overhead ;( Sigh..
the following is a patch to add the overhead. :)
any comments?
YAMAMOTO Takashi
Signed-off-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
---
diff --git a/fs/buffer.c b/fs/buffer.c
index 9749a90..a2dc642 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -41,6 +41,7 @@
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
+#include <linux/memcontrol.h>
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
@@ -708,12 +709,19 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
static int __set_page_dirty(struct page *page,
struct address_space *mapping, int warn)
{
- if (unlikely(!mapping))
- return !TestSetPageDirty(page);
+ if (unlikely(!mapping)) {
+ if (TestSetPageDirty(page)) {
+ return 0;
+ } else {
+ mem_cgroup_set_page_dirty(page);
+ return 1;
+ }
+ }
if (TestSetPageDirty(page))
return 0;
+ mem_cgroup_set_page_dirty(page);
spin_lock_irq(&mapping->tree_lock);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
@@ -762,8 +770,14 @@ int __set_page_dirty_buffers(struct page *page)
{
struct address_space *mapping = page_mapping(page);
- if (unlikely(!mapping))
- return !TestSetPageDirty(page);
+ if (unlikely(!mapping)) {
+ if (TestSetPageDirty(page)) {
+ return 0;
+ } else {
+ mem_cgroup_set_page_dirty(page);
+ return 1;
+ }
+ }
spin_lock(&mapping->private_lock);
if (page_has_buffers(page)) {
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ee1b2fc..f04441f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -57,6 +57,9 @@ extern int
mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
extern void mem_cgroup_end_migration(struct page *page);
+extern void mem_cgroup_set_page_dirty(struct page *page);
+extern void mem_cgroup_clear_page_dirty(struct page *page);
+
/*
* For memory reclaim.
*/
@@ -132,6 +135,14 @@ static inline void mem_cgroup_end_migration(struct page *page)
{
}
+static inline void mem_cgroup_set_page_dirty(struct page *page)
+{
+}
+
+static inline void mem_cgroup_clear_page_dirty(struct page *page)
+{
+}
+
static inline int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
{
return 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 344a477..33d14b7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -49,6 +49,7 @@ enum mem_cgroup_stat_index {
*/
MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */
MEM_CGROUP_STAT_RSS, /* # of pages charged as rss */
+ MEM_CGROUP_STAT_DIRTY, /* # of dirty pages */
MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */
MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */
@@ -73,6 +74,14 @@ static void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat *stat,
stat->cpustat[cpu].count[idx] += val;
}
+static void __mem_cgroup_stat_add(struct mem_cgroup_stat *stat,
+ enum mem_cgroup_stat_index idx, int val)
+{
+ int cpu = get_cpu();
+ stat->cpustat[cpu].count[idx] += val;
+ put_cpu();
+}
+
static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
enum mem_cgroup_stat_index idx)
{
@@ -164,6 +173,7 @@ struct page_cgroup {
#define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */
#define PAGE_CGROUP_FLAG_FILE (0x4) /* page is file system backed */
#define PAGE_CGROUP_FLAG_UNEVICTABLE (0x8) /* page is unevictableable */
+#define PAGE_CGROUP_FLAG_DIRTY (0x10) /* accounted as dirty */
static int page_cgroup_nid(struct page_cgroup *pc)
{
@@ -196,6 +206,9 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags,
else
__mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val);
+ if (flags & PAGE_CGROUP_FLAG_DIRTY)
+ __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_DIRTY, val);
+
if (charge)
__mem_cgroup_stat_add_safe(stat,
MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
@@ -284,6 +297,7 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
{
int lru = LRU_BASE;
+ VM_BUG_ON(!page_cgroup_locked(pc->page));
if (pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE)
lru = LRU_UNEVICTABLE;
else {
@@ -304,6 +318,7 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
{
int lru = LRU_BASE;
+ VM_BUG_ON(!page_cgroup_locked(pc->page));
if (pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE)
lru = LRU_UNEVICTABLE;
else {
@@ -328,6 +343,8 @@ static void __mem_cgroup_move_lists(struct page_cgroup *pc, enum lru_list lru)
enum lru_list from = unevictable ? LRU_UNEVICTABLE :
(LRU_FILE * !!file + !!active);
+ VM_BUG_ON(!page_cgroup_locked(pc->page));
+
if (lru == from)
return;
@@ -485,7 +502,10 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
if (PageUnevictable(page) ||
(PageActive(page) && !active) ||
(!PageActive(page) && active)) {
- __mem_cgroup_move_lists(pc, page_lru(page));
+ if (try_lock_page_cgroup(page)) {
+ __mem_cgroup_move_lists(pc, page_lru(page));
+ unlock_page_cgroup(page);
+ }
continue;
}
@@ -772,6 +792,38 @@ void mem_cgroup_end_migration(struct page *newpage)
mem_cgroup_uncharge_page(newpage);
}
+void mem_cgroup_set_page_dirty(struct page *pg)
+{
+ struct page_cgroup *pc;
+
+ lock_page_cgroup(pg);
+ pc = page_get_page_cgroup(pg);
+ if (pc != NULL && (pc->flags & PAGE_CGROUP_FLAG_DIRTY) == 0) {
+ struct mem_cgroup *mem = pc->mem_cgroup;
+ struct mem_cgroup_stat *stat = &mem->stat;
+
+ pc->flags |= PAGE_CGROUP_FLAG_DIRTY;
+ __mem_cgroup_stat_add(stat, MEM_CGROUP_STAT_DIRTY, 1);
+ }
+ unlock_page_cgroup(pg);
+}
+
+void mem_cgroup_clear_page_dirty(struct page *pg)
+{
+ struct page_cgroup *pc;
+
+ lock_page_cgroup(pg);
+ pc = page_get_page_cgroup(pg);
+ if (pc != NULL && (pc->flags & PAGE_CGROUP_FLAG_DIRTY) != 0) {
+ struct mem_cgroup *mem = pc->mem_cgroup;
+ struct mem_cgroup_stat *stat = &mem->stat;
+
+ pc->flags &= ~PAGE_CGROUP_FLAG_DIRTY;
+ __mem_cgroup_stat_add(stat, MEM_CGROUP_STAT_DIRTY, -1);
+ }
+ unlock_page_cgroup(pg);
+}
+
/*
* A call to try to shrink memory usage under specified resource controller.
* This is typically used for page reclaiming for shmem for reducing side
@@ -957,6 +1009,7 @@ static const struct mem_cgroup_stat_desc {
} mem_cgroup_stat_desc[] = {
[MEM_CGROUP_STAT_CACHE] = { "cache", PAGE_SIZE, },
[MEM_CGROUP_STAT_RSS] = { "rss", PAGE_SIZE, },
+ [MEM_CGROUP_STAT_DIRTY] = { "dirty", 1, },
[MEM_CGROUP_STAT_PGPGIN_COUNT] = {"pgpgin", 1, },
[MEM_CGROUP_STAT_PGPGOUT_COUNT] = {"pgpgout", 1, },
};
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index c6d6088..14dc9af 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -34,6 +34,7 @@
#include <linux/syscalls.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>
+#include <linux/memcontrol.h>
/*
* The maximum number of pages to writeout in a single bdflush/kupdate
@@ -1081,6 +1082,8 @@ int __set_page_dirty_nobuffers(struct page *page)
struct address_space *mapping = page_mapping(page);
struct address_space *mapping2;
+ mem_cgroup_set_page_dirty(page);
+
if (!mapping)
return 1;
@@ -1138,8 +1141,10 @@ static int __set_page_dirty(struct page *page)
return (*spd)(page);
}
if (!PageDirty(page)) {
- if (!TestSetPageDirty(page))
+ if (!TestSetPageDirty(page)) {
+ mem_cgroup_set_page_dirty(page);
return 1;
+ }
}
return 0;
}
@@ -1234,6 +1239,7 @@ int clear_page_dirty_for_io(struct page *page)
* for more comments.
*/
if (TestClearPageDirty(page)) {
+ mem_cgroup_clear_page_dirty(page);
dec_zone_page_state(page, NR_FILE_DIRTY);
dec_bdi_stat(mapping->backing_dev_info,
BDI_RECLAIMABLE);
@@ -1241,7 +1247,11 @@ int clear_page_dirty_for_io(struct page *page)
}
return 0;
}
- return TestClearPageDirty(page);
+ if (TestClearPageDirty(page)) {
+ mem_cgroup_clear_page_dirty(page);
+ return 1;
+ }
+ return 0;
}
EXPORT_SYMBOL(clear_page_dirty_for_io);
diff --git a/mm/truncate.c b/mm/truncate.c
index 4d129a5..9b1e215 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -18,6 +18,7 @@
#include <linux/task_io_accounting_ops.h>
#include <linux/buffer_head.h> /* grr. try_to_release_page,
do_invalidatepage */
+#include <linux/memcontrol.h>
#include "internal.h"
@@ -72,6 +73,8 @@ void cancel_dirty_page(struct page *page, unsigned int account_size)
{
if (TestClearPageDirty(page)) {
struct address_space *mapping = page->mapping;
+
+ mem_cgroup_clear_page_dirty(page);
if (mapping && mapping_cap_account_dirty(mapping)) {
dec_zone_page_state(page, NR_FILE_DIRTY);
dec_bdi_stat(mapping->backing_dev_info,
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">dont@kvack.org</a>
next prev parent reply other threads:[~2008-08-06 8:20 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20080711085449.ba7d14dd.kamezawa.hiroyu@jp.fujitsu.com>
2008-07-11 4:06 ` YAMAMOTO Takashi
2008-07-11 5:15 ` KAMEZAWA Hiroyuki
2008-07-11 5:59 ` YAMAMOTO Takashi
2008-07-11 7:13 ` KAMEZAWA Hiroyuki
2008-07-11 8:34 ` YAMAMOTO Takashi
2008-07-11 8:52 ` KAMEZAWA Hiroyuki
2008-08-06 8:20 ` YAMAMOTO Takashi [this message]
2008-08-06 8:53 ` KAMEZAWA Hiroyuki
2008-08-06 9:10 ` YAMAMOTO Takashi
2008-08-07 13:36 ` Peter Zijlstra
2008-08-13 7:15 ` YAMAMOTO Takashi
2008-08-18 7:58 ` KAMEZAWA Hiroyuki
2008-07-14 13:49 ` Peter Zijlstra
2008-07-17 1:43 ` YAMAMOTO Takashi
2008-08-14 8:38 ` Paul Menage
2008-07-14 14:38 ` kamezawa.hiroyu
[not found] <20080709060034.0CB2D5A29@siro.lan>
2008-07-14 13:37 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080806082046.349BE5A5F@siro.lan \
--to=yamamoto@valinux.co.jp \
--cc=a.p.zijlstra@chello.nl \
--cc=containers@lists.linux-foundation.org \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=menage@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox