From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Ying Han <yinghan@google.com>,
"linux-mm@kvack.org" <linux-mm@kvack.org>,
"kosaki.motohiro@jp.fujitsu.com" <kosaki.motohiro@jp.fujitsu.com>,
"balbir@linux.vnet.ibm.com" <balbir@linux.vnet.ibm.com>,
"nishimura@mxp.nes.nec.co.jp" <nishimura@mxp.nes.nec.co.jp>,
"akpm@linux-foundation.org" <akpm@linux-foundation.org>,
Johannes Weiner <jweiner@redhat.com>,
"minchan.kim@gmail.com" <minchan.kim@gmail.com>,
Michal Hocko <mhocko@suse.cz>
Subject: [PATCH 6/7] memcg: add zone_all_unreclaimable
Date: Mon, 25 Apr 2011 18:40:15 +0900
Message-ID: <20110425184015.c1d97d33.kamezawa.hiroyu@jp.fujitsu.com>
In-Reply-To: <20110425182529.c7c37bb4.kamezawa.hiroyu@jp.fujitsu.com>
After reclaiming each node of a memcg, kswapd checks mem_cgroup_watermark_ok()
and breaks out of the priority loop if it returns true. A per-memcg zone is
marked "unreclaimable" when the scanning rate on the per-memcg LRU greatly
exceeds the reclaiming rate. The bit is cleared whenever a page charged to
the memcg is freed. Kswapd also breaks out of the priority loop once all
zones are marked "unreclaimable".
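To make the heuristic concrete, here is a stand-alone sketch (plain
userspace C, not kernel code; mz_sketch and mz_reclaimable() are made-up
stand-ins for mem_cgroup_per_zone and mem_cgroup_zone_reclaimable(), and
ZONE_RECLAIMABLE_RATE matches the constant added below):

#include <stdbool.h>
#include <stdio.h>

#define ZONE_RECLAIMABLE_RATE 6

struct mz_sketch {
    unsigned long pages_scanned;     /* accumulated since last clear */
    unsigned long reclaimable_pages; /* per-memcg LRU size for the zone */
    bool all_unreclaimable;
};

/* Same test as mem_cgroup_zone_reclaimable() in the patch below. */
static bool mz_reclaimable(const struct mz_sketch *mz)
{
    return mz->pages_scanned <
           mz->reclaimable_pages * ZONE_RECLAIMABLE_RATE;
}

int main(void)
{
    struct mz_sketch mz = { 0, 100, false };

    mz.pages_scanned += 600;          /* scanning outpaced reclaim */
    if (!mz_reclaimable(&mz))
        mz.all_unreclaimable = true;  /* 600 >= 100 * 6: give up */

    mz.pages_scanned = 0;             /* a charged page was freed, */
    mz.all_unreclaimable = false;     /* so the zone is tried again */

    printf("all_unreclaimable=%d\n", mz.all_unreclaimable);
    return 0;
}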
Changelog v7..v8a:
 - stopped using priority for the unreclaimable check.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Ying Han <yinghan@google.com>
---
include/linux/memcontrol.h | 40 ++++++++++++++
include/linux/sched.h | 1 +
include/linux/swap.h | 2 ++
mm/memcontrol.c | 126 +++++++++++++++++++++++++++++++++++++++++++--
mm/vmscan.c | 13 ++++
5 files changed, 177 insertions(+), 5 deletions(-)
Index: memcg/include/linux/memcontrol.h
===================================================================
--- memcg.orig/include/linux/memcontrol.h
+++ memcg/include/linux/memcontrol.h
@@ -158,6 +158,14 @@ unsigned long mem_cgroup_soft_limit_recl
unsigned long *total_scanned);
u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
u64 mem_cgroup_get_usage(struct mem_cgroup *mem);
+bool mem_cgroup_zone_reclaimable(struct mem_cgroup *mem, struct zone *zone);
+bool mem_cgroup_mz_unreclaimable(struct mem_cgroup *mem, struct zone *zone);
+void mem_cgroup_mz_set_unreclaimable(struct mem_cgroup *mem, struct zone *zone);
+void mem_cgroup_clear_unreclaimable(struct mem_cgroup *mem, struct page *page);
+void mem_cgroup_mz_clear_unreclaimable(struct mem_cgroup *mem,
+ struct zone *zone);
+void mem_cgroup_mz_pages_scanned(struct mem_cgroup *mem, struct zone *zone,
+ unsigned long nr_scanned);
void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -355,6 +363,39 @@ static inline void mem_cgroup_dec_page_s
{
}
+static inline bool mem_cgroup_zone_reclaimable(struct mem_cgroup *mem,
+ struct zone *zone)
+{
+ return false;
+}
+
+static inline bool mem_cgroup_mz_unreclaimable(struct mem_cgroup *mem,
+ struct zone *zone)
+{
+ return false;
+}
+
+static inline void mem_cgroup_mz_set_unreclaimable(struct mem_cgroup *mem,
+ struct zone *zone)
+{
+}
+
+static inline void mem_cgroup_clear_unreclaimable(struct mem_cgroup *mem,
+ struct page *page)
+{
+}
+
+static inline void mem_cgroup_mz_clear_unreclaimable(struct mem_cgroup *mem,
+ struct zone *zone)
+{
+}
+
+static inline void mem_cgroup_mz_pages_scanned(struct mem_cgroup *mem,
+ struct zone *zone,
+ unsigned long nr_scanned)
+{
+}
+
static inline
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_t gfp_mask,
Index: memcg/include/linux/sched.h
===================================================================
--- memcg.orig/include/linux/sched.h
+++ memcg/include/linux/sched.h
@@ -1540,6 +1540,7 @@ struct task_struct {
struct mem_cgroup *memcg; /* target memcg of uncharge */
unsigned long nr_pages; /* uncharged usage */
unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
+ struct zone *zone; /* zone of the first page uncharged in this batch */
} memcg_batch;
#endif
};
Index: memcg/include/linux/swap.h
===================================================================
--- memcg.orig/include/linux/swap.h
+++ memcg/include/linux/swap.h
@@ -152,6 +152,12 @@ enum {
SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */
};
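+/*
+ * A zone counts as reclaimable while the pages scanned since the last
+ * clear stay below (reclaimable pages * ZONE_RECLAIMABLE_RATE); used by
+ * both the global and the per-memcg check.
+ */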
+#define ZONE_RECLAIMABLE_RATE 6
+
#define SWAP_CLUSTER_MAX 32
#define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
Index: memcg/mm/memcontrol.c
===================================================================
--- memcg.orig/mm/memcontrol.c
+++ memcg/mm/memcontrol.c
@@ -139,7 +139,10 @@ struct mem_cgroup_per_zone {
bool on_tree;
struct mem_cgroup *mem; /* Back pointer, we cannot */
/* use container_of */
+ unsigned long pages_scanned; /* since last reclaim */
+ bool all_unreclaimable; /* All pages pinned */
};
+
/* Macro for accessing counter */
#define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)])
@@ -1166,12 +1169,15 @@ int mem_cgroup_inactive_file_is_low(stru
return (active > inactive);
}
-unsigned long mem_cgroup_zone_reclaimable_pages(struct mem_cgroup *memcg,
+unsigned long mem_cgroup_zone_reclaimable_pages(struct mem_cgroup *mem,
int nid, int zone_idx)
{
int nr;
- struct mem_cgroup_per_zone *mz =
- mem_cgroup_zoneinfo(memcg, nid, zone_idx);
+ struct mem_cgroup_per_zone *mz;
+
+ if (!mem)
+ return 0;
+ mz = mem_cgroup_zoneinfo(mem, nid, zone_idx);
nr = MEM_CGROUP_ZSTAT(mz, NR_ACTIVE_FILE) +
MEM_CGROUP_ZSTAT(mz, NR_INACTIVE_FILE);
@@ -1222,6 +1228,98 @@ mem_cgroup_get_reclaim_stat_from_page(st
return &mz->reclaim_stat;
}
+void mem_cgroup_mz_pages_scanned(struct mem_cgroup *mem, struct zone *zone,
+ unsigned long nr_scanned)
+{
+ struct mem_cgroup_per_zone *mz = NULL;
+ int nid = zone_to_nid(zone);
+ int zid = zone_idx(zone);
+
+ if (!mem)
+ return;
+
+ mz = mem_cgroup_zoneinfo(mem, nid, zid);
+ if (mz)
+ mz->pages_scanned += nr_scanned;
+}
+
+bool mem_cgroup_zone_reclaimable(struct mem_cgroup *mem, struct zone *zone)
+{
+ struct mem_cgroup_per_zone *mz = NULL;
+ int nid = zone_to_nid(zone);
+ int zid = zone_idx(zone);
+
+ if (!mem)
+ return false;
+
+ mz = mem_cgroup_zoneinfo(mem, nid, zid);
+
+ return mz->pages_scanned <
+ mem_cgroup_zone_reclaimable_pages(mem, nid, zid) *
+ ZONE_RECLAIMABLE_RATE;
+}
+
+bool mem_cgroup_mz_unreclaimable(struct mem_cgroup *mem, struct zone *zone)
+{
+ struct mem_cgroup_per_zone *mz = NULL;
+ int nid = zone_to_nid(zone);
+ int zid = zone_idx(zone);
+
+ if (!mem)
+ return false;
+
+ mz = mem_cgroup_zoneinfo(mem, nid, zid);
+ if (mz)
+ return mz->all_unreclaimable;
+
+ return false;
+}
+
+void mem_cgroup_mz_set_unreclaimable(struct mem_cgroup *mem, struct zone *zone)
+{
+ struct mem_cgroup_per_zone *mz = NULL;
+ int nid = zone_to_nid(zone);
+ int zid = zone_idx(zone);
+
+ if (!mem)
+ return;
+
+ mz = mem_cgroup_zoneinfo(mem, nid, zid);
+ if (mz)
+ mz->all_unreclaimable = true;
+}
+
+void mem_cgroup_mz_clear_unreclaimable(struct mem_cgroup *mem,
+ struct zone *zone)
+{
+ struct mem_cgroup_per_zone *mz = NULL;
+ int nid = zone_to_nid(zone);
+ int zid = zone_idx(zone);
+
+ if (!mem)
+ return;
+
+ mz = mem_cgroup_zoneinfo(mem, nid, zid);
+ if (mz) {
+ mz->pages_scanned = 0;
+ mz->all_unreclaimable = false;
+ }
+}
+
+void mem_cgroup_clear_unreclaimable(struct mem_cgroup *mem, struct page *page)
+{
+ struct mem_cgroup_per_zone *mz = NULL;
+
+ if (!mem)
+ return;
+
+ mz = page_cgroup_zoneinfo(mem, page);
+ if (mz) {
+ mz->pages_scanned = 0;
+ mz->all_unreclaimable = false;
+ }
+}
+
unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
struct list_head *dst,
unsigned long *scanned, int order,
@@ -2791,6 +2893,7 @@ void mem_cgroup_cancel_charge_swapin(str
static void mem_cgroup_do_uncharge(struct mem_cgroup *mem,
unsigned int nr_pages,
+ struct page *page,
const enum charge_type ctype)
{
struct memcg_batch_info *batch = NULL;
@@ -2808,6 +2911,10 @@ static void mem_cgroup_do_uncharge(struc
*/
if (!batch->memcg)
batch->memcg = mem;
+
+ if (!batch->zone)
+ batch->zone = page_zone(page);
+
/*
* do_batch > 0 when unmapping pages or inode invalidate/truncate.
* In those cases, all pages freed continuously can be expected to be in
@@ -2829,12 +2936,17 @@ static void mem_cgroup_do_uncharge(struc
*/
if (batch->memcg != mem)
goto direct_uncharge;
+
+ if (batch->zone != page_zone(page))
+ mem_cgroup_mz_clear_unreclaimable(mem, page_zone(page));
+
/* remember freed charge and uncharge it later */
batch->nr_pages++;
if (uncharge_memsw)
batch->memsw_nr_pages++;
return;
direct_uncharge:
+ mem_cgroup_mz_clear_unreclaimable(mem, page_zone(page));
res_counter_uncharge(&mem->res, nr_pages * PAGE_SIZE);
if (uncharge_memsw)
res_counter_uncharge(&mem->memsw, nr_pages * PAGE_SIZE);
@@ -2916,7 +3028,7 @@ __mem_cgroup_uncharge_common(struct page
mem_cgroup_get(mem);
}
if (!mem_cgroup_is_root(mem))
- mem_cgroup_do_uncharge(mem, nr_pages, ctype);
+ mem_cgroup_do_uncharge(mem, nr_pages, page, ctype);
return mem;
@@ -2984,6 +3096,10 @@ void mem_cgroup_uncharge_end(void)
if (batch->memsw_nr_pages)
res_counter_uncharge(&batch->memcg->memsw,
batch->memsw_nr_pages * PAGE_SIZE);
+ if (batch->zone)
+ mem_cgroup_mz_clear_unreclaimable(batch->memcg, batch->zone);
+ batch->zone = NULL;
+
memcg_oom_recover(batch->memcg);
/* forget this pointer (for sanity check) */
batch->memcg = NULL;
@@ -4659,6 +4775,8 @@ static int alloc_mem_cgroup_per_zone_inf
mz->usage_in_excess = 0;
mz->on_tree = false;
mz->mem = mem;
+ mz->pages_scanned = 0;
+ mz->all_unreclaimable = false;
}
return 0;
}
Index: memcg/mm/vmscan.c
===================================================================
--- memcg.orig/mm/vmscan.c
+++ memcg/mm/vmscan.c
@@ -1412,6 +1412,9 @@ shrink_inactive_list(unsigned long nr_to
ISOLATE_BOTH : ISOLATE_INACTIVE,
zone, sc->mem_cgroup,
0, file);
+
+ mem_cgroup_mz_pages_scanned(sc->mem_cgroup, zone, nr_scanned);
+
/*
* mem_cgroup_isolate_pages() keeps track of
* scanned pages on its own.
@@ -1531,6 +1534,7 @@ static void shrink_active_list(unsigned
* mem_cgroup_isolate_pages() keeps track of
* scanned pages on its own.
*/
+ mem_cgroup_mz_pages_scanned(sc->mem_cgroup, zone, pgscanned);
}
reclaim_stat->recent_scanned[file] += nr_taken;
@@ -1998,7 +2002,8 @@ static void shrink_zones(int priority, s
static bool zone_reclaimable(struct zone *zone)
{
- return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
+ return zone->pages_scanned < zone_reclaimable_pages(zone) *
+ ZONE_RECLAIMABLE_RATE;
}
/* All zones in zonelist are unreclaimable? */
@@ -2343,6 +2348,10 @@ shrink_memcg_node(int nid, int priority,
scan = mem_cgroup_zone_reclaimable_pages(mem_cont, nid, i);
if (!scan)
continue;
+ /* prefer to reclaim memory from zones where reclaim is easy */
+ if ((sc->nr_reclaimed >= total_scanned/4) &&
+ mem_cgroup_mz_unreclaimable(mem_cont, zone))
+ continue;
/* If recent memory reclaim on this zone doesn't get good results */
zrs = get_reclaim_stat(zone, sc);
scan = zrs->recent_scanned[0] + zrs->recent_scanned[1];
@@ -2355,6 +2364,8 @@ shrink_memcg_node(int nid, int priority,
shrink_zone(priority, zone, sc);
total_scanned += sc->nr_scanned;
sc->may_writepage = 0;
+ if (!mem_cgroup_zone_reclaimable(mem_cont, zone))
+ mem_cgroup_mz_set_unreclaimable(mem_cont, zone);
}
sc->nr_scanned = total_scanned;
}
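As an aside for reviewers, the batched clearing above can be modelled in
userspace like this (batch_sketch, do_uncharge() and uncharge_end() are
illustrative stand-ins for memcg_batch_info, mem_cgroup_do_uncharge() and
mem_cgroup_uncharge_end(), not the kernel API; the point is that the zone
remembered in the batch is cleared once at batch end, while pages from any
other zone clear their zone's bit immediately):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct zone_sketch {
    const char *name;
    bool all_unreclaimable;
};

struct batch_sketch {
    struct zone_sketch *zone; /* zone of the first uncharged page */
};

static void clear_unreclaimable(struct zone_sketch *z)
{
    z->all_unreclaimable = false;
}

/* Models mem_cgroup_do_uncharge(): remember the first zone, clear
 * immediately for pages that belong to a different zone. */
static void do_uncharge(struct batch_sketch *b, struct zone_sketch *z)
{
    if (!b->zone)
        b->zone = z;
    else if (b->zone != z)
        clear_unreclaimable(z);
}

/* Models mem_cgroup_uncharge_end(): one deferred clear per batch. */
static void uncharge_end(struct batch_sketch *b)
{
    if (b->zone)
        clear_unreclaimable(b->zone);
    b->zone = NULL;
}

int main(void)
{
    struct zone_sketch normal = { "Normal", true };
    struct zone_sketch dma32 = { "DMA32", true };
    struct batch_sketch batch = { NULL };

    do_uncharge(&batch, &normal); /* deferred until batch end */
    do_uncharge(&batch, &dma32);  /* other zone: cleared right away */
    uncharge_end(&batch);         /* clears "Normal" exactly once */

    printf("Normal=%d DMA32=%d\n",
           normal.all_unreclaimable, dma32.all_unreclaimable);
    return 0;
}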