linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "akpm@linux-foundation.org" <akpm@linux-foundation.org>,
	"hugh@veritas.com" <hugh@veritas.com>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"balbir@linux.vnet.ibm.com" <balbir@linux.vnet.ibm.com>,
	"nishimura@mxp.nes.nec.co.jp" <nishimura@mxp.nes.nec.co.jp>
Subject: [mmotm][PATCH 4/4] replacement-for-memcg-memswap-controller-core-make-resize-limit-hold-mutex.patch
Date: Tue, 2 Dec 2008 13:21:08 +0900	[thread overview]
Message-ID: <20081202132108.1a4c54ee.kamezawa.hiroyu@jp.fujitsu.com> (raw)
In-Reply-To: <20081202131723.806f1724.kamezawa.hiroyu@jp.fujitsu.com>

From: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>

mem_cgroup_resize_memsw_limit() tries to take memsw.lock while holding
res.lock, so the message below is shown when writing to the
memory.memsw.limit_in_bytes file.

    [ INFO: possible recursive locking detected ]
    2.6.28-rc4-mm1-mmotm-2008-11-14-20-50-ef4e17ef #1

    bash/4406 is trying to acquire lock:
     (&counter->lock){....}, at: [<c0498408>] mem_cgroup_resize_memsw_limit+0x8d/0x113

    but task is already holding lock:
     (&counter->lock){....}, at: [<c04983d6>] mem_cgroup_resize_memsw_limit+0x5b/0x113

    other info that might help us debug this:
    1 lock held by bash/4406:
     #0:  (&counter->lock){....}, at: [<c04983d6>] mem_cgroup_resize_memsw_limit+0x5b/0x113

    stack backtrace:
    Pid: 4406, comm: bash Not tainted 2.6.28-rc4-mm1-mmotm-2008-11-14-20-50-ef4e17ef #1
    Call Trace:
     [<c066e60f>] ? printk+0xf/0x18
     [<c044d0c0>] __lock_acquire+0xc67/0x1353
     [<c044d793>] ? __lock_acquire+0x133a/0x1353
     [<c044d81c>] lock_acquire+0x70/0x97
     [<c0498408>] ? mem_cgroup_resize_memsw_limit+0x8d/0x113
     [<c0671519>] _spin_lock_irqsave+0x3a/0x6d
     [<c0498408>] ? mem_cgroup_resize_memsw_limit+0x8d/0x113
     [<c0498408>] mem_cgroup_resize_memsw_limit+0x8d/0x113
     [<c0518a6c>] ? memparse+0x14/0x66
     [<c0498594>] mem_cgroup_write+0x4a/0x50
     [<c045e063>] cgroup_file_write+0x181/0x1c6
     [<c0449e43>] ? lock_release_holdtime+0x1a/0x168
     [<c04ec725>] ? security_file_permission+0xf/0x11
     [<c049b5f0>] ? rw_verify_area+0x76/0x97
     [<c045dee2>] ? cgroup_file_write+0x0/0x1c6
     [<c049bce6>] vfs_write+0x8a/0x12e
     [<c049be23>] sys_write+0x3b/0x60
     [<c0403867>] sysenter_do_call+0x12/0x3f

This patch defines a new mutex and makes both mem_cgroup_resize_limit() and
mem_cgroup_resize_memsw_limit() hold it, so that spin_lock_irqsave() can be removed.

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Index: mmotm-2.6.28-Dec01/mm/memcontrol.c
===================================================================
--- mmotm-2.6.28-Dec01.orig/mm/memcontrol.c
+++ mmotm-2.6.28-Dec01/mm/memcontrol.c
@@ -27,6 +27,7 @@
 #include <linux/backing-dev.h>
 #include <linux/bit_spinlock.h>
 #include <linux/rcupdate.h>
+#include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/spinlock.h>
@@ -1189,32 +1190,43 @@ int mem_cgroup_shrink_usage(struct mm_st
 	return 0;
 }
 
+static DEFINE_MUTEX(set_limit_mutex);
+
 static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
-				   unsigned long long val)
+				unsigned long long val)
 {
 
 	int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
 	int progress;
+	u64 memswlimit;
 	int ret = 0;
 
-	if (do_swap_account) {
-		if (val > memcg->memsw.limit)
-			return -EINVAL;
-	}
-
-	while (res_counter_set_limit(&memcg->res, val)) {
+	while (retry_count) {
 		if (signal_pending(current)) {
 			ret = -EINTR;
 			break;
 		}
-		if (!retry_count) {
-			ret = -EBUSY;
+		/*
+		 * Rather than hide all in some function, I do this in
+		 * open coded manner. You see what this really does.
+		 * We have to guarantee mem->res.limit < mem->memsw.limit.
+		 */
+		mutex_lock(&set_limit_mutex);
+		memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
+		if (memswlimit < val) {
+			ret = -EINVAL;
+			mutex_unlock(&set_limit_mutex);
 			break;
 		}
+		ret = res_counter_set_limit(&memcg->res, val);
+		mutex_unlock(&set_limit_mutex);
+
+		if (!ret)
+			break;
+
 		progress = try_to_free_mem_cgroup_pages(memcg,
 				GFP_KERNEL, false);
-		if (!progress)
-			retry_count--;
+  		if (!progress)			retry_count--;
 	}
 	return ret;
 }
@@ -1223,7 +1235,6 @@ int mem_cgroup_resize_memsw_limit(struct
 				  unsigned long long val)
 {
 	int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
-	unsigned long flags;
 	u64 memlimit, oldusage, curusage;
 	int ret;
 
@@ -1240,19 +1251,20 @@ int mem_cgroup_resize_memsw_limit(struct
 		 * open coded manner. You see what this really does.
 		 * We have to guarantee mem->res.limit < mem->memsw.limit.
 		 */
-		spin_lock_irqsave(&memcg->res.lock, flags);
-		memlimit = memcg->res.limit;
+		mutex_lock(&set_limit_mutex);
+		memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
 		if (memlimit > val) {
-			spin_unlock_irqrestore(&memcg->res.lock, flags);
 			ret = -EINVAL;
+			mutex_unlock(&set_limit_mutex);
 			break;
 		}
 		ret = res_counter_set_limit(&memcg->memsw, val);
-		oldusage = memcg->memsw.usage;
-		spin_unlock_irqrestore(&memcg->res.lock, flags);
+		mutex_unlock(&set_limit_mutex);
 
 		if (!ret)
 			break;
+
+		oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 		try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, true);
 		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 		if (curusage >= oldusage)
@@ -1261,6 +1273,7 @@ int mem_cgroup_resize_memsw_limit(struct
 	return ret;
 }
 
+
 /*
  * This routine traverse page_cgroup in given list and drop them all.
  * *And* this routine doesn't reclaim page itself, just removes page_cgroup.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2008-12-02  4:22 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-12-02  4:17 [mmotm][PATCH 0/4] request for patch replacement KAMEZAWA Hiroyuki
2008-12-02  4:18 ` [mmotm][PATCH 1/4] replacement-for-memcg-simple-migration-handling.patch KAMEZAWA Hiroyuki
2008-12-02  4:35   ` Balbir Singh
2008-12-02  4:49     ` KAMEZAWA Hiroyuki
2008-12-02  4:19 ` [mmotm][PATCH 2/4] replacement-for-memcg-handle-swap-caches.patch KAMEZAWA Hiroyuki
2008-12-02  4:20 ` [mmotm][PATCH 3/4] replacement-for-memcg-memswap-controller-core.patch KAMEZAWA Hiroyuki
2008-12-02  4:21 ` KAMEZAWA Hiroyuki [this message]
2008-12-03  7:49 ` [mmotm][PATCH 0/4] request for patch replacement KAMEZAWA Hiroyuki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20081202132108.1a4c54ee.kamezawa.hiroyu@jp.fujitsu.com \
    --to=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=akpm@linux-foundation.org \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=hugh@veritas.com \
    --cc=linux-mm@kvack.org \
    --cc=nishimura@mxp.nes.nec.co.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox