From: Tejun Heo <tj@kernel.org>
To: lizefan@huawei.com, paul@paulmenage.org, glommer@parallels.com
Cc: containers@lists.linux-foundation.org, cgroups@vger.kernel.org,
peterz@infradead.org, mhocko@suse.cz, bsingharora@gmail.com,
hannes@cmpxchg.org, kamezawa.hiroyu@jp.fujitsu.com,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
Tejun Heo <tj@kernel.org>
Subject: [PATCH 10/13] cpuset: make CPU / memory hotplug propagation asynchronous
Date: Wed, 28 Nov 2012 13:34:17 -0800 [thread overview]
Message-ID: <1354138460-19286-11-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1354138460-19286-1-git-send-email-tj@kernel.org>
cpuset_hotplug_workfn() has been invoking cpuset_propagate_hotplug()
directly to propagate hotplug updates to !root cpusets; however, this
has the following problems.
* cpuset locking is scheduled to be decoupled from cgroup_mutex,
cgroup_mutex will be unexported, and cgroup_attach_task() will do
cgroup locking internally, so propagation can't synchronously move
tasks to a parent cgroup while walking the hierarchy.
* We can't use cgroup generic tree iterator because propagation to
each cpuset may sleep. With propagation done asynchronously, we can
lose the rather ugly cpuset specific iteration.
Convert cpuset_propagate_hotplug() to
cpuset_propagate_hotplug_workfn() and execute it from newly added
cpuset->hotplug_work. The work items are run on an ordered workqueue,
so the propagation order is preserved. cpuset_hotplug_workfn()
schedules all propagations while holding cgroup_mutex and waits for
completion without cgroup_mutex. Each in-flight propagation holds a
reference to the cpuset->css.
This patch doesn't cause any functional difference.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
kernel/cpuset.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 48 insertions(+), 6 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index b530fba..3558250 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -99,6 +99,8 @@ struct cpuset {
/* used for walking a cpuset hierarchy */
struct list_head stack_list;
+
+ struct work_struct hotplug_work;
};
/* Retrieve the cpuset for a cgroup */
@@ -254,7 +256,10 @@ static DEFINE_SPINLOCK(cpuset_buffer_lock);
/*
* CPU / memory hotplug is handled asynchronously.
*/
+static struct workqueue_struct *cpuset_propagate_hotplug_wq;
+
static void cpuset_hotplug_workfn(struct work_struct *work);
+static void cpuset_propagate_hotplug_workfn(struct work_struct *work);
static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);
@@ -1802,6 +1807,7 @@ static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont)
cpumask_clear(cs->cpus_allowed);
nodes_clear(cs->mems_allowed);
fmeter_init(&cs->fmeter);
+ INIT_WORK(&cs->hotplug_work, cpuset_propagate_hotplug_workfn);
cs->relax_domain_level = -1;
cs->parent = cgroup_cs(cont->parent);
@@ -2024,21 +2030,20 @@ static struct cpuset *cpuset_next(struct list_head *queue)
}
/**
- * cpuset_propagate_hotplug - propagate CPU/memory hotplug to a cpuset
+ * cpuset_propagate_hotplug_workfn - propagate CPU/memory hotplug to a cpuset
* @cs: cpuset in interest
*
* Compare @cs's cpu and mem masks against top_cpuset and if some have gone
* offline, update @cs accordingly. If @cs ends up with no CPU or memory,
* all its tasks are moved to the nearest ancestor with both resources.
- *
- * Should be called with cgroup_mutex held.
*/
-static void cpuset_propagate_hotplug(struct cpuset *cs)
+static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
{
static cpumask_t off_cpus;
static nodemask_t off_mems, tmp_mems;
+ struct cpuset *cs = container_of(work, struct cpuset, hotplug_work);
- WARN_ON_ONCE(!cgroup_lock_is_held());
+ cgroup_lock();
cpumask_andnot(&off_cpus, cs->cpus_allowed, top_cpuset.cpus_allowed);
nodes_andnot(off_mems, cs->mems_allowed, top_cpuset.mems_allowed);
@@ -2062,6 +2067,36 @@ static void cpuset_propagate_hotplug(struct cpuset *cs)
if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
remove_tasks_in_empty_cpuset(cs);
+
+ cgroup_unlock();
+
+ /* the following may free @cs, should be the last operation */
+ css_put(&cs->css);
+}
+
+/**
+ * schedule_cpuset_propagate_hotplug - schedule hotplug propagation to a cpuset
+ * @cs: cpuset of interest
+ *
+ * Schedule cpuset_propagate_hotplug_workfn() which will update CPU and
+ * memory masks according to top_cpuset.
+ */
+static void schedule_cpuset_propagate_hotplug(struct cpuset *cs)
+{
+ /*
+ * Pin @cs. The refcnt will be released when the work item
+ * finishes executing.
+ */
+ if (!css_tryget(&cs->css))
+ return;
+
+ /*
+ * Queue @cs->empty_cpuset_work. If already pending, lose the css
+ * ref. cpuset_propagate_hotplug_wq is ordered and propagation
+ * will happen in the order this function is called.
+ */
+ if (!queue_work(cpuset_propagate_hotplug_wq, &cs->hotplug_work))
+ css_put(&cs->css);
}
/**
@@ -2126,11 +2161,14 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
list_add_tail(&top_cpuset.stack_list, &queue);
while ((cs = cpuset_next(&queue)))
if (cs != &top_cpuset)
- cpuset_propagate_hotplug(cs);
+ schedule_cpuset_propagate_hotplug(cs);
}
cgroup_unlock();
+ /* wait for propagations to finish */
+ flush_workqueue(cpuset_propagate_hotplug_wq);
+
/* rebuild sched domains if cpus_allowed has changed */
if (cpus_updated) {
struct sched_domain_attr *attr;
@@ -2176,6 +2214,10 @@ void __init cpuset_init_smp(void)
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
hotplug_memory_notifier(cpuset_track_online_nodes, 10);
+
+ cpuset_propagate_hotplug_wq =
+ alloc_ordered_workqueue("cpuset_hotplug", 0);
+ BUG_ON(!cpuset_propagate_hotplug_wq);
}
/**
--
1.7.11.7
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2012-11-28 21:34 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-11-28 21:34 [PATCHSET cgroup/for-3.8] cpuset: decouple cpuset locking from cgroup core Tejun Heo
2012-11-28 21:34 ` [PATCH 01/13] cpuset: remove unused cpuset_unlock() Tejun Heo
2012-11-28 21:34 ` [PATCH 02/13] cpuset: remove fast exit path from remove_tasks_in_empty_cpuset() Tejun Heo
2012-11-28 21:34 ` [PATCH 03/13] cpuset: introduce ->css_on/offline() Tejun Heo
2012-11-28 21:34 ` [PATCH 04/13] cpuset: introduce CS_ONLINE Tejun Heo
2012-11-28 21:34 ` [PATCH 05/13] cpuset: introduce cpuset_for_each_child() Tejun Heo
2012-11-28 21:34 ` [PATCH 06/13] cpuset: cleanup cpuset[_can]_attach() Tejun Heo
2012-12-26 10:20 ` Li Zefan
2012-12-26 12:04 ` Tejun Heo
2013-01-02 4:42 ` Rusty Russell
2013-01-02 15:34 ` Tejun Heo
2013-01-03 0:47 ` Rusty Russell
2013-01-03 2:29 ` Tejun Heo
2013-01-06 23:28 ` Rusty Russell
2012-11-28 21:34 ` [PATCH 07/13] cpuset: drop async_rebuild_sched_domains() Tejun Heo
2012-11-28 21:34 ` [PATCH 08/13] cpuset: reorganize CPU / memory hotplug handling Tejun Heo
2012-11-28 21:34 ` [PATCH 09/13] cpuset: don't nest cgroup_mutex inside get_online_cpus() Tejun Heo
2012-11-28 21:34 ` Tejun Heo [this message]
2012-11-28 21:34 ` [PATCH 11/13] cpuset: pin down cpus and mems while a task is being attached Tejun Heo
2012-11-28 21:34 ` [PATCH 12/13] cpuset: schedule hotplug propagation from cpuset_attach() if the cpuset is empty Tejun Heo
2012-11-28 21:34 ` [PATCH 13/13] cpuset: replace cgroup_mutex locking with cpuset internal locking Tejun Heo
2012-11-29 11:14 ` [PATCHSET cgroup/for-3.8] cpuset: decouple cpuset locking from cgroup core Glauber Costa
2012-11-29 14:26 ` Tejun Heo
2012-11-29 14:36 ` Tejun Heo
2012-11-30 3:21 ` Kamezawa Hiroyuki
2012-11-30 8:33 ` Michal Hocko
2012-11-30 9:00 ` Glauber Costa
2012-11-30 9:24 ` Michal Hocko
2012-11-30 9:33 ` Glauber Costa
2012-11-30 9:42 ` Glauber Costa
2012-11-30 9:49 ` Michal Hocko
2012-11-30 10:00 ` Glauber Costa
2012-11-30 14:59 ` Tejun Heo
2012-11-30 15:09 ` Glauber Costa
2012-12-03 15:22 ` Michal Hocko
2012-12-03 16:53 ` Tejun Heo
2012-12-06 6:25 ` Li Zefan
2012-12-06 13:09 ` Michal Hocko
2012-12-06 16:54 ` Tejun Heo
2012-12-26 10:51 ` Li Zefan
2013-01-02 8:53 ` Michal Hocko
2013-01-02 15:36 ` Tejun Heo
2013-01-02 16:02 ` Michal Hocko
2013-01-03 22:20 ` Tejun Heo
2013-01-03 21:35 [PATCHSET] cpuset: decouple cpuset locking from cgroup core, take#2 Tejun Heo
2013-01-03 21:36 ` [PATCH 10/13] cpuset: make CPU / memory hotplug propagation asynchronous Tejun Heo
2013-01-06 8:29 ` Li Zefan
2013-01-07 16:42 ` Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1354138460-19286-11-git-send-email-tj@kernel.org \
--to=tj@kernel.org \
--cc=bsingharora@gmail.com \
--cc=cgroups@vger.kernel.org \
--cc=containers@lists.linux-foundation.org \
--cc=glommer@parallels.com \
--cc=hannes@cmpxchg.org \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lizefan@huawei.com \
--cc=mhocko@suse.cz \
--cc=paul@paulmenage.org \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox