From: Jiufei Xue <jiufei.xue@linux.alibaba.com>
To: cgroups@vger.kernel.org, linux-mm@kvack.org
Cc: tj@kernel.org, akpm@linux-foundation.org, joseph.qi@linux.alibaba.com
Subject: [PATCH v2] fs/fs-writeback: wait isw_nr_in_flight to be zero when umount
Date: Tue, 16 Apr 2019 20:09:02 +0800 [thread overview]
Message-ID: <20190416120902.18616-1-jiufei.xue@linux.alibaba.com> (raw)
synchronize_rcu() does not wait for call_rcu() callbacks, so an inode wb
switch may not yet have been queued to the workqueue when
synchronize_rcu() returns. Thus previously scheduled switches may not
have finished even after flushing the workqueue, which causes the NULL
pointer dereference shown below.
VFS: Busy inodes after unmount of vdd. Self-destruct in 5 seconds.
Have a nice day...
BUG: unable to handle kernel NULL pointer dereference at
0000000000000278
[<ffffffff8126a303>] evict+0xb3/0x180
[<ffffffff8126a760>] iput+0x1b0/0x230
[<ffffffff8127c690>] inode_switch_wbs_work_fn+0x3c0/0x6a0
[<ffffffff810a5b2e>] worker_thread+0x4e/0x490
[<ffffffff810a5ae0>] ? process_one_work+0x410/0x410
[<ffffffff810ac056>] kthread+0xe6/0x100
[<ffffffff8173c199>] ret_from_fork+0x39/0x50
rcu_barrier() is not used here because it would wait for all RCU
callbacks, which is not appropriate.
Changes since v1: use a per-sb s_isw_nr_in_flight counter to ensure that
s_isw_nr_in_flight will eventually drop to zero.
Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
Cc: stable@kernel.org
---
fs/fs-writeback.c | 22 +++++++++++++++-------
fs/super.c | 3 ++-
include/linux/fs.h | 2 ++
include/linux/writeback.h | 4 ++--
4 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 36855c1f8daf..370ac3a872f8 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -237,7 +237,6 @@ static void wb_wait_for_completion(struct backing_dev_info *bdi,
#define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1)
/* one round can affect upto 5 slots */
-static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
static struct workqueue_struct *isw_wq;
void __inode_attach_wb(struct inode *inode, struct page *page)
@@ -346,6 +345,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
struct inode_switch_wbs_context *isw =
container_of(work, struct inode_switch_wbs_context, work);
struct inode *inode = isw->inode;
+ struct super_block *sb = inode->i_sb;
struct backing_dev_info *bdi = inode_to_bdi(inode);
struct address_space *mapping = inode->i_mapping;
struct bdi_writeback *old_wb = inode->i_wb;
@@ -456,7 +456,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
iput(inode);
kfree(isw);
- atomic_dec(&isw_nr_in_flight);
+ atomic_dec(&sb->s_isw_nr_in_flight);
}
static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
@@ -479,6 +479,7 @@ static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
*/
static void inode_switch_wbs(struct inode *inode, int new_wb_id)
{
+ struct super_block *sb = inode->i_sb;
struct backing_dev_info *bdi = inode_to_bdi(inode);
struct cgroup_subsys_state *memcg_css;
struct inode_switch_wbs_context *isw;
@@ -523,7 +524,7 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
isw->inode = inode;
- atomic_inc(&isw_nr_in_flight);
+ atomic_inc(&sb->s_isw_nr_in_flight);
/*
* In addition to synchronizing among switchers, I_WB_SWITCH tells
@@ -898,12 +899,19 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
* rare occurrences and synchronize_rcu() can take a while, perform
* flushing iff wb switches are in flight.
*/
-void cgroup_writeback_umount(void)
+void cgroup_writeback_umount(struct super_block *sb)
{
- if (atomic_read(&isw_nr_in_flight)) {
- synchronize_rcu();
+ if (!atomic_read(&sb->s_isw_nr_in_flight))
+ return;
+
+ synchronize_rcu();
+
+ /*
+ * Now no more switches can be queued for this filesystem, just
+ * wait for the in-flight switches to finish.
+ */
+ while (atomic_read(&sb->s_isw_nr_in_flight))
flush_workqueue(isw_wq);
- }
}
static int __init cgroup_writeback_init(void)
diff --git a/fs/super.c b/fs/super.c
index 583a0124bc39..3d5ebf60b4ee 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -248,6 +248,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
spin_lock_init(&s->s_inode_list_lock);
INIT_LIST_HEAD(&s->s_inodes_wb);
spin_lock_init(&s->s_inode_wblist_lock);
+ atomic_set(&s->s_isw_nr_in_flight, 0);
s->s_count = 1;
atomic_set(&s->s_active, 1);
@@ -445,7 +446,7 @@ void generic_shutdown_super(struct super_block *sb)
sb->s_flags &= ~SB_ACTIVE;
fsnotify_sb_delete(sb);
- cgroup_writeback_umount();
+ cgroup_writeback_umount(sb);
evict_inodes(sb);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd28e7679089..4e437e2723b9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1519,6 +1519,8 @@ struct super_block {
spinlock_t s_inode_wblist_lock;
struct list_head s_inodes_wb; /* writeback inodes */
+
+ atomic_t s_isw_nr_in_flight;
} __randomize_layout;
/* Helper functions so that in most cases filesystems will
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 738a0c24874f..982299c92402 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -190,7 +190,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
void wbc_detach_inode(struct writeback_control *wbc);
void wbc_account_io(struct writeback_control *wbc, struct page *page,
size_t bytes);
-void cgroup_writeback_umount(void);
+void cgroup_writeback_umount(struct super_block *sb);
/**
* inode_attach_wb - associate an inode with its wb
@@ -296,7 +296,7 @@ static inline void wbc_account_io(struct writeback_control *wbc,
{
}
-static inline void cgroup_writeback_umount(void)
+static inline void cgroup_writeback_umount(struct super_block *sb)
{
}
--
2.19.1.856.g8858448bb
next reply other threads:[~2019-04-16 12:09 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-04-16 12:09 Jiufei Xue [this message]
2019-04-16 15:04 ` Tejun Heo
2019-04-17 1:04 ` Jiufei Xue
2019-04-17 19:33 ` Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190416120902.18616-1-jiufei.xue@linux.alibaba.com \
--to=jiufei.xue@linux.alibaba.com \
--cc=akpm@linux-foundation.org \
--cc=cgroups@vger.kernel.org \
--cc=joseph.qi@linux.alibaba.com \
--cc=linux-mm@kvack.org \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox