linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Roman Gushchin <guro@fb.com>
To: Jan Kara <jack@suse.cz>, Tejun Heo <tj@kernel.org>
Cc: <linux-fsdevel@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<linux-mm@kvack.org>, Alexander Viro <viro@zeniv.linux.org.uk>,
	Dennis Zhou <dennis@kernel.org>,
	Dave Chinner <dchinner@redhat.com>, <cgroups@vger.kernel.org>,
	Roman Gushchin <guro@fb.com>
Subject: [PATCH v5 1/2] writeback, cgroup: keep list of inodes attached to bdi_writeback
Date: Wed, 26 May 2021 15:25:56 -0700	[thread overview]
Message-ID: <20210526222557.3118114-2-guro@fb.com> (raw)
In-Reply-To: <20210526222557.3118114-1-guro@fb.com>

Currently there is no way to iterate over inodes attached to a
specific cgwb structure. It limits the ability to efficiently
reclaim the writeback structure itself and associated memory and
block cgroup structures without scanning all inodes belonging to a sb,
which can be prohibitively expensive.

While dirty/in-active-writeback an inode belongs to one of the
bdi_writeback's io lists: b_dirty, b_io, b_more_io and b_dirty_time.
Once cleaned up, it's removed from all io lists. So the
inode->i_io_list can be reused to maintain the list of inodes,
attached to a bdi_writeback structure.

This patch introduces a new wb->b_attached list, which contains all
inodes which were dirty at least once and are attached to the given
cgwb. Inodes attached to the root bdi_writeback structures are never
placed on such list. The following patch will use this list to try to
release cgwbs structures more efficiently.

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Roman Gushchin <guro@fb.com>
---
 fs/fs-writeback.c                | 66 ++++++++++++++++++++++++--------
 include/linux/backing-dev-defs.h |  1 +
 include/linux/backing-dev.h      |  7 ++++
 include/linux/writeback.h        |  1 +
 mm/backing-dev.c                 |  2 +
 5 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e91980f49388..631ef6366293 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -135,18 +135,23 @@ static bool inode_io_list_move_locked(struct inode *inode,
  * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
  * @inode: inode to be removed
  * @wb: bdi_writeback @inode is being removed from
+ * @final: inode is going to be freed and can't reappear on any IO list
  *
  * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
  * clear %WB_has_dirty_io if all are empty afterwards.
  */
 static void inode_io_list_del_locked(struct inode *inode,
-				     struct bdi_writeback *wb)
+				     struct bdi_writeback *wb,
+				     bool final)
 {
 	assert_spin_locked(&wb->list_lock);
 	assert_spin_locked(&inode->i_lock);
 
 	inode->i_state &= ~I_SYNC_QUEUED;
-	list_del_init(&inode->i_io_list);
+	if (final)
+		list_del_init(&inode->i_io_list);
+	else
+		inode_cgwb_move_to_attached(inode, wb);
 	wb_io_lists_depopulated(wb);
 }
 
@@ -278,6 +283,25 @@ void __inode_attach_wb(struct inode *inode, struct page *page)
 }
 EXPORT_SYMBOL_GPL(__inode_attach_wb);
 
+/**
+ * inode_cgwb_move_to_attached - put the inode onto wb->b_attached list
+ * @inode: inode of interest with i_lock held
+ * @wb: target bdi_writeback
+ *
+ * Remove the inode from wb's io lists and if necessarily put onto b_attached
+ * list.  Only inodes attached to cgwb's are kept on this list.
+ */
+void inode_cgwb_move_to_attached(struct inode *inode, struct bdi_writeback *wb)
+{
+	assert_spin_locked(&wb->list_lock);
+	assert_spin_locked(&inode->i_lock);
+
+	if (wb != &wb->bdi->wb)
+		list_move(&inode->i_io_list, &wb->b_attached);
+	else
+		list_del_init(&inode->i_io_list);
+}
+
 /**
  * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
  * @inode: inode of interest with i_lock held
@@ -419,21 +443,29 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 	wb_get(new_wb);
 
 	/*
-	 * Transfer to @new_wb's IO list if necessary.  The specific list
-	 * @inode was on is ignored and the inode is put on ->b_dirty which
-	 * is always correct including from ->b_dirty_time.  The transfer
-	 * preserves @inode->dirtied_when ordering.
+	 * Transfer to @new_wb's IO list if necessary.  If the @inode is dirty,
+	 * the specific list @inode was on is ignored and the @inode is put on
+	 * ->b_dirty which is always correct including from ->b_dirty_time.
+	 * The transfer preserves @inode->dirtied_when ordering.  If the @inode
+	 * was clean, it means it was on the b_attached list, so move it onto
+	 * the b_attached list of @new_wb.
 	 */
 	if (!list_empty(&inode->i_io_list)) {
-		struct inode *pos;
-
-		inode_io_list_del_locked(inode, old_wb);
+		inode_io_list_del_locked(inode, old_wb, true);
 		inode->i_wb = new_wb;
-		list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
-			if (time_after_eq(inode->dirtied_when,
-					  pos->dirtied_when))
-				break;
-		inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
+
+		if (inode->i_state & I_DIRTY_ALL) {
+			struct inode *pos;
+
+			list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
+				if (time_after_eq(inode->dirtied_when,
+						  pos->dirtied_when))
+					break;
+			inode_io_list_move_locked(inode, new_wb,
+						  pos->i_io_list.prev);
+		} else {
+			inode_cgwb_move_to_attached(inode, new_wb);
+		}
 	} else {
 		inode->i_wb = new_wb;
 	}
@@ -1124,7 +1156,7 @@ void inode_io_list_del(struct inode *inode)
 
 	wb = inode_to_wb_and_lock_list(inode);
 	spin_lock(&inode->i_lock);
-	inode_io_list_del_locked(inode, wb);
+	inode_io_list_del_locked(inode, wb, true);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&wb->list_lock);
 }
@@ -1437,7 +1469,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
 		inode->i_state &= ~I_SYNC_QUEUED;
 	} else {
 		/* The inode is clean. Remove from writeback lists. */
-		inode_io_list_del_locked(inode, wb);
+		inode_io_list_del_locked(inode, wb, false);
 	}
 }
 
@@ -1589,7 +1621,7 @@ static int writeback_single_inode(struct inode *inode,
 	 * responsible for the writeback lists.
 	 */
 	if (!(inode->i_state & I_DIRTY_ALL))
-		inode_io_list_del_locked(inode, wb);
+		inode_io_list_del_locked(inode, wb, false);
 	spin_unlock(&wb->list_lock);
 	inode_sync_complete(inode);
 out:
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index fff9367a6348..e5dc238ebe4f 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -154,6 +154,7 @@ struct bdi_writeback {
 	struct cgroup_subsys_state *blkcg_css; /* and blkcg */
 	struct list_head memcg_node;	/* anchored at memcg->cgwb_list */
 	struct list_head blkcg_node;	/* anchored at blkcg->cgwb_list */
+	struct list_head b_attached;	/* attached inodes, protected by list_lock */
 
 	union {
 		struct work_struct release_work;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 44df4fcef65c..4256e66802e6 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -177,6 +177,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
 void wb_memcg_offline(struct mem_cgroup *memcg);
 void wb_blkcg_offline(struct blkcg *blkcg);
 int inode_congested(struct inode *inode, int cong_bits);
+void inode_cgwb_move_to_attached(struct inode *inode, struct bdi_writeback *wb);
 
 /**
  * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
@@ -345,6 +346,12 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
 	return false;
 }
 
+static inline void inode_cgwb_move_to_attached(struct inode *inode,
+					       struct bdi_writeback *wb)
+{
+	list_del_init(&inode->i_io_list);
+}
+
 static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
 {
 	return &bdi->wb;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 8e5c5bb16e2d..572a13c40c90 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -212,6 +212,7 @@ static inline void wait_on_inode(struct inode *inode)
 #include <linux/bio.h>
 
 void __inode_attach_wb(struct inode *inode, struct page *page);
+void inode_cgwb_move_to_attached(struct inode *inode, struct bdi_writeback *wb);
 void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
 				 struct inode *inode)
 	__releases(&inode->i_lock);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 576220acd686..54c5dc4b8c24 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -396,6 +396,7 @@ static void cgwb_release_workfn(struct work_struct *work)
 	fprop_local_destroy_percpu(&wb->memcg_completions);
 	percpu_ref_exit(&wb->refcnt);
 	wb_exit(wb);
+	WARN_ON_ONCE(!list_empty(&wb->b_attached));
 	kfree_rcu(wb, rcu);
 }
 
@@ -472,6 +473,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
 
 	wb->memcg_css = memcg_css;
 	wb->blkcg_css = blkcg_css;
+	INIT_LIST_HEAD(&wb->b_attached);
 	INIT_WORK(&wb->release_work, cgwb_release_workfn);
 	set_bit(WB_registered, &wb->state);
 
-- 
2.31.1



  reply	other threads:[~2021-05-26 22:26 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-26 22:25 [PATCH v5 0/2] cgroup, blkcg: prevent dirty inodes to pin dying memory cgroups Roman Gushchin
2021-05-26 22:25 ` Roman Gushchin [this message]
2021-05-27 10:35   ` [PATCH v5 1/2] writeback, cgroup: keep list of inodes attached to bdi_writeback Jan Kara
2021-05-27 16:32     ` Roman Gushchin
2021-05-26 22:25 ` [PATCH v5 2/2] writeback, cgroup: release dying cgwbs by switching attached inodes Roman Gushchin
2021-05-27  3:24   ` Hillf Danton
2021-05-27 16:29     ` Roman Gushchin
2021-05-27 11:24   ` Jan Kara
2021-05-27 17:48     ` Roman Gushchin
2021-05-27 19:45       ` Roman Gushchin
2021-05-28 13:05       ` Jan Kara
2021-05-28 16:25         ` Roman Gushchin
2021-05-28  2:58   ` Ming Lei
2021-05-28 16:22     ` Roman Gushchin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210526222557.3118114-2-guro@fb.com \
    --to=guro@fb.com \
    --cc=cgroups@vger.kernel.org \
    --cc=dchinner@redhat.com \
    --cc=dennis@kernel.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=tj@kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox