From: Chris Li <chrisl@kernel.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
"Wei Xu" <weixugc@google.com>, "Yu Zhao" <yuzhao@google.com>,
"Greg Thelen" <gthelen@google.com>,
"Chun-Tse Shao" <ctshao@google.com>,
"Suren Baghdasaryan" <surenb@google.com>,
"Yosry Ahmed" <yosryahmed@google.com>,
"Brain Geffon" <bgeffon@google.com>,
"Minchan Kim" <minchan@kernel.org>,
"Michal Hocko" <mhocko@suse.com>,
"Mel Gorman" <mgorman@techsingularity.net>,
"Huang Ying" <ying.huang@intel.com>,
"Nhat Pham" <nphamcs@gmail.com>,
"Johannes Weiner" <hannes@cmpxchg.org>,
"Kairui Song" <kasong@tencent.com>,
"Zhongkun He" <hezhongkun.hzk@bytedance.com>,
"Kemeng Shi" <shikemeng@huaweicloud.com>,
"Barry Song" <v-songbaohua@oppo.com>,
"Matthew Wilcox (Oracle)" <willy@infradead.org>,
"Liam R. Howlett" <Liam.Howlett@oracle.com>,
"Joel Fernandes" <joel@joelfernandes.org>,
"Chengming Zhou" <zhouchengming@bytedance.com>,
"Chris Li" <chrisl@kernel.org>
Subject: [PATCH 2/2] mm: zswap.c: remove RB tree
Date: Wed, 17 Jan 2024 19:05:42 -0800 [thread overview]
Message-ID: <20240117-zswap-xarray-v1-2-6daa86c08fae@kernel.org> (raw)
In-Reply-To: <20240117-zswap-xarray-v1-0-6daa86c08fae@kernel.org>
remove the RB tree code and the RB tree data structure
from zswap.
The xarray insert and erase code have been updated to
use the XAS version of the API to cache the lookup before
the final xarray store.
The zswap tree spinlock hasn't been removed yet due to
other usage outside of the zswap tree. The zswap xarray
function should work fine with its internal lock on RCU
without the zswap tree lock.
This removes the RB node inside the zswap entry, saving
about three pointers in size. Considering the extra
overhead of xarray lookup tables, this should have some
net saving in terms of memory overhead in zswap if the
index is dense.
The zswap RB tree spin lock is still there to protect
the zswap entry. Expect the follow up change to merge
the RB tree spin lock with the xarray lock.
---
mm/zswap.c | 98 +++++++++++++++++++++++---------------------------------------
1 file changed, 36 insertions(+), 62 deletions(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index a40b0076722b..555d5608d401 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -197,7 +197,6 @@ struct zswap_pool {
* This structure contains the metadata for tracking a single compressed
* page within zswap.
*
- * rbnode - links the entry into red-black tree for the appropriate swap type
* swpentry - associated swap entry, the offset indexes into the red-black tree
* refcount - the number of outstanding reference to the entry. This is needed
* to protect against premature freeing of the entry by code
@@ -215,7 +214,6 @@ struct zswap_pool {
* lru - handle to the pool's lru used to evict pages.
*/
struct zswap_entry {
- struct rb_node rbnode;
swp_entry_t swpentry;
int refcount;
unsigned int length;
@@ -234,7 +232,6 @@ struct zswap_entry {
* - the refcount field of each entry in the tree
*/
struct zswap_tree {
- struct rb_root rbroot;
struct xarray xarray;
spinlock_t lock;
};
@@ -357,7 +354,6 @@ static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
if (!entry)
return NULL;
entry->refcount = 1;
- RB_CLEAR_NODE(&entry->rbnode);
return entry;
}
@@ -465,25 +461,7 @@ static void zswap_lru_putback(struct list_lru *list_lru,
**********************************/
static struct zswap_entry *zswap_search(struct zswap_tree *tree, pgoff_t offset)
{
- struct rb_node *node = tree->rbroot.rb_node;
- struct zswap_entry *entry;
- pgoff_t entry_offset;
-
- while (node) {
- entry = rb_entry(node, struct zswap_entry, rbnode);
- entry_offset = swp_offset(entry->swpentry);
- if (entry_offset > offset)
- node = node->rb_left;
- else if (entry_offset < offset)
- node = node->rb_right;
- else {
- struct zswap_entry *e = xa_load(&tree->xarray, offset);
-
- BUG_ON(entry != e);
- return entry;
- }
- }
- return NULL;
+ return xa_load(&tree->xarray, offset);
}
/*
@@ -493,45 +471,47 @@ static struct zswap_entry *zswap_search(struct zswap_tree *tree, pgoff_t offset)
static int zswap_insert(struct zswap_tree *tree, struct zswap_entry *entry,
struct zswap_entry **dupentry)
{
- struct rb_root *root = &tree->rbroot;
- struct rb_node **link = &root->rb_node, *parent = NULL;
- struct zswap_entry *myentry, *old;
- pgoff_t myentry_offset, entry_offset = swp_offset(entry->swpentry);
-
-
- while (*link) {
- parent = *link;
- myentry = rb_entry(parent, struct zswap_entry, rbnode);
- myentry_offset = swp_offset(myentry->swpentry);
- if (myentry_offset > entry_offset)
- link = &(*link)->rb_left;
- else if (myentry_offset < entry_offset)
- link = &(*link)->rb_right;
- else {
- old = xa_load(&tree->xarray, entry_offset);
- BUG_ON(old != myentry);
- *dupentry = myentry;
+ struct zswap_entry *e;
+ pgoff_t offset = swp_offset(entry->swpentry);
+ XA_STATE(xas, &tree->xarray, offset);
+
+ do {
+ xas_lock_irq(&xas);
+ do {
+ e = xas_load(&xas);
+ if (xa_is_zero(e))
+ e = NULL;
+ } while (xas_retry(&xas, e));
+ if (xas_valid(&xas) && e) {
+ xas_unlock_irq(&xas);
+ *dupentry = e;
return -EEXIST;
}
- }
- rb_link_node(&entry->rbnode, parent, link);
- rb_insert_color(&entry->rbnode, root);
- old = xa_store(&tree->xarray, entry_offset, entry, GFP_KERNEL);
- return 0;
+ xas_store(&xas, entry);
+ xas_unlock_irq(&xas);
+ } while (xas_nomem(&xas, GFP_KERNEL));
+ return xas_error(&xas);
}
static bool zswap_erase(struct zswap_tree *tree, struct zswap_entry *entry)
{
+ struct zswap_entry *e;
pgoff_t offset = swp_offset(entry->swpentry);
- if (!RB_EMPTY_NODE(&entry->rbnode)) {
- struct zswap_entry *old;
- old = xa_erase(&tree->xarray, offset);
- BUG_ON(old != entry);
- rb_erase(&entry->rbnode, &tree->rbroot);
- RB_CLEAR_NODE(&entry->rbnode);
- return true;
- }
- return false;
+ XA_STATE(xas, &tree->xarray, offset);
+
+ do {
+ xas_lock_irq(&xas);
+ do {
+ e = xas_load(&xas);
+ } while (xas_retry(&xas, e));
+ if (xas_valid(&xas) && e != entry) {
+ xas_unlock_irq(&xas);
+ return false;
+ }
+ xas_store(&xas, NULL);
+ xas_unlock_irq(&xas);
+ } while (xas_nomem(&xas, GFP_KERNEL));
+ return !xas_error(&xas);
}
static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
@@ -583,7 +563,6 @@ static void zswap_entry_put(struct zswap_tree *tree,
WARN_ON_ONCE(refcount < 0);
if (refcount == 0) {
- WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode));
zswap_free_entry(entry);
}
}
@@ -1799,7 +1778,6 @@ void zswap_swapon(int type)
return;
}
- tree->rbroot = RB_ROOT;
xa_init(&tree->xarray);
spin_lock_init(&tree->lock);
zswap_trees[type] = tree;
@@ -1808,7 +1786,7 @@ void zswap_swapon(int type)
void zswap_swapoff(int type)
{
struct zswap_tree *tree = zswap_trees[type];
- struct zswap_entry *entry, *e, *n;
+ struct zswap_entry *e;
XA_STATE(xas, tree ? &tree->xarray : NULL, 0);
if (!tree)
@@ -1820,10 +1798,6 @@ void zswap_swapoff(int type)
xas_for_each(&xas, e, ULONG_MAX)
zswap_invalidate_entry(tree, e);
- rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
- BUG_ON(entry);
-
- tree->rbroot = RB_ROOT;
spin_unlock(&tree->lock);
kfree(tree);
zswap_trees[type] = NULL;
--
2.43.0.429.g432eaa2c6b-goog
next prev parent reply other threads:[~2024-01-18 3:06 UTC|newest]
Thread overview: 43+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-18 3:05 [PATCH 0/2] RFC: zswap tree use xarray instead of " Chris Li
2024-01-18 3:05 ` [PATCH 1/2] mm: zswap.c: add xarray tree to zswap Chris Li
2024-01-18 6:20 ` Yosry Ahmed
2024-01-18 13:52 ` Matthew Wilcox
2024-01-18 16:59 ` Yosry Ahmed
2024-01-18 18:25 ` Matthew Wilcox
2024-01-19 5:28 ` Chris Li
2024-01-19 19:30 ` Yosry Ahmed
2024-01-19 5:24 ` Chris Li
2024-01-19 19:29 ` Yosry Ahmed
2024-01-19 20:04 ` Matthew Wilcox
2024-01-19 21:41 ` Yosry Ahmed
2024-01-19 22:05 ` Chris Li
2024-01-19 22:08 ` Yosry Ahmed
2024-01-18 3:05 ` Chris Li [this message]
2024-01-18 6:35 ` [PATCH 2/2] mm: zswap.c: remove RB tree Yosry Ahmed
2024-01-18 19:35 ` Yosry Ahmed
2024-01-19 5:49 ` Chris Li
2024-01-19 19:37 ` Yosry Ahmed
2024-01-19 5:43 ` Chris Li
2024-01-19 19:36 ` Yosry Ahmed
2024-01-19 21:31 ` Chris Li
2024-01-19 21:44 ` Yosry Ahmed
2024-01-18 6:01 ` [PATCH 0/2] RFC: zswap tree use xarray instead of " Yosry Ahmed
2024-01-18 6:39 ` Yosry Ahmed
2024-01-18 6:57 ` Chengming Zhou
2024-01-18 7:02 ` Yosry Ahmed
2024-01-18 7:19 ` Chris Li
2024-01-18 7:35 ` Chengming Zhou
2024-01-19 4:59 ` Chris Li
2024-01-19 6:18 ` Chengming Zhou
2024-01-19 10:26 ` Chris Li
2024-01-19 11:12 ` Chengming Zhou
2024-01-19 11:59 ` Chris Li
2024-01-18 6:48 ` Christopher Li
2024-01-18 7:05 ` Yosry Ahmed
2024-01-18 7:28 ` Chris Li
2024-01-18 17:14 ` Yosry Ahmed
2024-01-18 14:48 ` Johannes Weiner
2024-01-18 18:59 ` Liam R. Howlett
2024-01-19 5:13 ` Chris Li
2024-01-18 18:01 ` Nhat Pham
2024-01-19 5:14 ` Chris Li
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240117-zswap-xarray-v1-2-6daa86c08fae@kernel.org \
--to=chrisl@kernel.org \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=bgeffon@google.com \
--cc=ctshao@google.com \
--cc=gthelen@google.com \
--cc=hannes@cmpxchg.org \
--cc=hezhongkun.hzk@bytedance.com \
--cc=joel@joelfernandes.org \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@techsingularity.net \
--cc=mhocko@suse.com \
--cc=minchan@kernel.org \
--cc=nphamcs@gmail.com \
--cc=shikemeng@huaweicloud.com \
--cc=surenb@google.com \
--cc=v-songbaohua@oppo.com \
--cc=weixugc@google.com \
--cc=willy@infradead.org \
--cc=ying.huang@intel.com \
--cc=yosryahmed@google.com \
--cc=yuzhao@google.com \
--cc=zhouchengming@bytedance.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox