From: vitaly.wool@konsulko.com
To: linux-mm@kvack.org
Cc: ddstreet@ieee.org, akpm@linux-foundation.org,
Vitaly Wool <vitaly.wool@konsulko.com>
Subject: [PATCH v2] zswap: add allocation hysteresis if pool limit is hit
Date: Wed, 8 Jan 2020 22:01:17 +0200 [thread overview]
Message-ID: <20200108200118.15563-1-vitaly.wool@konsulko.com> (raw)
From: Vitaly Wool <vitaly.wool@konsulko.com>
zswap will always try to shrink pool when zswap is full. If there is
a high pressure on zswap it will result in flipping pages in and out
zswap pool without any real benefit, and the overall system
performance will drop. The previous discussion on this subject [1]
ended up with a suggestion to implement a sort of hysteresis to
refuse taking pages into zswap pool until it has sufficient space if
the limit has been hit. This is my take on this.
Hysteresis is controlled with a sysfs-configurable parameter
(namely, /sys/kernel/debug/zswap/accept_threhsold_percent). It
specifies the threshold at which zswap would start accepting pages
again after it became full. Setting this parameter to 100 disables
the hysteresis and sets the zswap behavior to pre-hysteresis state.
[1] https://lkml.org/lkml/2019/11/8/949
Changes since v1:
* full path specified for the sysfs parameter
* Documentation/vm/zswap.rst updated to reflect the new configuration
option
Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.com>
---
Documentation/vm/zswap.rst | 13 ++++++
mm/zswap.c | 85 ++++++++++++++++++++++++--------------
2 files changed, 67 insertions(+), 31 deletions(-)
diff --git a/Documentation/vm/zswap.rst b/Documentation/vm/zswap.rst
index 1444ecd40911..61f6185188cd 100644
--- a/Documentation/vm/zswap.rst
+++ b/Documentation/vm/zswap.rst
@@ -130,6 +130,19 @@ checking for the same-value filled pages during store operation. However, the
existing pages which are marked as same-value filled pages remain stored
unchanged in zswap until they are either loaded or invalidated.
+To prevent zswap from shrinking pool when zswap is full and there's a high
+pressure on swap (this will result in flipping pages in and out zswap pool
+without any real benefit but with a performance drop for the system), a
+special parameter has been introduced to implement a sort of hysteresis to
+refuse taking pages into zswap pool until it has sufficient space if the limit
+has been hit. To set the threshold at which zswap would start accepting pages
+again after it became full, use the sysfs ``accept_threhsold_percent``
+attribute, e. g.::
+
+ echo 80 > /sys/module/zswap/parameters/accept_threhsold_percent
+
+Setting this parameter to 100 will disable the hysteresis.
+
A debugfs interface is provided for various statistic about pool size, number
of pages stored, same-value filled pages and various counters for the reasons
pages are rejected.
diff --git a/mm/zswap.c b/mm/zswap.c
index 46a322316e52..7ec8bd912d13 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -32,6 +32,7 @@
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
+#include <linux/workqueue.h>
/*********************************
* statistics
@@ -65,6 +66,11 @@ static u64 zswap_reject_kmemcache_fail;
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry;
+/* Shrinker work queue */
+static struct workqueue_struct *shrink_wq;
+/* Pool limit was hit, we need to calm down */
+static bool zswap_pool_reached_full;
+
/*********************************
* tunables
**********************************/
@@ -109,6 +115,11 @@ module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);
+/* The threshold for accepting new pages after the max_pool_percent was hit */
+static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
+module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
+ uint, 0644);
+
/* Enable/disable handling same-value filled pages (enabled by default) */
static bool zswap_same_filled_pages_enabled = true;
module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
@@ -123,7 +134,8 @@ struct zswap_pool {
struct crypto_comp * __percpu *tfm;
struct kref kref;
struct list_head list;
- struct work_struct work;
+ struct work_struct release_work;
+ struct work_struct shrink_work;
struct hlist_node node;
char tfm_name[CRYPTO_MAX_ALG_NAME];
};
@@ -214,6 +226,13 @@ static bool zswap_is_full(void)
DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}
+static bool zswap_can_accept(void)
+{
+ return totalram_pages() * zswap_accept_thr_percent / 100 *
+ zswap_max_pool_percent / 100 >
+ DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
+}
+
static void zswap_update_total_size(void)
{
struct zswap_pool *pool;
@@ -501,6 +520,16 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
return NULL;
}
+static void shrink_worker(struct work_struct *w)
+{
+ struct zswap_pool *pool = container_of(w, typeof(*pool),
+ shrink_work);
+
+ if (zpool_shrink(pool->zpool, 1, NULL))
+ zswap_reject_reclaim_fail++;
+ zswap_pool_put(pool);
+}
+
static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{
struct zswap_pool *pool;
@@ -551,6 +580,7 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
*/
kref_init(&pool->kref);
INIT_LIST_HEAD(&pool->list);
+ INIT_WORK(&pool->shrink_work, shrink_worker);
zswap_pool_debug("created", pool);
@@ -624,7 +654,8 @@ static int __must_check zswap_pool_get(struct zswap_pool *pool)
static void __zswap_pool_release(struct work_struct *work)
{
- struct zswap_pool *pool = container_of(work, typeof(*pool), work);
+ struct zswap_pool *pool = container_of(work, typeof(*pool),
+ release_work);
synchronize_rcu();
@@ -647,8 +678,8 @@ static void __zswap_pool_empty(struct kref *kref)
list_del_rcu(&pool->list);
- INIT_WORK(&pool->work, __zswap_pool_release);
- schedule_work(&pool->work);
+ INIT_WORK(&pool->release_work, __zswap_pool_release);
+ schedule_work(&pool->release_work);
spin_unlock(&zswap_pools_lock);
}
@@ -942,22 +973,6 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
return ret;
}
-static int zswap_shrink(void)
-{
- struct zswap_pool *pool;
- int ret;
-
- pool = zswap_pool_last_get();
- if (!pool)
- return -ENOENT;
-
- ret = zpool_shrink(pool->zpool, 1, NULL);
-
- zswap_pool_put(pool);
-
- return ret;
-}
-
static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
{
unsigned int pos;
@@ -1011,21 +1026,23 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
/* reclaim space if needed */
if (zswap_is_full()) {
+ struct zswap_pool *pool;
+
zswap_pool_limit_hit++;
- if (zswap_shrink()) {
- zswap_reject_reclaim_fail++;
- ret = -ENOMEM;
- goto reject;
- }
+ zswap_pool_reached_full = true;
+ pool = zswap_pool_last_get();
+ if (pool)
+ queue_work(shrink_wq, &pool->shrink_work);
+ ret = -ENOMEM;
+ goto reject;
+ }
- /* A second zswap_is_full() check after
- * zswap_shrink() to make sure it's now
- * under the max_pool_percent
- */
- if (zswap_is_full()) {
+ if (zswap_pool_reached_full) {
+ if (!zswap_can_accept()) {
ret = -ENOMEM;
goto reject;
- }
+ } else
+ zswap_pool_reached_full = false;
}
/* allocate entry */
@@ -1332,11 +1349,17 @@ static int __init init_zswap(void)
zswap_enabled = false;
}
+ shrink_wq = create_workqueue("zswap-shrink");
+ if (!shrink_wq)
+ goto fallback_fail;
+
frontswap_register_ops(&zswap_frontswap_ops);
if (zswap_debugfs_init())
pr_warn("debugfs initialization failed\n");
return 0;
+fallback_fail:
+ zswap_pool_destroy(pool);
hp_fail:
cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
dstmem_fail:
--
2.20.1
reply other threads:[~2020-01-08 20:01 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200108200118.15563-1-vitaly.wool@konsulko.com \
--to=vitaly.wool@konsulko.com \
--cc=akpm@linux-foundation.org \
--cc=ddstreet@ieee.org \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox