* [PATCH v5 1/5] mm/zswap: reuse dstmem when decompress
From: Chengming Zhou @ 2023-12-28 9:45 UTC
To: Barry Song, Yosry Ahmed, Nhat Pham, Andrew Morton, Dan Streetman,
Vitaly Wool, Johannes Weiner, Chris Li, Seth Jennings
Cc: Yosry Ahmed, Nhat Pham, Chris Li, linux-mm, linux-kernel, Chengming Zhou
In the !zpool_can_sleep_mapped() case, such as zsmalloc, we currently have
to copy the memory at entry->handle into a temporary buffer allocated with
kmalloc before decompressing.
We can reuse the per-compressor dstmem instead and avoid that allocation on
every decompression, since dstmem is per-CPU per-compressor and protected
by the per-CPU mutex.
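For orientation, a condensed sketch of the pattern this patch adopts
(pulled straight from the diff below, not a separate implementation): take
the per-CPU acomp_ctx mutex before mapping the handle, then bounce through
acomp_ctx->dstmem instead of a kmalloc'ed tmp buffer when the zpool mapping
cannot sleep.

	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
	mutex_lock(acomp_ctx->mutex);

	src = zpool_map_handle(pool, entry->handle, ZPOOL_MM_RO);
	if (!zpool_can_sleep_mapped(pool)) {
		/* dstmem is per-CPU and protected by the mutex held above */
		memcpy(acomp_ctx->dstmem, src, entry->length);
		src = acomp_ctx->dstmem;
		zpool_unmap_handle(pool, entry->handle);
	}
	/* ... decompress into the page, then mutex_unlock() ... */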
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Chris Li <chrisl@kernel.org> (Google)
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
mm/zswap.c | 44 ++++++++++++--------------------------------
1 file changed, 12 insertions(+), 32 deletions(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index 7ee54a3d8281..41df2d97951b 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1417,19 +1417,13 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
struct crypto_acomp_ctx *acomp_ctx;
struct zpool *pool = zswap_find_zpool(entry);
bool page_was_allocated;
- u8 *src, *tmp = NULL;
+ u8 *src;
unsigned int dlen;
int ret;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
};
- if (!zpool_can_sleep_mapped(pool)) {
- tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!tmp)
- return -ENOMEM;
- }
-
/* try to allocate swap cache page */
mpol = get_task_policy(current);
page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
@@ -1465,15 +1459,15 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
/* decompress */
acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
dlen = PAGE_SIZE;
+ mutex_lock(acomp_ctx->mutex);
src = zpool_map_handle(pool, entry->handle, ZPOOL_MM_RO);
if (!zpool_can_sleep_mapped(pool)) {
- memcpy(tmp, src, entry->length);
- src = tmp;
+ memcpy(acomp_ctx->dstmem, src, entry->length);
+ src = acomp_ctx->dstmem;
zpool_unmap_handle(pool, entry->handle);
}
- mutex_lock(acomp_ctx->mutex);
sg_init_one(&input, src, entry->length);
sg_init_table(&output, 1);
sg_set_page(&output, page, PAGE_SIZE, 0);
@@ -1482,9 +1476,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
dlen = acomp_ctx->req->dlen;
mutex_unlock(acomp_ctx->mutex);
- if (!zpool_can_sleep_mapped(pool))
- kfree(tmp);
- else
+ if (zpool_can_sleep_mapped(pool))
zpool_unmap_handle(pool, entry->handle);
BUG_ON(ret);
@@ -1508,9 +1500,6 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
return ret;
fail:
- if (!zpool_can_sleep_mapped(pool))
- kfree(tmp);
-
/*
* If we get here because the page is already in swapcache, a
* load may be happening concurrently. It is safe and okay to
@@ -1772,7 +1761,7 @@ bool zswap_load(struct folio *folio)
struct zswap_entry *entry;
struct scatterlist input, output;
struct crypto_acomp_ctx *acomp_ctx;
- u8 *src, *dst, *tmp;
+ u8 *src, *dst;
struct zpool *zpool;
unsigned int dlen;
bool ret;
@@ -1797,26 +1786,19 @@ bool zswap_load(struct folio *folio)
}
zpool = zswap_find_zpool(entry);
- if (!zpool_can_sleep_mapped(zpool)) {
- tmp = kmalloc(entry->length, GFP_KERNEL);
- if (!tmp) {
- ret = false;
- goto freeentry;
- }
- }
/* decompress */
dlen = PAGE_SIZE;
- src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
+ acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+ mutex_lock(acomp_ctx->mutex);
+ src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
if (!zpool_can_sleep_mapped(zpool)) {
- memcpy(tmp, src, entry->length);
- src = tmp;
+ memcpy(acomp_ctx->dstmem, src, entry->length);
+ src = acomp_ctx->dstmem;
zpool_unmap_handle(zpool, entry->handle);
}
- acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
- mutex_lock(acomp_ctx->mutex);
sg_init_one(&input, src, entry->length);
sg_init_table(&output, 1);
sg_set_page(&output, page, PAGE_SIZE, 0);
@@ -1827,15 +1809,13 @@ bool zswap_load(struct folio *folio)
if (zpool_can_sleep_mapped(zpool))
zpool_unmap_handle(zpool, entry->handle);
- else
- kfree(tmp);
ret = true;
stats:
count_vm_event(ZSWPIN);
if (entry->objcg)
count_objcg_event(entry->objcg, ZSWPIN);
-freeentry:
+
spin_lock(&tree->lock);
if (ret && zswap_exclusive_loads_enabled) {
zswap_invalidate_entry(tree, entry);
--
b4 0.10.1
* [PATCH v5 2/5] mm/zswap: refactor out __zswap_load()
From: Chengming Zhou @ 2023-12-28 9:45 UTC
To: Barry Song, Yosry Ahmed, Nhat Pham, Andrew Morton, Dan Streetman,
Vitaly Wool, Johannes Weiner, Chris Li, Seth Jennings
Cc: Yosry Ahmed, Nhat Pham, Chris Li, linux-mm, linux-kernel, Chengming Zhou
zswap_load() and zswap_writeback_entry() share the same code that
decompresses the data from a zswap_entry into a page, so refactor the
common part out into __zswap_load(entry, page).
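The resulting helper has this shape (condensed from the diff below): it
maps the zpool handle, bounces through acomp_ctx->dstmem when the mapping
cannot sleep, and decompresses entry->length bytes into the page under
acomp_ctx->mutex.

	/* shared by zswap_load() and zswap_writeback_entry() */
	static void __zswap_load(struct zswap_entry *entry, struct page *page);

	/* both call sites collapse to a single line: */
	__zswap_load(entry, page);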
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Chris Li <chrisl@kernel.org> (Google)
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
mm/zswap.c | 92 ++++++++++++++++++++++----------------------------------------
1 file changed, 32 insertions(+), 60 deletions(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index 41df2d97951b..b25d7d03851d 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1392,6 +1392,35 @@ static int zswap_enabled_param_set(const char *val,
return ret;
}
+static void __zswap_load(struct zswap_entry *entry, struct page *page)
+{
+ struct zpool *zpool = zswap_find_zpool(entry);
+ struct scatterlist input, output;
+ struct crypto_acomp_ctx *acomp_ctx;
+ u8 *src;
+
+ acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+ mutex_lock(acomp_ctx->mutex);
+
+ src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
+ if (!zpool_can_sleep_mapped(zpool)) {
+ memcpy(acomp_ctx->dstmem, src, entry->length);
+ src = acomp_ctx->dstmem;
+ zpool_unmap_handle(zpool, entry->handle);
+ }
+
+ sg_init_one(&input, src, entry->length);
+ sg_init_table(&output, 1);
+ sg_set_page(&output, page, PAGE_SIZE, 0);
+ acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
+ BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
+ BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
+ mutex_unlock(acomp_ctx->mutex);
+
+ if (zpool_can_sleep_mapped(zpool))
+ zpool_unmap_handle(zpool, entry->handle);
+}
+
/*********************************
* writeback code
**********************************/
@@ -1413,12 +1442,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
swp_entry_t swpentry = entry->swpentry;
struct page *page;
struct mempolicy *mpol;
- struct scatterlist input, output;
- struct crypto_acomp_ctx *acomp_ctx;
- struct zpool *pool = zswap_find_zpool(entry);
bool page_was_allocated;
- u8 *src;
- unsigned int dlen;
int ret;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
@@ -1456,31 +1480,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
}
spin_unlock(&tree->lock);
- /* decompress */
- acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
- dlen = PAGE_SIZE;
- mutex_lock(acomp_ctx->mutex);
-
- src = zpool_map_handle(pool, entry->handle, ZPOOL_MM_RO);
- if (!zpool_can_sleep_mapped(pool)) {
- memcpy(acomp_ctx->dstmem, src, entry->length);
- src = acomp_ctx->dstmem;
- zpool_unmap_handle(pool, entry->handle);
- }
-
- sg_init_one(&input, src, entry->length);
- sg_init_table(&output, 1);
- sg_set_page(&output, page, PAGE_SIZE, 0);
- acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
- ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
- dlen = acomp_ctx->req->dlen;
- mutex_unlock(acomp_ctx->mutex);
-
- if (zpool_can_sleep_mapped(pool))
- zpool_unmap_handle(pool, entry->handle);
-
- BUG_ON(ret);
- BUG_ON(dlen != PAGE_SIZE);
+ __zswap_load(entry, page);
/* page is up to date */
SetPageUptodate(page);
@@ -1759,11 +1759,7 @@ bool zswap_load(struct folio *folio)
struct page *page = &folio->page;
struct zswap_tree *tree = zswap_trees[type];
struct zswap_entry *entry;
- struct scatterlist input, output;
- struct crypto_acomp_ctx *acomp_ctx;
- u8 *src, *dst;
- struct zpool *zpool;
- unsigned int dlen;
+ u8 *dst;
bool ret;
VM_WARN_ON_ONCE(!folio_test_locked(folio));
@@ -1785,31 +1781,7 @@ bool zswap_load(struct folio *folio)
goto stats;
}
- zpool = zswap_find_zpool(entry);
-
- /* decompress */
- dlen = PAGE_SIZE;
- acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
- mutex_lock(acomp_ctx->mutex);
-
- src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
- if (!zpool_can_sleep_mapped(zpool)) {
- memcpy(acomp_ctx->dstmem, src, entry->length);
- src = acomp_ctx->dstmem;
- zpool_unmap_handle(zpool, entry->handle);
- }
-
- sg_init_one(&input, src, entry->length);
- sg_init_table(&output, 1);
- sg_set_page(&output, page, PAGE_SIZE, 0);
- acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
- if (crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait))
- WARN_ON(1);
- mutex_unlock(acomp_ctx->mutex);
-
- if (zpool_can_sleep_mapped(zpool))
- zpool_unmap_handle(zpool, entry->handle);
-
+ __zswap_load(entry, page);
ret = true;
stats:
count_vm_event(ZSWPIN);
--
b4 0.10.1
* [PATCH v5 3/5] mm/zswap: cleanup zswap_load()
From: Chengming Zhou @ 2023-12-28 9:45 UTC
To: Barry Song, Yosry Ahmed, Nhat Pham, Andrew Morton, Dan Streetman,
Vitaly Wool, Johannes Weiner, Chris Li, Seth Jennings
Cc: Yosry Ahmed, Nhat Pham, Chris Li, linux-mm, linux-kernel, Chengming Zhou
After the common decompression code has moved into __zswap_load(),
zswap_load() can be cleaned up a little.
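The core of zswap_load() then reads roughly as follows (a condensed sketch
taken from the diff below):

	if (entry->length)
		__zswap_load(entry, page);
	else {
		/* same-filled page: nothing compressed to decompress */
		dst = kmap_local_page(page);
		zswap_fill_page(dst, entry->value);
		kunmap_local(dst);
	}
	/* count ZSWPIN, optionally invalidate on exclusive loads, return true */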
Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Chris Li <chrisl@kernel.org> (Google)
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
mm/zswap.c | 14 +++++---------
1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index b25d7d03851d..618989463535 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1760,7 +1760,6 @@ bool zswap_load(struct folio *folio)
struct zswap_tree *tree = zswap_trees[type];
struct zswap_entry *entry;
u8 *dst;
- bool ret;
VM_WARN_ON_ONCE(!folio_test_locked(folio));
@@ -1773,23 +1772,20 @@ bool zswap_load(struct folio *folio)
}
spin_unlock(&tree->lock);
- if (!entry->length) {
+ if (entry->length)
+ __zswap_load(entry, page);
+ else {
dst = kmap_local_page(page);
zswap_fill_page(dst, entry->value);
kunmap_local(dst);
- ret = true;
- goto stats;
}
- __zswap_load(entry, page);
- ret = true;
-stats:
count_vm_event(ZSWPIN);
if (entry->objcg)
count_objcg_event(entry->objcg, ZSWPIN);
spin_lock(&tree->lock);
- if (ret && zswap_exclusive_loads_enabled) {
+ if (zswap_exclusive_loads_enabled) {
zswap_invalidate_entry(tree, entry);
folio_mark_dirty(folio);
} else if (entry->length) {
@@ -1799,7 +1795,7 @@ bool zswap_load(struct folio *folio)
zswap_entry_put(tree, entry);
spin_unlock(&tree->lock);
- return ret;
+ return true;
}
void zswap_invalidate(int type, pgoff_t offset)
--
b4 0.10.1
* [PATCH v5 4/5] mm/zswap: cleanup zswap_writeback_entry()
From: Chengming Zhou @ 2023-12-28 9:45 UTC
To: Barry Song, Yosry Ahmed, Nhat Pham, Andrew Morton, Dan Streetman,
Vitaly Wool, Johannes Weiner, Chris Li, Seth Jennings
Cc: Yosry Ahmed, Nhat Pham, Chris Li, linux-mm, linux-kernel, Chengming Zhou
Likewise, after the common decompression code has moved into
__zswap_load(), zswap_writeback_entry() can be cleaned up a little.
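The error paths turn into direct returns instead of jumping to a shared
fail label (a condensed sketch taken from the diff below):

	page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
				       NO_INTERLEAVE_INDEX, &page_was_allocated, true);
	if (!page)
		return -ENOMEM;

	/* raced with load/swapin: the page just became hot, skip it */
	if (!page_was_allocated) {
		put_page(page);
		return -EEXIST;
	}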
Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Acked-by: Chris Li <chrisl@kernel.org> (Google)
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
mm/zswap.c | 29 ++++++++++-------------------
1 file changed, 10 insertions(+), 19 deletions(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index 618989463535..e756b2af838b 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1443,7 +1443,6 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
struct page *page;
struct mempolicy *mpol;
bool page_was_allocated;
- int ret;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
};
@@ -1452,16 +1451,17 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
mpol = get_task_policy(current);
page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
NO_INTERLEAVE_INDEX, &page_was_allocated, true);
- if (!page) {
- ret = -ENOMEM;
- goto fail;
- }
+ if (!page)
+ return -ENOMEM;
- /* Found an existing page, we raced with load/swapin */
+ /*
+ * Found an existing page, we raced with load/swapin. We generally
+ * writeback cold pages from zswap, and swapin means the page just
+ * became hot. Skip this page and let the caller find another one.
+ */
if (!page_was_allocated) {
put_page(page);
- ret = -EEXIST;
- goto fail;
+ return -EEXIST;
}
/*
@@ -1475,8 +1475,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) {
spin_unlock(&tree->lock);
delete_from_swap_cache(page_folio(page));
- ret = -ENOMEM;
- goto fail;
+ return -ENOMEM;
}
spin_unlock(&tree->lock);
@@ -1497,15 +1496,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
__swap_writepage(page, &wbc);
put_page(page);
- return ret;
-
-fail:
- /*
- * If we get here because the page is already in swapcache, a
- * load may be happening concurrently. It is safe and okay to
- * not free the entry. It is also okay to return !0.
- */
- return ret;
+ return 0;
}
static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
--
b4 0.10.1
* [PATCH v5 5/5] mm/zswap: change per-cpu mutex and buffer to per-acomp_ctx
From: Chengming Zhou @ 2023-12-28 9:45 UTC
To: Barry Song, Yosry Ahmed, Nhat Pham, Andrew Morton, Dan Streetman,
Vitaly Wool, Johannes Weiner, Chris Li, Seth Jennings
Cc: Yosry Ahmed, Nhat Pham, Chris Li, linux-mm, linux-kernel, Chengming Zhou
First, rename the acomp_ctx->dstmem field to buffer, since it is now used
for purposes other than compression.
Then make the mutex and buffer per-acomp_ctx rather than separate per-CPU
variables, since they belong to the acomp_ctx and are needed whenever it is
used in the compress/decompress paths.
The old per-CPU mutex and dstmem can then be removed.
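The per-CPU context ends up looking like this (shape taken from the diff
below), with the mutex and the 2 * PAGE_SIZE buffer embedded directly in
each acomp_ctx:

	struct crypto_acomp_ctx {
		struct crypto_acomp *acomp;
		struct acomp_req *req;
		struct crypto_wait wait;
		u8 *buffer;		/* was the per-CPU zswap_dstmem */
		struct mutex mutex;	/* was the per-CPU zswap_mutex */
	};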
Acked-by: Chris Li <chrisl@kernel.org> (Google)
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
include/linux/cpuhotplug.h | 1 -
mm/zswap.c | 104 ++++++++++++++-------------------------------
2 files changed, 33 insertions(+), 72 deletions(-)
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index efc0c0b07efb..c3e06e21766a 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -124,7 +124,6 @@ enum cpuhp_state {
CPUHP_ARM_BL_PREPARE,
CPUHP_TRACE_RB_PREPARE,
CPUHP_MM_ZS_PREPARE,
- CPUHP_MM_ZSWP_MEM_PREPARE,
CPUHP_MM_ZSWP_POOL_PREPARE,
CPUHP_KVM_PPC_BOOK3S_PREPARE,
CPUHP_ZCOMP_PREPARE,
diff --git a/mm/zswap.c b/mm/zswap.c
index e756b2af838b..7c18755c6e1c 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -166,8 +166,8 @@ struct crypto_acomp_ctx {
struct crypto_acomp *acomp;
struct acomp_req *req;
struct crypto_wait wait;
- u8 *dstmem;
- struct mutex *mutex;
+ u8 *buffer;
+ struct mutex mutex;
};
/*
@@ -694,63 +694,26 @@ static void zswap_alloc_shrinker(struct zswap_pool *pool)
/*********************************
* per-cpu code
**********************************/
-static DEFINE_PER_CPU(u8 *, zswap_dstmem);
-/*
- * If users dynamically change the zpool type and compressor at runtime, i.e.
- * zswap is running, zswap can have more than one zpool on one cpu, but they
- * are sharing dtsmem. So we need this mutex to be per-cpu.
- */
-static DEFINE_PER_CPU(struct mutex *, zswap_mutex);
-
-static int zswap_dstmem_prepare(unsigned int cpu)
-{
- struct mutex *mutex;
- u8 *dst;
-
- dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
- if (!dst)
- return -ENOMEM;
-
- mutex = kmalloc_node(sizeof(*mutex), GFP_KERNEL, cpu_to_node(cpu));
- if (!mutex) {
- kfree(dst);
- return -ENOMEM;
- }
-
- mutex_init(mutex);
- per_cpu(zswap_dstmem, cpu) = dst;
- per_cpu(zswap_mutex, cpu) = mutex;
- return 0;
-}
-
-static int zswap_dstmem_dead(unsigned int cpu)
-{
- struct mutex *mutex;
- u8 *dst;
-
- mutex = per_cpu(zswap_mutex, cpu);
- kfree(mutex);
- per_cpu(zswap_mutex, cpu) = NULL;
-
- dst = per_cpu(zswap_dstmem, cpu);
- kfree(dst);
- per_cpu(zswap_dstmem, cpu) = NULL;
-
- return 0;
-}
-
static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
{
struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
struct crypto_acomp *acomp;
struct acomp_req *req;
+ int ret;
+
+ mutex_init(&acomp_ctx->mutex);
+
+ acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
+ if (!acomp_ctx->buffer)
+ return -ENOMEM;
acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
if (IS_ERR(acomp)) {
pr_err("could not alloc crypto acomp %s : %ld\n",
pool->tfm_name, PTR_ERR(acomp));
- return PTR_ERR(acomp);
+ ret = PTR_ERR(acomp);
+ goto acomp_fail;
}
acomp_ctx->acomp = acomp;
@@ -758,8 +721,8 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
if (!req) {
pr_err("could not alloc crypto acomp_request %s\n",
pool->tfm_name);
- crypto_free_acomp(acomp_ctx->acomp);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto req_fail;
}
acomp_ctx->req = req;
@@ -772,10 +735,13 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &acomp_ctx->wait);
- acomp_ctx->mutex = per_cpu(zswap_mutex, cpu);
- acomp_ctx->dstmem = per_cpu(zswap_dstmem, cpu);
-
return 0;
+
+req_fail:
+ crypto_free_acomp(acomp_ctx->acomp);
+acomp_fail:
+ kfree(acomp_ctx->buffer);
+ return ret;
}
static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
@@ -788,6 +754,7 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
acomp_request_free(acomp_ctx->req);
if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
crypto_free_acomp(acomp_ctx->acomp);
+ kfree(acomp_ctx->buffer);
}
return 0;
@@ -1400,12 +1367,12 @@ static void __zswap_load(struct zswap_entry *entry, struct page *page)
u8 *src;
acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
- mutex_lock(acomp_ctx->mutex);
+ mutex_lock(&acomp_ctx->mutex);
src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
if (!zpool_can_sleep_mapped(zpool)) {
- memcpy(acomp_ctx->dstmem, src, entry->length);
- src = acomp_ctx->dstmem;
+ memcpy(acomp_ctx->buffer, src, entry->length);
+ src = acomp_ctx->buffer;
zpool_unmap_handle(zpool, entry->handle);
}
@@ -1415,7 +1382,7 @@ static void __zswap_load(struct zswap_entry *entry, struct page *page)
acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
- mutex_unlock(acomp_ctx->mutex);
+ mutex_unlock(&acomp_ctx->mutex);
if (zpool_can_sleep_mapped(zpool))
zpool_unmap_handle(zpool, entry->handle);
@@ -1636,13 +1603,17 @@ bool zswap_store(struct folio *folio)
/* compress */
acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
- mutex_lock(acomp_ctx->mutex);
+ mutex_lock(&acomp_ctx->mutex);
- dst = acomp_ctx->dstmem;
+ dst = acomp_ctx->buffer;
sg_init_table(&input, 1);
sg_set_page(&input, page, PAGE_SIZE, 0);
- /* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect same in sg_list */
+ /*
+ * We need PAGE_SIZE * 2 here since there may be an over-compression
+ * case, and hardware accelerators may not check the dst buffer size,
+ * so give the dst buffer enough length to avoid buffer overflow.
+ */
sg_init_one(&output, dst, PAGE_SIZE * 2);
acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
/*
@@ -1682,7 +1653,7 @@ bool zswap_store(struct folio *folio)
buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
memcpy(buf, dst, dlen);
zpool_unmap_handle(zpool, handle);
- mutex_unlock(acomp_ctx->mutex);
+ mutex_unlock(&acomp_ctx->mutex);
/* populate entry */
entry->swpentry = swp_entry(type, offset);
@@ -1725,7 +1696,7 @@ bool zswap_store(struct folio *folio)
return true;
put_dstmem:
- mutex_unlock(acomp_ctx->mutex);
+ mutex_unlock(&acomp_ctx->mutex);
put_pool:
zswap_pool_put(entry->pool);
freepage:
@@ -1900,13 +1871,6 @@ static int zswap_setup(void)
goto cache_fail;
}
- ret = cpuhp_setup_state(CPUHP_MM_ZSWP_MEM_PREPARE, "mm/zswap:prepare",
- zswap_dstmem_prepare, zswap_dstmem_dead);
- if (ret) {
- pr_err("dstmem alloc failed\n");
- goto dstmem_fail;
- }
-
ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
"mm/zswap_pool:prepare",
zswap_cpu_comp_prepare,
@@ -1938,8 +1902,6 @@ static int zswap_setup(void)
if (pool)
zswap_pool_destroy(pool);
hp_fail:
- cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
-dstmem_fail:
kmem_cache_destroy(zswap_entry_cache);
cache_fail:
/* if built-in, we aren't unloaded on failure; don't allow use */
--
b4 0.10.1
* Re: [PATCH v5 5/5] mm/zswap: change per-cpu mutex and buffer to per-acomp_ctx
From: Yosry Ahmed @ 2023-12-28 15:19 UTC
To: Chengming Zhou
Cc: Barry Song, Nhat Pham, Andrew Morton, Dan Streetman, Vitaly Wool,
Johannes Weiner, Chris Li, Seth Jennings, Chris Li, linux-mm,
linux-kernel
On Thu, Dec 28, 2023 at 1:46 AM Chengming Zhou
<zhouchengming@bytedance.com> wrote:
>
> First, rename the acomp_ctx->dstmem field to buffer, since it is now
> used for purposes other than compression.
>
> Then make the mutex and buffer per-acomp_ctx rather than separate
> per-CPU variables, since they belong to the acomp_ctx and are needed
> whenever it is used in the compress/decompress paths.
>
> The old per-CPU mutex and dstmem can then be removed.
>
> Acked-by: Chris Li <chrisl@kernel.org> (Google)
> Reviewed-by: Nhat Pham <nphamcs@gmail.com>
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
Instead of hardcoding PAGE_SIZE * 2 multiple times, can we define a
constant for this and document the need for the buffer size there?
Even better if that constant is based on WORST_COMPR_FACTOR and shared
between zswap and zram -- but this can be a follow up change.
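A minimal sketch of what such a follow-up could look like (the
ZSWAP_DSTMEM_SIZE name is hypothetical, not something from this series):

	/* Hypothetical follow-up: name the worst-case dst buffer size once.
	 * Compression can expand the data, and hardware accelerators may not
	 * check the dst buffer size, so reserve two pages.
	 */
	#define ZSWAP_DSTMEM_SIZE	(2 * PAGE_SIZE)

	acomp_ctx->buffer = kmalloc_node(ZSWAP_DSTMEM_SIZE, GFP_KERNEL,
					 cpu_to_node(cpu));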