From: Sergey Senozhatsky <senozhatsky@chromium.org>
To: Andrew Morton <akpm@linux-foundation.org>,
Minchan Kim <minchan@kernel.org>,
Johannes Weiner <hannes@cmpxchg.org>,
Yosry Ahmed <yosry.ahmed@linux.dev>,
Nhat Pham <nphamcs@gmail.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
Sergey Senozhatsky <senozhatsky@chromium.org>
Subject: [RFC PATCH 5/6] zsmalloc: introduce handle mapping API
Date: Mon, 27 Jan 2025 16:59:30 +0900
Message-ID: <20250127080254.1302026-6-senozhatsky@chromium.org>
In-Reply-To: <20250127080254.1302026-1-senozhatsky@chromium.org>
Introduce a new API to map/unmap a zsmalloc handle/object. The key
difference is that this API does not impose atomicity restrictions on
its users, unlike zs_map_object(), which returns with page faults and
preemption disabled. The handle mapping API does not need a per-CPU
vm-area because users are required to provide an aux buffer for
objects that span several physical pages.

Keep zs_map_object()/zs_unmap_object() for the time being, as they
still have users, but the old API will eventually be removed.
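
As an illustration only (not part of this patch; the helper name and
the aux-buffer handling below are hypothetical), a caller reading an
object through the new API might look roughly like this, assuming
aux_buf is pre-allocated and at least as large as the object:

	/* Hypothetical caller sketch, for illustration only. */
	static int example_read_obj(struct zs_pool *pool, unsigned long handle,
				    void *dst, size_t size, void *aux_buf)
	{
		struct zs_handle_mapping map = {
			.handle		= handle,
			.mode		= ZS_MM_RO,
			/* used only if the object spans two physical pages */
			.local_copy	= aux_buf,
		};
		int err;

		err = zs_map_handle(pool, &map);
		if (err)
			return err;

		/* handle_mem points at the object data; the caller may sleep here */
		memcpy(dst, map.handle_mem, size);

		zs_unmap_handle(pool, &map);
		return 0;
	}

For a ZS_MM_WO/ZS_MM_RW mapping of an object that spans two pages, the
caller modifies the data via handle_mem (i.e. in the aux buffer) and
zs_unmap_handle() copies it back to the zspage.
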
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
---
include/linux/zsmalloc.h | 29 ++++++++
mm/zsmalloc.c | 148 ++++++++++++++++++++++++++++-----------
2 files changed, 138 insertions(+), 39 deletions(-)
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index a48cd0ffe57d..72d84537dd38 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -58,4 +58,33 @@ unsigned long zs_compact(struct zs_pool *pool);
unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size);
void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats);
+
+struct zs_handle_mapping {
+ unsigned long handle;
+ /*
+ * Points to the start of the object data, either within local_copy
+ * or within local_mapping. This is what callers should use to
+ * access or modify the handle data.
+ */
+ void *handle_mem;
+
+ enum zs_mapmode mode;
+ union {
+ /*
+ * Buffer into which the object data is copied when it spans
+ * several (non-contiguous) physical pages. This pointer should
+ * be set by the zs_map_handle() caller beforehand and should
+ * never be accessed directly.
+ */
+ void *local_copy;
+ /*
+ * Handle object mapped directly. Should never be used
+ * directly.
+ */
+ void *local_mapping;
+ };
+};
+
+int zs_map_handle(struct zs_pool *pool, struct zs_handle_mapping *map);
+void zs_unmap_handle(struct zs_pool *pool, struct zs_handle_mapping *map);
+
#endif
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index a5c1f9852072..281bba4a3277 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1132,18 +1132,14 @@ static inline void __zs_cpu_down(struct mapping_area *area)
area->vm_buf = NULL;
}
-static void *__zs_map_object(struct mapping_area *area,
- struct zpdesc *zpdescs[2], int off, int size)
+static void zs_obj_copyin(void *buf, struct zpdesc *zpdesc, int off, int size)
{
+ struct zpdesc *zpdescs[2];
size_t sizes[2];
- char *buf = area->vm_buf;
-
- /* disable page faults to match kmap_local_page() return conditions */
- pagefault_disable();
- /* no read fastpath */
- if (area->vm_mm == ZS_MM_WO)
- goto out;
+ zpdescs[0] = zpdesc;
+ zpdescs[1] = get_next_zpdesc(zpdesc);
+ BUG_ON(!zpdescs[1]);
sizes[0] = PAGE_SIZE - off;
sizes[1] = size - sizes[0];
@@ -1151,21 +1147,17 @@ static void *__zs_map_object(struct mapping_area *area,
/* copy object to per-cpu buffer */
memcpy_from_page(buf, zpdesc_page(zpdescs[0]), off, sizes[0]);
memcpy_from_page(buf + sizes[0], zpdesc_page(zpdescs[1]), 0, sizes[1]);
-out:
- return area->vm_buf;
}
-static void __zs_unmap_object(struct mapping_area *area,
- struct zpdesc *zpdescs[2], int off, int size)
+static void zs_obj_copyout(void *buf, struct zpdesc *zpdesc, int off, int size)
{
+ struct zpdesc *zpdescs[2];
size_t sizes[2];
- char *buf;
- /* no write fastpath */
- if (area->vm_mm == ZS_MM_RO)
- goto out;
+ zpdescs[0] = zpdesc;
+ zpdescs[1] = get_next_zpdesc(zpdesc);
+ BUG_ON(!zpdescs[1]);
- buf = area->vm_buf;
buf = buf + ZS_HANDLE_SIZE;
size -= ZS_HANDLE_SIZE;
off += ZS_HANDLE_SIZE;
@@ -1176,10 +1168,6 @@ static void __zs_unmap_object(struct mapping_area *area,
/* copy per-cpu buffer to object */
memcpy_to_page(zpdesc_page(zpdescs[0]), off, buf, sizes[0]);
memcpy_to_page(zpdesc_page(zpdescs[1]), 0, buf + sizes[0], sizes[1]);
-
-out:
- /* enable page faults to match kunmap_local() return conditions */
- pagefault_enable();
}
static int zs_cpu_prepare(unsigned int cpu)
@@ -1260,6 +1248,8 @@ EXPORT_SYMBOL_GPL(zs_get_total_pages);
* against nested mappings.
*
* This function returns with preemption and page faults disabled.
+ *
+ * NOTE: this function is deprecated and will be removed.
*/
void *zs_map_object(struct zs_pool *pool, unsigned long handle,
enum zs_mapmode mm)
@@ -1268,10 +1258,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
struct zpdesc *zpdesc;
unsigned long obj, off;
unsigned int obj_idx;
-
struct size_class *class;
struct mapping_area *area;
- struct zpdesc *zpdescs[2];
void *ret;
/*
@@ -1309,12 +1297,14 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
goto out;
}
- /* this object spans two pages */
- zpdescs[0] = zpdesc;
- zpdescs[1] = get_next_zpdesc(zpdesc);
- BUG_ON(!zpdescs[1]);
+ ret = area->vm_buf;
+ /* disable page faults to match kmap_local_page() return conditions */
+ pagefault_disable();
+ if (mm != ZS_MM_WO) {
+ /* this object spans two pages */
+ zs_obj_copyin(area->vm_buf, zpdesc, off, class->size);
+ }
- ret = __zs_map_object(area, zpdescs, off, class->size);
out:
if (likely(!ZsHugePage(zspage)))
ret += ZS_HANDLE_SIZE;
@@ -1323,13 +1313,13 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
}
EXPORT_SYMBOL_GPL(zs_map_object);
+/* NOTE: this function is deprecated and will be removed. */
void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
{
struct zspage *zspage;
struct zpdesc *zpdesc;
unsigned long obj, off;
unsigned int obj_idx;
-
struct size_class *class;
struct mapping_area *area;
@@ -1340,23 +1330,103 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
off = offset_in_page(class->size * obj_idx);
area = this_cpu_ptr(&zs_map_area);
- if (off + class->size <= PAGE_SIZE)
+ if (off + class->size <= PAGE_SIZE) {
kunmap_local(area->vm_addr);
- else {
- struct zpdesc *zpdescs[2];
+ goto out;
+ }
- zpdescs[0] = zpdesc;
- zpdescs[1] = get_next_zpdesc(zpdesc);
- BUG_ON(!zpdescs[1]);
+ if (area->vm_mm != ZS_MM_RO)
+ zs_obj_copyout(area->vm_buf, zpdesc, off, class->size);
+ /* enable page faults to match kunmap_local() return conditions */
+ pagefault_enable();
- __zs_unmap_object(area, zpdescs, off, class->size);
- }
+out:
local_unlock(&zs_map_area.lock);
-
zspage_read_unlock(zspage);
}
EXPORT_SYMBOL_GPL(zs_unmap_object);
+void zs_unmap_handle(struct zs_pool *pool, struct zs_handle_mapping *map)
+{
+ struct zspage *zspage;
+ struct zpdesc *zpdesc;
+ unsigned long obj, off;
+ unsigned int obj_idx;
+ struct size_class *class;
+
+ obj = handle_to_obj(map->handle);
+ obj_to_location(obj, &zpdesc, &obj_idx);
+ zspage = get_zspage(zpdesc);
+ class = zspage_class(pool, zspage);
+ off = offset_in_page(class->size * obj_idx);
+
+ if (off + class->size <= PAGE_SIZE) {
+ kunmap_local(map->local_mapping);
+ goto out;
+ }
+
+ if (map->mode != ZS_MM_RO)
+ zs_obj_copyout(map->local_copy, zpdesc, off, class->size);
+
+out:
+ zspage_read_unlock(zspage);
+}
+EXPORT_SYMBOL_GPL(zs_unmap_handle);
+
+int zs_map_handle(struct zs_pool *pool, struct zs_handle_mapping *map)
+{
+ struct zspage *zspage;
+ struct zpdesc *zpdesc;
+ unsigned long obj, off;
+ unsigned int obj_idx;
+ struct size_class *class;
+
+ WARN_ON(in_interrupt());
+
+ /* Guarantees we can safely get the zspage from the handle */
+ pool_read_lock(pool);
+ obj = handle_to_obj(map->handle);
+ obj_to_location(obj, &zpdesc, &obj_idx);
+ zspage = get_zspage(zpdesc);
+
+ /*
+ * Migration cannot move any zpages in this zspage. Here, class->lock
+ * would be too heavy, since callers may hold the mapping for a while
+ * before calling zs_unmap_handle(), so delegate the locking from the
+ * class to the zspage, which has a smaller granularity.
+ */
+ zspage_read_lock(zspage);
+ pool_read_unlock(pool);
+
+ class = zspage_class(pool, zspage);
+ off = offset_in_page(class->size * obj_idx);
+
+ if (off + class->size <= PAGE_SIZE) {
+ /* this object is contained entirely within a page */
+ map->local_mapping = kmap_local_zpdesc(zpdesc);
+ map->handle_mem = map->local_mapping + off;
+ goto out;
+ }
+
+ if (WARN_ON_ONCE(!map->local_copy)) {
+ zspage_read_unlock(zspage);
+ return -EINVAL;
+ }
+
+ map->handle_mem = map->local_copy;
+ if (map->mode != ZS_MM_WO) {
+ /* this object spans two pages */
+ zs_obj_copyin(map->local_copy, zpdesc, off, class->size);
+ }
+
+out:
+ if (likely(!ZsHugePage(zspage)))
+ map->handle_mem += ZS_HANDLE_SIZE;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(zs_map_handle);
+
/**
* zs_huge_class_size() - Returns the size (in bytes) of the first huge
* zsmalloc &size_class.
--
2.48.1.262.g85cc9f2d1e-goog
Thread overview: 24+ messages
2025-01-27 7:59 [RFC PATCH 0/6] zsmalloc: make zsmalloc preemptible Sergey Senozhatsky
2025-01-27 7:59 ` [RFC PATCH 1/6] zram: deffer slot free notification Sergey Senozhatsky
2025-01-27 7:59 ` [RFC PATCH 2/6] zsmalloc: make zspage lock preemptible Sergey Senozhatsky
2025-01-27 20:23 ` Uros Bizjak
2025-01-28 0:29 ` Sergey Senozhatsky
2025-01-27 7:59 ` [RFC PATCH 3/6] zsmalloc: convert to sleepable pool lock Sergey Senozhatsky
2025-01-27 7:59 ` [RFC PATCH 4/6] zsmalloc: make class lock sleepable Sergey Senozhatsky
2025-01-27 7:59 ` Sergey Senozhatsky [this message]
2025-01-27 21:26 ` [RFC PATCH 5/6] zsmalloc: introduce handle mapping API Yosry Ahmed
2025-01-28 0:37 ` Sergey Senozhatsky
2025-01-28 0:49 ` Yosry Ahmed
2025-01-28 1:13 ` Sergey Senozhatsky
2025-01-27 21:58 ` Yosry Ahmed
2025-01-28 0:59 ` Sergey Senozhatsky
2025-01-28 1:36 ` Yosry Ahmed
2025-01-28 5:29 ` Sergey Senozhatsky
2025-01-28 9:38 ` Sergey Senozhatsky
2025-01-28 17:21 ` Yosry Ahmed
2025-01-29 3:32 ` Sergey Senozhatsky
2025-01-28 11:10 ` Sergey Senozhatsky
2025-01-28 17:22 ` Yosry Ahmed
2025-01-28 23:01 ` Sergey Senozhatsky
2025-01-29 5:40 ` Sergey Senozhatsky
2025-01-27 7:59 ` [RFC PATCH 6/6] zram: switch over to zshandle " Sergey Senozhatsky