* [PATCH] zsmalloc: use actual object size to detect spans
@ 2026-01-06  4:25 Sergey Senozhatsky
  2026-01-07  0:23 ` Yosry Ahmed
  0 siblings, 1 reply; 15+ messages in thread
From: Sergey Senozhatsky @ 2026-01-06  4:25 UTC (permalink / raw)
  To: Andrew Morton, Yosry Ahmed, Nhat Pham
  Cc: Minchan Kim, Johannes Weiner, Brian Geffon, linux-kernel,
	linux-mm, Sergey Senozhatsky

Using class->size to detect spanning objects is not entirely correct,
because due to size class merging some classes can hold a range of
object sizes, up to class->size bytes.  Such classes pad objects that
are actually written with fewer than class->size bytes.
zs_obj_read_begin() can incorrectly hit the slow path for such objects
and memcpy them, copying padding bytes along the way.  Instead of
class->size, zs_obj_read_begin() should use the actual compressed
object length (both zram and zswap know it) so that it can correctly
handle the case where a written object is small enough to fit into the
first physical page.
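
For example (hypothetical numbers, 4K pages, 8-byte handles): take a
merged size class of 176 bytes holding a 100-byte compressed object at
in-page offset 3968.  The object fits entirely within its page, since
3968 + 8 + 100 = 4076 <= 4096, yet off + class->size = 4144 > PAGE_SIZE,
so zs_obj_read_begin() takes the spanning slow path and copies all 176
bytes, 68 of which are padding.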

Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
---
 drivers/block/zram/zram_drv.c | 14 ++++++++------
 include/linux/zsmalloc.h      |  4 ++--
 mm/zsmalloc.c                 | 16 ++++++++++++----
 mm/zswap.c                    |  5 +++--
 4 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index a6587bed6a03..76a54eabe889 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -2065,11 +2065,11 @@ static int read_incompressible_page(struct zram *zram, struct page *page,
 	void *src, *dst;
 
 	handle = get_slot_handle(zram, index);
-	src = zs_obj_read_begin(zram->mem_pool, handle, NULL);
+	src = zs_obj_read_begin(zram->mem_pool, handle, PAGE_SIZE, NULL);
 	dst = kmap_local_page(page);
 	copy_page(dst, src);
 	kunmap_local(dst);
-	zs_obj_read_end(zram->mem_pool, handle, src);
+	zs_obj_read_end(zram->mem_pool, handle, PAGE_SIZE, src);
 
 	return 0;
 }
@@ -2087,11 +2087,12 @@ static int read_compressed_page(struct zram *zram, struct page *page, u32 index)
 	prio = get_slot_comp_priority(zram, index);
 
 	zstrm = zcomp_stream_get(zram->comps[prio]);
-	src = zs_obj_read_begin(zram->mem_pool, handle, zstrm->local_copy);
+	src = zs_obj_read_begin(zram->mem_pool, handle, size,
+				zstrm->local_copy);
 	dst = kmap_local_page(page);
 	ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst);
 	kunmap_local(dst);
-	zs_obj_read_end(zram->mem_pool, handle, src);
+	zs_obj_read_end(zram->mem_pool, handle, size, src);
 	zcomp_stream_put(zstrm);
 
 	return ret;
@@ -2114,9 +2115,10 @@ static int read_from_zspool_raw(struct zram *zram, struct page *page, u32 index)
 	 * takes place here, as we read raw compressed data.
 	 */
 	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
-	src = zs_obj_read_begin(zram->mem_pool, handle, zstrm->local_copy);
+	src = zs_obj_read_begin(zram->mem_pool, handle, size,
+				zstrm->local_copy);
 	memcpy_to_page(page, 0, src, size);
-	zs_obj_read_end(zram->mem_pool, handle, src);
+	zs_obj_read_end(zram->mem_pool, handle, size, src);
 	zcomp_stream_put(zstrm);
 
 	return 0;
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index f3ccff2d966c..5565c3171007 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -40,9 +40,9 @@ unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size);
 void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats);
 
 void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
-			void *local_copy);
+			size_t mem_len, void *local_copy);
 void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
-		     void *handle_mem);
+		     size_t mem_len, void *handle_mem);
 void zs_obj_write(struct zs_pool *pool, unsigned long handle,
 		  void *handle_mem, size_t mem_len);
 
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 84da164dcbc5..ab7767d9d87d 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1065,7 +1065,7 @@ unsigned long zs_get_total_pages(struct zs_pool *pool)
 EXPORT_SYMBOL_GPL(zs_get_total_pages);
 
 void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
-			void *local_copy)
+			size_t mem_len, void *local_copy)
 {
 	struct zspage *zspage;
 	struct zpdesc *zpdesc;
@@ -1087,7 +1087,11 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
 	class = zspage_class(pool, zspage);
 	off = offset_in_page(class->size * obj_idx);
 
-	if (off + class->size <= PAGE_SIZE) {
+	/* Normal classes have inlined handle */
+	if (!ZsHugePage(zspage))
+		mem_len += ZS_HANDLE_SIZE;
+
+	if (off + mem_len <= PAGE_SIZE) {
 		/* this object is contained entirely within a page */
 		addr = kmap_local_zpdesc(zpdesc);
 		addr += off;
@@ -1115,7 +1119,7 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
 EXPORT_SYMBOL_GPL(zs_obj_read_begin);
 
 void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
-		     void *handle_mem)
+		     size_t mem_len, void *handle_mem)
 {
 	struct zspage *zspage;
 	struct zpdesc *zpdesc;
@@ -1129,7 +1133,11 @@ void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
 	class = zspage_class(pool, zspage);
 	off = offset_in_page(class->size * obj_idx);
 
-	if (off + class->size <= PAGE_SIZE) {
+	/* Normal classes have inlined handle */
+	if (!ZsHugePage(zspage))
+		mem_len += ZS_HANDLE_SIZE;
+
+	if (off + mem_len <= PAGE_SIZE) {
 		if (!ZsHugePage(zspage))
 			off += ZS_HANDLE_SIZE;
 		handle_mem -= off;
diff --git a/mm/zswap.c b/mm/zswap.c
index de8858ff1521..a3811b05ab57 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -937,7 +937,8 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
 	u8 *src, *obj;
 
 	acomp_ctx = acomp_ctx_get_cpu_lock(pool);
-	obj = zs_obj_read_begin(pool->zs_pool, entry->handle, acomp_ctx->buffer);
+	obj = zs_obj_read_begin(pool->zs_pool, entry->handle, entry->length,
+				acomp_ctx->buffer);
 
 	/* zswap entries of length PAGE_SIZE are not compressed. */
 	if (entry->length == PAGE_SIZE) {
@@ -966,7 +967,7 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
 	dlen = acomp_ctx->req->dlen;
 
 read_done:
-	zs_obj_read_end(pool->zs_pool, entry->handle, obj);
+	zs_obj_read_end(pool->zs_pool, entry->handle, entry->length, obj);
 	acomp_ctx_put_unlock(acomp_ctx);
 
 	if (!decomp_ret && dlen == PAGE_SIZE)
-- 
2.52.0.351.gbe84eed79e-goog




* Re: [PATCH] zsmalloc: use actual object size to detect spans
  2026-01-06  4:25 [PATCH] zsmalloc: use actual object size to detect spans Sergey Senozhatsky
@ 2026-01-07  0:23 ` Yosry Ahmed
  2026-01-07  0:59   ` Sergey Senozhatsky
  0 siblings, 1 reply; 15+ messages in thread
From: Yosry Ahmed @ 2026-01-07  0:23 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Andrew Morton, Nhat Pham, Minchan Kim, Johannes Weiner,
	Brian Geffon, linux-kernel, linux-mm

On Tue, Jan 06, 2026 at 01:25:07PM +0900, Sergey Senozhatsky wrote:
> Using class->size to detect spanning objects is not entirely correct,
> because due to size class merging some classes can hold a range of
> object sizes, up to class->size bytes.  Such classes pad objects that
> are actually written with fewer than class->size bytes.
> zs_obj_read_begin() can incorrectly hit the slow path for such objects
> and memcpy them, copying padding bytes along the way.  Instead of
> class->size, zs_obj_read_begin() should use the actual compressed
> object length (both zram and zswap know it) so that it can correctly
> handle the case where a written object is small enough to fit into the
> first physical page.
> 
> Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
> ---
>  drivers/block/zram/zram_drv.c | 14 ++++++++------
>  include/linux/zsmalloc.h      |  4 ++--
>  mm/zsmalloc.c                 | 16 ++++++++++++----
>  mm/zswap.c                    |  5 +++--
>  4 files changed, 25 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index a6587bed6a03..76a54eabe889 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -2065,11 +2065,11 @@ static int read_incompressible_page(struct zram *zram, struct page *page,
>  	void *src, *dst;
>  
>  	handle = get_slot_handle(zram, index);
> -	src = zs_obj_read_begin(zram->mem_pool, handle, NULL);
> +	src = zs_obj_read_begin(zram->mem_pool, handle, PAGE_SIZE, NULL);
>  	dst = kmap_local_page(page);
>  	copy_page(dst, src);
>  	kunmap_local(dst);
> -	zs_obj_read_end(zram->mem_pool, handle, src);
> +	zs_obj_read_end(zram->mem_pool, handle, PAGE_SIZE, src);
>  
>  	return 0;
>  }
> @@ -2087,11 +2087,12 @@ static int read_compressed_page(struct zram *zram, struct page *page, u32 index)
>  	prio = get_slot_comp_priority(zram, index);
>  
>  	zstrm = zcomp_stream_get(zram->comps[prio]);
> -	src = zs_obj_read_begin(zram->mem_pool, handle, zstrm->local_copy);
> +	src = zs_obj_read_begin(zram->mem_pool, handle, size,
> +				zstrm->local_copy);
>  	dst = kmap_local_page(page);
>  	ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst);
>  	kunmap_local(dst);
> -	zs_obj_read_end(zram->mem_pool, handle, src);
> +	zs_obj_read_end(zram->mem_pool, handle, size, src);
>  	zcomp_stream_put(zstrm);
>  
>  	return ret;
> @@ -2114,9 +2115,10 @@ static int read_from_zspool_raw(struct zram *zram, struct page *page, u32 index)
>  	 * takes place here, as we read raw compressed data.
>  	 */
>  	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
> -	src = zs_obj_read_begin(zram->mem_pool, handle, zstrm->local_copy);
> +	src = zs_obj_read_begin(zram->mem_pool, handle, size,
> +				zstrm->local_copy);
>  	memcpy_to_page(page, 0, src, size);
> -	zs_obj_read_end(zram->mem_pool, handle, src);
> +	zs_obj_read_end(zram->mem_pool, handle, size, src);
>  	zcomp_stream_put(zstrm);
>  
>  	return 0;
> diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
> index f3ccff2d966c..5565c3171007 100644
> --- a/include/linux/zsmalloc.h
> +++ b/include/linux/zsmalloc.h
> @@ -40,9 +40,9 @@ unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size);
>  void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats);
>  
>  void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
> -			void *local_copy);
> +			size_t mem_len, void *local_copy);
>  void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
> -		     void *handle_mem);
> +		     size_t mem_len, void *handle_mem);
>  void zs_obj_write(struct zs_pool *pool, unsigned long handle,
>  		  void *handle_mem, size_t mem_len);
>  
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 84da164dcbc5..ab7767d9d87d 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -1065,7 +1065,7 @@ unsigned long zs_get_total_pages(struct zs_pool *pool)
>  EXPORT_SYMBOL_GPL(zs_get_total_pages);
>  
>  void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
> -			void *local_copy)
> +			size_t mem_len, void *local_copy)
>  {
>  	struct zspage *zspage;
>  	struct zpdesc *zpdesc;
> @@ -1087,7 +1087,11 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
>  	class = zspage_class(pool, zspage);
>  	off = offset_in_page(class->size * obj_idx);
>  
> -	if (off + class->size <= PAGE_SIZE) {
> +	/* Normal classes have inlined handle */
> +	if (!ZsHugePage(zspage))
> +		mem_len += ZS_HANDLE_SIZE;

Instead of modifying mem_len, can we modify 'off' like zs_obj_write()
and zs_obj_read_end()? I think this can actually be done as a prequel to
this patch. Arguably, it makes more sense as we avoid unnecessarily
copying the handle (completely untested):

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 5bf832f9c05c..48c288da43b8 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1087,6 +1087,9 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
        class = zspage_class(pool, zspage);
        off = offset_in_page(class->size * obj_idx);

+       if (!ZsHugePage(zspage))
+               off += ZS_HANDLE_SIZE;
+
        if (off + class->size <= PAGE_SIZE) {
                /* this object is contained entirely within a page */
                addr = kmap_local_zpdesc(zpdesc);
@@ -1107,9 +1110,6 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
                                 0, sizes[1]);
        }

-       if (!ZsHugePage(zspage))
-               addr += ZS_HANDLE_SIZE;
-
        return addr;
 }
 EXPORT_SYMBOL_GPL(zs_obj_read_begin);
@@ -1129,9 +1129,10 @@ void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
        class = zspage_class(pool, zspage);
        off = offset_in_page(class->size * obj_idx);

+       if (!ZsHugePage(zspage))
+               off += ZS_HANDLE_SIZE;
+
        if (off + class->size <= PAGE_SIZE) {
-               if (!ZsHugePage(zspage))
-                       off += ZS_HANDLE_SIZE;
                handle_mem -= off;
                kunmap_local(handle_mem);
        }

---
Does this work?

> +
> +	if (off + mem_len <= PAGE_SIZE) {
>  		/* this object is contained entirely within a page */
>  		addr = kmap_local_zpdesc(zpdesc);
>  		addr += off;

In the else case below (spanning object), should we also use mem_len
instead of class->size to determine the copy size?
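
I.e. something like this in the spanning path (untested, on top of this
patch):

	/* this object spans two pages */
	sizes[0] = PAGE_SIZE - off;
	sizes[1] = mem_len - sizes[0];	/* was: class->size - sizes[0] */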

> @@ -1115,7 +1119,7 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
>  EXPORT_SYMBOL_GPL(zs_obj_read_begin);
>  
>  void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
> -		     void *handle_mem)
> +		     size_t mem_len, void *handle_mem)
>  {
>  	struct zspage *zspage;
>  	struct zpdesc *zpdesc;
> @@ -1129,7 +1133,11 @@ void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
>  	class = zspage_class(pool, zspage);
>  	off = offset_in_page(class->size * obj_idx);
>  
> -	if (off + class->size <= PAGE_SIZE) {
> +	/* Normal classes have inlined handle */
> +	if (!ZsHugePage(zspage))
> +		mem_len += ZS_HANDLE_SIZE;
> +
> +	if (off + mem_len <= PAGE_SIZE) {
>  		if (!ZsHugePage(zspage))
>  			off += ZS_HANDLE_SIZE;

With the proposed prequel patch, I think we won't need to handle
ZS_HANDLE_SIZE twice here. WDYT? Did I miss sth?

>  		handle_mem -= off;
> diff --git a/mm/zswap.c b/mm/zswap.c
> index de8858ff1521..a3811b05ab57 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -937,7 +937,8 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
>  	u8 *src, *obj;
>  
>  	acomp_ctx = acomp_ctx_get_cpu_lock(pool);
> -	obj = zs_obj_read_begin(pool->zs_pool, entry->handle, acomp_ctx->buffer);
> +	obj = zs_obj_read_begin(pool->zs_pool, entry->handle, entry->length,
> +				acomp_ctx->buffer);
>  
>  	/* zswap entries of length PAGE_SIZE are not compressed. */
>  	if (entry->length == PAGE_SIZE) {
> @@ -966,7 +967,7 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
>  	dlen = acomp_ctx->req->dlen;
>  
>  read_done:
> -	zs_obj_read_end(pool->zs_pool, entry->handle, obj);
> +	zs_obj_read_end(pool->zs_pool, entry->handle, entry->length, obj);
>  	acomp_ctx_put_unlock(acomp_ctx);
>  
>  	if (!decomp_ret && dlen == PAGE_SIZE)
> -- 
> 2.52.0.351.gbe84eed79e-goog
> 



* Re: [PATCH] zsmalloc: use actual object size to detect spans
  2026-01-07  0:23 ` Yosry Ahmed
@ 2026-01-07  0:59   ` Sergey Senozhatsky
  2026-01-07  1:37     ` Sergey Senozhatsky
  0 siblings, 1 reply; 15+ messages in thread
From: Sergey Senozhatsky @ 2026-01-07  0:59 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: Sergey Senozhatsky, Andrew Morton, Nhat Pham, Minchan Kim,
	Johannes Weiner, Brian Geffon, linux-kernel, linux-mm

On (26/01/07 00:23), Yosry Ahmed wrote:
> Instead of modifying mem_len, can we modify 'off' like zs_obj_write()
> and zs_obj_read_end()? I think this can actually be done as a prequel to
> this patch. Arguably, it makes more sense as we avoid unnecessarily
> copying the handle (completely untested):
> 
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 5bf832f9c05c..48c288da43b8 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -1087,6 +1087,9 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
>         class = zspage_class(pool, zspage);
>         off = offset_in_page(class->size * obj_idx);
> 
> +       if (!ZsHugePage(zspage))
> +               off += ZS_HANDLE_SIZE;
> +
>         if (off + class->size <= PAGE_SIZE) {
>                 /* this object is contained entirely within a page */
>                 addr = kmap_local_zpdesc(zpdesc);
> @@ -1107,9 +1110,6 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
>                                  0, sizes[1]);
>         }
> 
> -       if (!ZsHugePage(zspage))
> -               addr += ZS_HANDLE_SIZE;
> -
>         return addr;
>  }
>  EXPORT_SYMBOL_GPL(zs_obj_read_begin);
> @@ -1129,9 +1129,10 @@ void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
>         class = zspage_class(pool, zspage);
>         off = offset_in_page(class->size * obj_idx);
> 
> +       if (!ZsHugePage(zspage))
> +               off += ZS_HANDLE_SIZE;
> +
>         if (off + class->size <= PAGE_SIZE) {
> -               if (!ZsHugePage(zspage))
> -                       off += ZS_HANDLE_SIZE;
>                 handle_mem -= off;
>                 kunmap_local(handle_mem);
>         }
> 
> ---
> Does this work?

Sounds interesting.  Let me try it out.

> > +
> > +	if (off + mem_len <= PAGE_SIZE) {
> >  		/* this object is contained entirely within a page */
> >  		addr = kmap_local_zpdesc(zpdesc);
> >  		addr += off;
> 
> In the else case below (spanning object), should we also use mem_len
> instead of class->size to determine the copy size?

Good catch.

> > @@ -1115,7 +1119,7 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
> >  EXPORT_SYMBOL_GPL(zs_obj_read_begin);
> >  
> >  void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
> > -		     void *handle_mem)
> > +		     size_t mem_len, void *handle_mem)
> >  {
> >  	struct zspage *zspage;
> >  	struct zpdesc *zpdesc;
> > @@ -1129,7 +1133,11 @@ void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
> >  	class = zspage_class(pool, zspage);
> >  	off = offset_in_page(class->size * obj_idx);
> >  
> > -	if (off + class->size <= PAGE_SIZE) {
> > +	/* Normal classes have inlined handle */
> > +	if (!ZsHugePage(zspage))
> > +		mem_len += ZS_HANDLE_SIZE;
> > +
> > +	if (off + mem_len <= PAGE_SIZE) {
> >  		if (!ZsHugePage(zspage))
> >  			off += ZS_HANDLE_SIZE;
> 
> With the proposed prequel patch, I think we won't need to handle
> ZS_HANDLE_SIZE twice here. WDYT? Did I miss sth?

Let me look more closely.



* Re: [PATCH] zsmalloc: use actual object size to detect spans
  2026-01-07  0:59   ` Sergey Senozhatsky
@ 2026-01-07  1:37     ` Sergey Senozhatsky
  2026-01-07  1:56       ` Yosry Ahmed
  0 siblings, 1 reply; 15+ messages in thread
From: Sergey Senozhatsky @ 2026-01-07  1:37 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: Andrew Morton, Nhat Pham, Minchan Kim, Johannes Weiner,
	Brian Geffon, linux-kernel, linux-mm, Sergey Senozhatsky

On (26/01/07 09:59), Sergey Senozhatsky wrote:
> On (26/01/07 00:23), Yosry Ahmed wrote:
> > Instead of modifying mem_len, can we modify 'off' like zs_obj_write()
> > and zs_obj_read_end()? I think this can actually be done as a prequel to
> > this patch. Arguably, it makes more sense as we avoid unnecessarily
> > copying the handle (completely untested):
> > 
> > diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> > index 5bf832f9c05c..48c288da43b8 100644
> > --- a/mm/zsmalloc.c
> > +++ b/mm/zsmalloc.c
> > @@ -1087,6 +1087,9 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
> >         class = zspage_class(pool, zspage);
> >         off = offset_in_page(class->size * obj_idx);
> > 
> > +       if (!ZsHugePage(zspage))
> > +               off += ZS_HANDLE_SIZE;
> > +
> >         if (off + class->size <= PAGE_SIZE) {
> >                 /* this object is contained entirely within a page */
> >                 addr = kmap_local_zpdesc(zpdesc);
> > @@ -1107,9 +1110,6 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
> >                                  0, sizes[1]);
> >         }
> > 
> > -       if (!ZsHugePage(zspage))
> > -               addr += ZS_HANDLE_SIZE;
> > -
> >         return addr;
> >  }
> >  EXPORT_SYMBOL_GPL(zs_obj_read_begin);
> > @@ -1129,9 +1129,10 @@ void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
> >         class = zspage_class(pool, zspage);
> >         off = offset_in_page(class->size * obj_idx);
> > 
> > +       if (!ZsHugePage(zspage))
> > +               off += ZS_HANDLE_SIZE;
> > +
> >         if (off + class->size <= PAGE_SIZE) {
> > -               if (!ZsHugePage(zspage))
> > -                       off += ZS_HANDLE_SIZE;
> >                 handle_mem -= off;
> >                 kunmap_local(handle_mem);
> >         }
> > 
> > ---
> > Does this work?
> 
> Sounds interesting.  Let me try it out.

I recall us having exactly this idea when we first introduced
zs_obj_{read,write}_end() functions, and I do recall that it
did not work.  Somehow this panics in __memcpy+0xc/0x44.  Let
me dig into it again.



* Re: [PATCH] zsmalloc: use actual object size to detect spans
  2026-01-07  1:37     ` Sergey Senozhatsky
@ 2026-01-07  1:56       ` Yosry Ahmed
  2026-01-07  2:06         ` Sergey Senozhatsky
  0 siblings, 1 reply; 15+ messages in thread
From: Yosry Ahmed @ 2026-01-07  1:56 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Andrew Morton, Nhat Pham, Minchan Kim, Johannes Weiner,
	Brian Geffon, linux-kernel, linux-mm

On Wed, Jan 07, 2026 at 10:37:24AM +0900, Sergey Senozhatsky wrote:
> On (26/01/07 09:59), Sergey Senozhatsky wrote:
> > On (26/01/07 00:23), Yosry Ahmed wrote:
> > > Instead of modifying mem_len, can we modify 'off' like zs_obj_write()
> > > and zs_obj_read_end()? I think this can actually be done as a prequel to
> > > this patch. Arguably, it makes more sense as we avoid unnecessarily
> > > copying the handle (completely untested):
> > > 
> > > diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> > > index 5bf832f9c05c..48c288da43b8 100644
> > > --- a/mm/zsmalloc.c
> > > +++ b/mm/zsmalloc.c
> > > @@ -1087,6 +1087,9 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
> > >         class = zspage_class(pool, zspage);
> > >         off = offset_in_page(class->size * obj_idx);
> > > 
> > > +       if (!ZsHugePage(zspage))
> > > +               off += ZS_HANDLE_SIZE;
> > > +
> > >         if (off + class->size <= PAGE_SIZE) {
> > >                 /* this object is contained entirely within a page */
> > >                 addr = kmap_local_zpdesc(zpdesc);
> > > @@ -1107,9 +1110,6 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
> > >                                  0, sizes[1]);
> > >         }
> > > 
> > > -       if (!ZsHugePage(zspage))
> > > -               addr += ZS_HANDLE_SIZE;
> > > -
> > >         return addr;
> > >  }
> > >  EXPORT_SYMBOL_GPL(zs_obj_read_begin);
> > > @@ -1129,9 +1129,10 @@ void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
> > >         class = zspage_class(pool, zspage);
> > >         off = offset_in_page(class->size * obj_idx);
> > > 
> > > +       if (!ZsHugePage(zspage))
> > > +               off += ZS_HANDLE_SIZE;
> > > +
> > >         if (off + class->size <= PAGE_SIZE) {
> > > -               if (!ZsHugePage(zspage))
> > > -                       off += ZS_HANDLE_SIZE;
> > >                 handle_mem -= off;
> > >                 kunmap_local(handle_mem);
> > >         }
> > > 
> > > ---
> > > Does this work?
> > 
> > Sounds interesting.  Let me try it out.
> 
> I recall us having exactly this idea when we first introduced
> zs_obj_{read,write}_end() functions, and I do recall that it
> did not work.  Somehow this panics in __memcpy+0xc/0x44.  Let
> me dig into it again.

Maybe because at this point we are trying to memcpy() class->size, which
already includes ZS_HANDLE_SIZE. So reading after increasing the offset
reads ZS_HANDLE_SIZE after class->size.



* Re: [PATCH] zsmalloc: use actual object size to detect spans
  2026-01-07  1:56       ` Yosry Ahmed
@ 2026-01-07  2:06         ` Sergey Senozhatsky
  2026-01-07  2:10           ` Yosry Ahmed
  0 siblings, 1 reply; 15+ messages in thread
From: Sergey Senozhatsky @ 2026-01-07  2:06 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: Sergey Senozhatsky, Andrew Morton, Nhat Pham, Minchan Kim,
	Johannes Weiner, Brian Geffon, linux-kernel, linux-mm

On (26/01/07 01:56), Yosry Ahmed wrote:
> > I recall us having exactly this idea when we first introduced
> > zs_obj_{read,write}_end() functions, and I do recall that it
> > did not work.  Somehow this panics in __memcpy+0xc/0x44.  Let
> > me dig into it again.
> 
> Maybe because at this point we are trying to memcpy() class->size, which
> already includes ZS_HANDLE_SIZE. So reading after increasing the offset
> reads ZS_HANDLE_SIZE after class->size.

Yeah, I guess that falsely hits the spanning path because of extra
sizeof(unsigned long).
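
I.e. with the prequel applied, the spanning check effectively becomes

	off + ZS_HANDLE_SIZE + class->size <= PAGE_SIZE

so the handle, which class->size already accounts for, gets counted
twice, and objects that end near a page boundary falsely look like
they span.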



* Re: [PATCH] zsmalloc: use actual object size to detect spans
  2026-01-07  2:06         ` Sergey Senozhatsky
@ 2026-01-07  2:10           ` Yosry Ahmed
  2026-01-07  2:20             ` Sergey Senozhatsky
  2026-01-07  3:03             ` Sergey Senozhatsky
  0 siblings, 2 replies; 15+ messages in thread
From: Yosry Ahmed @ 2026-01-07  2:10 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Andrew Morton, Nhat Pham, Minchan Kim, Johannes Weiner,
	Brian Geffon, linux-kernel, linux-mm

On Wed, Jan 07, 2026 at 11:06:09AM +0900, Sergey Senozhatsky wrote:
> On (26/01/07 01:56), Yosry Ahmed wrote:
> > > I recall us having exactly this idea when we first introduced
> > > zs_obj_{read,write}_end() functions, and I do recall that it
> > > did not work.  Somehow this panics in __memcpy+0xc/0x44.  Let
> > > me dig into it again.
> > 
> > Maybe because at this point we are trying to memcpy() class->size, which
> > already includes ZS_HANDLE_SIZE. So reading after increasing the offset
> > reads ZS_HANDLE_SIZE after class->size.
> 
> Yeah, I guess that falsely hits the spanning path because of extra
> sizeof(unsigned long).

Or the object could indeed be spanning two pages, but then we're copying
an extra sizeof(unsigned long); that shouldn't crash, though.

I think the changes need to be shuffled around to avoid this, or just
have a combined patch, which would be less pretty.



* Re: [PATCH] zsmalloc: use actual object size to detect spans
  2026-01-07  2:10           ` Yosry Ahmed
@ 2026-01-07  2:20             ` Sergey Senozhatsky
  2026-01-07  2:22               ` Sergey Senozhatsky
  2026-01-07  5:19               ` Yosry Ahmed
  2026-01-07  3:03             ` Sergey Senozhatsky
  1 sibling, 2 replies; 15+ messages in thread
From: Sergey Senozhatsky @ 2026-01-07  2:20 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: Sergey Senozhatsky, Andrew Morton, Nhat Pham, Minchan Kim,
	Johannes Weiner, Brian Geffon, linux-kernel, linux-mm

On (26/01/07 02:10), Yosry Ahmed wrote:
> On Wed, Jan 07, 2026 at 11:06:09AM +0900, Sergey Senozhatsky wrote:
> > On (26/01/07 01:56), Yosry Ahmed wrote:
> > > > I recall us having exactly this idea when we first introduced
> > > > zs_obj_{read,write}_end() functions, and I do recall that it
> > > > did not work.  Somehow this panics in __memcpy+0xc/0x44.  Let
> > > > me dig into it again.
> > > 
> > > Maybe because at this point we are trying to memcpy() class->size, which
> > > already includes ZS_HANDLE_SIZE. So reading after increasing the offset
> > > reads ZS_HANDLE_SIZE after class->size.
> > 
> > Yeah, I guess that falsely hits the spanning path because of extra
> > sizeof(unsigned long).
> 
> Or the object could indeed be spanning two pages, but then we're copying
> an extra sizeof(unsigned long); that shouldn't crash, though.

It seems there is no second page, it's a pow-of-two size class.  So
we mis-detect spanning.

[   51.406310] zsmalloc: :: size class 48, orig offt 16336, page size 16384, memcpy sizes 40, 8
[   51.407571] Unable to handle kernel paging request at virtual address ffffc04000000000
[   51.420816] pc : __memcpy+0xc/0x44

Second memcpy() of sizeof(unsigned long) traps.
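
That is, with the prequel applied:

	off      = 16336 + ZS_HANDLE_SIZE = 16344
	sizes[0] = PAGE_SIZE - off        = 16384 - 16344 = 40
	sizes[1] = class->size - sizes[0] = 48 - 40 = 8

and the second memcpy() reads from a next page that does not exist.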

> I think the changes need to be shuffled around to avoid this, or just
> have a combined patch, which would be less pretty.

I think I prefer a shuffle.

There is another possible improvement (UNTESTED): if the first page
holds only ZS_HANDLE_SIZE bytes, then we can avoid the memcpy() path
and instead just kmap the second page + offset.
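
Something like this (completely untested):

	if (off + ZS_HANDLE_SIZE == PAGE_SIZE) {
		/* first page holds only the handle, payload starts at the next page */
		zpdesc = get_next_zpdesc(zpdesc);
		return kmap_local_zpdesc(zpdesc);
	}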



* Re: [PATCH] zsmalloc: use actual object size to detect spans
  2026-01-07  2:20             ` Sergey Senozhatsky
@ 2026-01-07  2:22               ` Sergey Senozhatsky
  2026-01-07  5:19               ` Yosry Ahmed
  1 sibling, 0 replies; 15+ messages in thread
From: Sergey Senozhatsky @ 2026-01-07  2:22 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Yosry Ahmed, Andrew Morton, Nhat Pham, Minchan Kim,
	Johannes Weiner, Brian Geffon, linux-kernel, linux-mm

On (26/01/07 11:20), Sergey Senozhatsky wrote:
> It seems there is no second page, it's a pow-of-two size class

Oh, class 48.  That's certainly not quite pow of 2...



* Re: [PATCH] zsmalloc: use actual object size to detect spans
  2026-01-07  2:10           ` Yosry Ahmed
  2026-01-07  2:20             ` Sergey Senozhatsky
@ 2026-01-07  3:03             ` Sergey Senozhatsky
  2026-01-07  5:22               ` Yosry Ahmed
  1 sibling, 1 reply; 15+ messages in thread
From: Sergey Senozhatsky @ 2026-01-07  3:03 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: Sergey Senozhatsky, Andrew Morton, Nhat Pham, Minchan Kim,
	Johannes Weiner, Brian Geffon, linux-kernel, linux-mm

On (26/01/07 02:10), Yosry Ahmed wrote:
> I think the changes need to be shuffled around to avoid this, or just
> have a combined patch, which would be less pretty.

Dunno.  Do we want to completely separate HugePage handling
and make it a fast path?  Then it seems things begin to work.

---

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index cb449acc8809..9b067853b6c2 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1077,6 +1077,7 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
        unsigned long obj, off;
        unsigned int obj_idx;
        struct size_class *class;
+       size_t sizes[2];
        void *addr;
 
        /* Guarantee we can get zspage from handle safely */
@@ -1089,35 +1090,27 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
        zspage_read_lock(zspage);
        read_unlock(&pool->lock);
 
+       /* Fast path for huge size class */
+       if (ZsHugePage(zspage))
+               return kmap_local_zpdesc(zpdesc);
+
        class = zspage_class(pool, zspage);
        off = offset_in_page(class->size * obj_idx);
 
-       /* Normal classes have inlined handle */
-       if (!ZsHugePage(zspage))
-               mem_len += ZS_HANDLE_SIZE;
-
+       off += ZS_HANDLE_SIZE;
        if (off + mem_len <= PAGE_SIZE) {
                /* this object is contained entirely within a page */
-               addr = kmap_local_zpdesc(zpdesc);
-               addr += off;
-       } else {
-               size_t sizes[2];
-
-               /* this object spans two pages */
-               sizes[0] = PAGE_SIZE - off;
-               sizes[1] = mem_len - sizes[0];
-               addr = local_copy;
-
-               memcpy_from_page(addr, zpdesc_page(zpdesc),
-                                off, sizes[0]);
-               zpdesc = get_next_zpdesc(zpdesc);
-               memcpy_from_page(addr + sizes[0],
-                                zpdesc_page(zpdesc),
-                                0, sizes[1]);
+               return kmap_local_zpdesc(zpdesc) + off;
        }
 
-       if (!ZsHugePage(zspage))
-               addr += ZS_HANDLE_SIZE;
+       /* this object spans two pages */
+       sizes[0] = PAGE_SIZE - off;
+       sizes[1] = mem_len - sizes[0];
+       addr = local_copy;
+
+       memcpy_from_page(addr, zpdesc_page(zpdesc), off, sizes[0]);
+       zpdesc = get_next_zpdesc(zpdesc);
+       memcpy_from_page(addr + sizes[0], zpdesc_page(zpdesc), 0, sizes[1]);
 
        return addr;
 }
@@ -1135,20 +1128,21 @@ void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
        obj = handle_to_obj(handle);
        obj_to_location(obj, &zpdesc, &obj_idx);
        zspage = get_zspage(zpdesc);
+
+       /* Fast path for huge size class */
+       if (ZsHugePage(zspage)) {
+               kunmap_local(handle_mem);
+               goto unlock;
+       }
+
        class = zspage_class(pool, zspage);
        off = offset_in_page(class->size * obj_idx);
 
-       /* Normal classes have inlined handle */
-       if (!ZsHugePage(zspage))
-               mem_len += ZS_HANDLE_SIZE;
-
-       if (off + mem_len <= PAGE_SIZE) {
-               if (!ZsHugePage(zspage))
-                       off += ZS_HANDLE_SIZE;
-               handle_mem -= off;
+       off += ZS_HANDLE_SIZE;
+       if (off + mem_len <= PAGE_SIZE)
                kunmap_local(handle_mem);
-       }
 
+unlock:
        zspage_read_unlock(zspage);
 }
 EXPORT_SYMBOL_GPL(zs_obj_read_end);



* Re: [PATCH] zsmalloc: use actual object size to detect spans
  2026-01-07  2:20             ` Sergey Senozhatsky
  2026-01-07  2:22               ` Sergey Senozhatsky
@ 2026-01-07  5:19               ` Yosry Ahmed
  2026-01-07  5:30                 ` Sergey Senozhatsky
  1 sibling, 1 reply; 15+ messages in thread
From: Yosry Ahmed @ 2026-01-07  5:19 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Andrew Morton, Nhat Pham, Minchan Kim, Johannes Weiner,
	Brian Geffon, linux-kernel, linux-mm

On Wed, Jan 07, 2026 at 11:20:20AM +0900, Sergey Senozhatsky wrote:
> On (26/01/07 02:10), Yosry Ahmed wrote:
> > On Wed, Jan 07, 2026 at 11:06:09AM +0900, Sergey Senozhatsky wrote:
> > > On (26/01/07 01:56), Yosry Ahmed wrote:
> > > > > I recall us having exactly this idea when we first introduced
> > > > > zs_obj_{read,write}_end() functions, and I do recall that it
> > > > > did not work.  Somehow this panics in __memcpy+0xc/0x44.  Let
> > > > > me dig into it again.
> > > > 
> > > > Maybe because at this point we are trying to memcpy() class->size, which
> > > > already includes ZS_HANDLE_SIZE. So reading after increasing the offset
> > > > reads ZS_HANDLE_SIZE after class->size.
> > > 
> > > Yeah, I guess that falsely hits the spanning path because of extra
> > > sizeof(unsigned long).
> > 
> > Or the object could indeed be spanning two pages, but then we're copying
> > an extra sizeof(unsigned long); that shouldn't crash, though.
> 
> It seems there is no second page, it's a pow-of-two size class.  So
> we mis-detect spanning.
> 
> [   51.406310] zsmalloc: :: size class 48, orig offt 16336, page size 16384, memcpy sizes 40, 8
> [   51.407571] Unable to handle kernel paging request at virtual address ffffc04000000000
> [   51.420816] pc : __memcpy+0xc/0x44
> 
> Second memcpy() of sizeof(unsigned long) traps.

I think this case is exactly what you expected earlier (not sure what
you mean by the pow of 2 reply). We increase the offset by 8 bytes
(ZS_HANDLE_SIZE), but we still copy 48 bytes, even though those 48 bytes
include both the object and ZS_HANDLE_SIZE. So we end up copying 8 bytes
beyond the end of the object, which puts us in the next page, which we
should not be copying from.

I think to fix the bug at this point we need to subtract ZS_HANDLE_SIZE
from class->size before we use it for copying or spanning detection.

Something like (untested):

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 5bf832f9c05c..894783d2526c 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1072,6 +1072,7 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
        unsigned long obj, off;
        unsigned int obj_idx;
        struct size_class *class;
+       unsigned long size;
        void *addr;

        /* Guarantee we can get zspage from handle safely */
@@ -1087,7 +1088,13 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
        class = zspage_class(pool, zspage);
        off = offset_in_page(class->size * obj_idx);

-       if (off + class->size <= PAGE_SIZE) {
+       size = class->size;
+       if (!ZsHugePage(zspage)) {
+               off += ZS_HANDLE_SIZE;
+               size -= ZS_HANDLE_SIZE;
+       }
+
+       if (off + size <= PAGE_SIZE) {
                /* this object is contained entirely within a page */
                addr = kmap_local_zpdesc(zpdesc);
                addr += off;
@@ -1096,7 +1103,7 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,

                /* this object spans two pages */
                sizes[0] = PAGE_SIZE - off;
-               sizes[1] = class->size - sizes[0];
+               sizes[1] = size - sizes[0];
                addr = local_copy;

                memcpy_from_page(addr, zpdesc_page(zpdesc),
@@ -1107,9 +1114,6 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
                                 0, sizes[1]);
        }

-       if (!ZsHugePage(zspage))
-               addr += ZS_HANDLE_SIZE;
-
        return addr;
 }
 EXPORT_SYMBOL_GPL(zs_obj_read_begin);
@@ -1121,6 +1125,7 @@ void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
        struct zpdesc *zpdesc;
        unsigned long obj, off;
        unsigned int obj_idx;
+       unsigned long size;
        struct size_class *class;

        obj = handle_to_obj(handle);
@@ -1129,9 +1134,13 @@ void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
        class = zspage_class(pool, zspage);
        off = offset_in_page(class->size * obj_idx);

-       if (off + class->size <= PAGE_SIZE) {
-               if (!ZsHugePage(zspage))
-                       off += ZS_HANDLE_SIZE;
+       size = class->size;
+       if (!ZsHugePage(zspage)) {
+               off += ZS_HANDLE_SIZE;
+               size -= ZS_HANDLE_SIZE;
+       }
+
+       if (off + size <= PAGE_SIZE) {
                handle_mem -= off;
                kunmap_local(handle_mem);
        }

> 
> > I think the changes need to be shuffled around to avoid this, or just
> > have a combined patch, which would be less pretty.
> 
> I think I prefer a shuffle.
> 
> There is another possible improvement (UNTESTED): if the first page
> holds only ZS_HANDLE_SIZE bytes, then we can avoid the memcpy() path
> and instead just kmap the second page + offset.

Yeah good point.



* Re: [PATCH] zsmalloc: use actual object size to detect spans
  2026-01-07  3:03             ` Sergey Senozhatsky
@ 2026-01-07  5:22               ` Yosry Ahmed
  2026-01-07  5:38                 ` Sergey Senozhatsky
  0 siblings, 1 reply; 15+ messages in thread
From: Yosry Ahmed @ 2026-01-07  5:22 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Andrew Morton, Nhat Pham, Minchan Kim, Johannes Weiner,
	Brian Geffon, linux-kernel, linux-mm

On Wed, Jan 07, 2026 at 12:03:37PM +0900, Sergey Senozhatsky wrote:
> On (26/01/07 02:10), Yosry Ahmed wrote:
> > I think the changes need to be shuffled around to avoid this, or just
> > have a combined patch, which would be less pretty.
> 
> Dunno.  Do we want to completely separate HugePage handling
> and make it a fast path?  Then it seems things begin to work.

HugePage should always be PAGE_SIZE, so never spans two pages, right?

I like separating the logic because it's cleaner, but I want us to
understand the problem first (see my other reply) instead of just
papering over it.

> 
> ---
> 
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index cb449acc8809..9b067853b6c2 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -1077,6 +1077,7 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
>         unsigned long obj, off;
>         unsigned int obj_idx;
>         struct size_class *class;
> +       size_t sizes[2];
>         void *addr;
>  
>         /* Guarantee we can get zspage from handle safely */
> @@ -1089,35 +1090,27 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
>         zspage_read_lock(zspage);
>         read_unlock(&pool->lock);
>  
> +       /* Fast path for huge size class */
> +       if (ZsHugePage(zspage))
> +               return kmap_local_zpdesc(zpdesc);

Can we WARN here if somehow the HugePage is spanning two pages?

> +
>         class = zspage_class(pool, zspage);
>         off = offset_in_page(class->size * obj_idx);
>  
> -       /* Normal classes have inlined handle */
> -       if (!ZsHugePage(zspage))
> -               mem_len += ZS_HANDLE_SIZE;
> -
> +       off += ZS_HANDLE_SIZE;
>         if (off + mem_len <= PAGE_SIZE) {
>                 /* this object is contained entirely within a page */
> -               addr = kmap_local_zpdesc(zpdesc);
> -               addr += off;
> -       } else {
> -               size_t sizes[2];
> -
> -               /* this object spans two pages */
> -               sizes[0] = PAGE_SIZE - off;
> -               sizes[1] = mem_len - sizes[0];
> -               addr = local_copy;
> -
> -               memcpy_from_page(addr, zpdesc_page(zpdesc),
> -                                off, sizes[0]);
> -               zpdesc = get_next_zpdesc(zpdesc);
> -               memcpy_from_page(addr + sizes[0],
> -                                zpdesc_page(zpdesc),
> -                                0, sizes[1]);
> +               return kmap_local_zpdesc(zpdesc) + off;
>         }
>  
> -       if (!ZsHugePage(zspage))
> -               addr += ZS_HANDLE_SIZE;
> +       /* this object spans two pages */
> +       sizes[0] = PAGE_SIZE - off;
> +       sizes[1] = mem_len - sizes[0];
> +       addr = local_copy;
> +
> +       memcpy_from_page(addr, zpdesc_page(zpdesc), off, sizes[0]);
> +       zpdesc = get_next_zpdesc(zpdesc);
> +       memcpy_from_page(addr + sizes[0], zpdesc_page(zpdesc), 0, sizes[1]);
>  
>         return addr;
>  }
> @@ -1135,20 +1128,21 @@ void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
>         obj = handle_to_obj(handle);
>         obj_to_location(obj, &zpdesc, &obj_idx);
>         zspage = get_zspage(zpdesc);
> +
> +       /* Fast path for huge size class */
> +       if (ZsHugePage(zspage)) {
> +               kunmap_local(handle_mem);
> +               goto unlock;
> +       }
> +
>         class = zspage_class(pool, zspage);
>         off = offset_in_page(class->size * obj_idx);
>  
> -       /* Normal classes have inlined handle */
> -       if (!ZsHugePage(zspage))
> -               mem_len += ZS_HANDLE_SIZE;
> -
> -       if (off + mem_len <= PAGE_SIZE) {
> -               if (!ZsHugePage(zspage))
> -                       off += ZS_HANDLE_SIZE;
> -               handle_mem -= off;
> +       off += ZS_HANDLE_SIZE;
> +       if (off + mem_len <= PAGE_SIZE)
>                 kunmap_local(handle_mem);
> -       }
>  
> +unlock:
>         zspage_read_unlock(zspage);
>  }
>  EXPORT_SYMBOL_GPL(zs_obj_read_end);



* Re: [PATCH] zsmalloc: use actual object size to detect spans
  2026-01-07  5:19               ` Yosry Ahmed
@ 2026-01-07  5:30                 ` Sergey Senozhatsky
  2026-01-07  7:12                   ` Sergey Senozhatsky
  0 siblings, 1 reply; 15+ messages in thread
From: Sergey Senozhatsky @ 2026-01-07  5:30 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: Sergey Senozhatsky, Andrew Morton, Nhat Pham, Minchan Kim,
	Johannes Weiner, Brian Geffon, linux-kernel, linux-mm

On (26/01/07 05:19), Yosry Ahmed wrote:
> > It seems there is no second page, it's a pow-of-two size class.  So
> > we mis-detect spanning.
> > 
> > [   51.406310] zsmalloc: :: size class 48, orig offt 16336, page size 16384, memcpy sizes 40, 8
> > [   51.407571] Unable to handle kernel paging request at virtual address ffffc04000000000
> > [   51.420816] pc : __memcpy+0xc/0x44
> > 
> > Second memcpy() of sizeof(unsigned long) traps.
> 
> I think this case is exactly what you expected earlier (not sure what
> you mean by the pow of 2 reply). 

So "pow-of-two" I was just misguided by the second memcpy() size being
8 (which looked like the whole object fits exactly into its physical
page, which I thought was happening only with pow-of-two sizes).

> We increase the offset by 8 bytes (ZS_HANDLE_SIZE), but we still copy 48
> bytes, even though those 48 bytes include both the object and ZS_HANDLE_SIZE.
> So we end up copying 8 bytes beyond the end of the object, which puts us
> in the next page, which we should not be copying from.

Correct.  We increased it twice: off +8 and mem_len +8.

> I think to fix the bug at this point we need to subtract ZS_HANDLE_SIZE
> from class->size before we use it for copying or spanning detection.

I just re-shuffled the patches and it seems to be working; it passes
all my tests.



* Re: [PATCH] zsmalloc: use actual object size to detect spans
  2026-01-07  5:22               ` Yosry Ahmed
@ 2026-01-07  5:38                 ` Sergey Senozhatsky
  0 siblings, 0 replies; 15+ messages in thread
From: Sergey Senozhatsky @ 2026-01-07  5:38 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: Sergey Senozhatsky, Andrew Morton, Nhat Pham, Minchan Kim,
	Johannes Weiner, Brian Geffon, linux-kernel, linux-mm

On (26/01/07 05:22), Yosry Ahmed wrote:
> On Wed, Jan 07, 2026 at 12:03:37PM +0900, Sergey Senozhatsky wrote:
> > On (26/01/07 02:10), Yosry Ahmed wrote:
> > > I think the changes need to be shuffled around to avoid this, or just
> > > have a combined patch, which would be less pretty.
> > 
> > Dunno.  Do we want to completely separate HugePage handling
> > and make it a fast path?  Then it seems things begin to work.
> 
> HugePage should always be PAGE_SIZE, so never spans two pages, right?

Right

  if (unlikely(class->objs_per_zspage == 1 && class->pages_per_zspage == 1))
      SetZsHugePage(zspage);

> I like separating the logic because it's cleaner, but I want us to
> understand the problem first (see my other reply) instead of just
> papering over it.

Sure.

> > diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> > index cb449acc8809..9b067853b6c2 100644
> > --- a/mm/zsmalloc.c
> > +++ b/mm/zsmalloc.c
> > @@ -1077,6 +1077,7 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
> >         unsigned long obj, off;
> >         unsigned int obj_idx;
> >         struct size_class *class;
> > +       size_t sizes[2];
> >         void *addr;
> >  
> >         /* Guarantee we can get zspage from handle safely */
> > @@ -1089,35 +1090,27 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
> >         zspage_read_lock(zspage);
> >         read_unlock(&pool->lock);
> >  
> > +       /* Fast path for huge size class */
> > +       if (ZsHugePage(zspage))
> > +               return kmap_local_zpdesc(zpdesc);
> 
> Can we WARN here if somehow the HugePage is spanning two pages?

I can add a WARN, but that really cannot happen.  We always allocate just
one physical page per zspage for such size classes.
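
Something like this (untested), before the fast path kmap:

	/* huge objects never span: one object per one-page zspage */
	VM_WARN_ON_ONCE(mem_len > PAGE_SIZE);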



* Re: [PATCH] zsmalloc: use actual object size to detect spans
  2026-01-07  5:30                 ` Sergey Senozhatsky
@ 2026-01-07  7:12                   ` Sergey Senozhatsky
  0 siblings, 0 replies; 15+ messages in thread
From: Sergey Senozhatsky @ 2026-01-07  7:12 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: Andrew Morton, Nhat Pham, Minchan Kim, Johannes Weiner,
	Brian Geffon, linux-kernel, linux-mm, Sergey Senozhatsky

A correction:

On (26/01/07 14:30), Sergey Senozhatsky wrote:
> > We increase the offset by 8 bytes (ZS_HANDLE_SIZE), but we still copy 48
> > bytes, even though 48 bytes includes both the object and ZS_HANDLE_SIZE.
> > So we end up copying 8 bytes beyond the end of the object, which puts us
> > in the next page which we should not be copying.
> 
> Correct.  We increased it twice: off +8 and mem_len +8.
                                              ^^ class->size (implicit +8)

I tested your patch as is, w/o applying mine.

