From: Nhat Pham <nphamcs@gmail.com>
To: linux-mm@kvack.org
Cc: akpm@linux-foundation.org, hannes@cmpxchg.org,
	yosry.ahmed@linux.dev, chengming.zhou@linux.dev, sj@kernel.org,
	kernel-team@meta.com, linux-kernel@vger.kernel.org,
	gourry@gourry.net, willy@infradead.org,
	ying.huang@linux.alibaba.com, jonathan.cameron@huawei.com,
	dan.j.williams@intel.com, linux-cxl@vger.kernel.org,
	minchan@kernel.org, senozhatsky@chromium.org
Subject: [RFC PATCH 1/2] zsmalloc: let callers select NUMA node to store the compressed objects
Date: Sat, 29 Mar 2025 04:02:29 -0700
Message-ID: <20250329110230.2459730-2-nphamcs@gmail.com>
In-Reply-To: <20250329110230.2459730-1-nphamcs@gmail.com>

Currently, zsmalloc does not specify any memory policy when it allocates
memory for the compressed objects.

Let users select the NUMA node for the memory allocation through the
zpool-based API. Direct callers (i.e., zram) should not observe any
behavioral change.
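
For illustration, a minimal usage sketch of the new interface (not part
of this patch; the zpool, page, size and gfp variables are assumed to
come from the caller's context):

	/* Prefer placing the compressed object on the page's node: */
	int nid = page_to_nid(page);
	unsigned long handle;
	int err;

	err = zpool_malloc(zpool, size, gfp, &handle, &nid);

	/* Passing NULL for @nid keeps the old behavior (no preference): */
	err = zpool_malloc(zpool, size, gfp, &handle, NULL);

zswap itself keeps passing NULL for now (see the mm/zswap.c hunk below);
actually selecting a node there is left to the next patch in this series.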

Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
 include/linux/zpool.h |  4 ++--
 mm/zpool.c            |  8 +++++---
 mm/zsmalloc.c         | 28 +++++++++++++++++++++-------
 mm/zswap.c            |  2 +-
 4 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index 52f30e526607..0df8722e13d7 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -22,7 +22,7 @@ const char *zpool_get_type(struct zpool *pool);
 void zpool_destroy_pool(struct zpool *pool);
 
 int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
-			unsigned long *handle);
+			unsigned long *handle, int *nid);
 
 void zpool_free(struct zpool *pool, unsigned long handle);
 
@@ -64,7 +64,7 @@ struct zpool_driver {
 	void (*destroy)(void *pool);
 
 	int (*malloc)(void *pool, size_t size, gfp_t gfp,
-				unsigned long *handle);
+				unsigned long *handle, int *nid);
 	void (*free)(void *pool, unsigned long handle);
 
 	void *(*obj_read_begin)(void *pool, unsigned long handle,
diff --git a/mm/zpool.c b/mm/zpool.c
index 6d6d88930932..591a13b99755 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -226,20 +226,22 @@ const char *zpool_get_type(struct zpool *zpool)
  * @size:	The amount of memory to allocate.
  * @gfp:	The GFP flags to use when allocating memory.
  * @handle:	Pointer to the handle to set
+ * @nid:	Pointer to the preferred node id.
  *
  * This allocates the requested amount of memory from the pool.
  * The gfp flags will be used when allocating memory, if the
  * implementation supports it.  The provided @handle will be
- * set to the allocated object handle.
+ * set to the allocated object handle. If @nid is provided, the
+ * allocation will prefer the specified node.
  *
  * Implementations must guarantee this to be thread-safe.
  *
  * Returns: 0 on success, negative value on error.
  */
 int zpool_malloc(struct zpool *zpool, size_t size, gfp_t gfp,
-			unsigned long *handle)
+			unsigned long *handle, int *nid)
 {
-	return zpool->driver->malloc(zpool->pool, size, gfp, handle);
+	return zpool->driver->malloc(zpool->pool, size, gfp, handle, nid);
 }
 
 /**
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 961b270f023c..35f61f14c32e 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -243,9 +243,14 @@ static inline void zpdesc_dec_zone_page_state(struct zpdesc *zpdesc)
 	dec_zone_page_state(zpdesc_page(zpdesc), NR_ZSPAGES);
 }
 
-static inline struct zpdesc *alloc_zpdesc(gfp_t gfp)
+static inline struct zpdesc *alloc_zpdesc(gfp_t gfp, int *nid)
 {
-	struct page *page = alloc_page(gfp);
+	struct page *page;
+
+	if (nid)
+		page = alloc_pages_node(*nid, gfp, 0);
+	else
+		page = alloc_page(gfp);
 
 	return page_zpdesc(page);
 }
@@ -461,10 +466,13 @@ static void zs_zpool_destroy(void *pool)
 	zs_destroy_pool(pool);
 }
 
+static unsigned long zs_malloc_node(struct zs_pool *pool, size_t size,
+				gfp_t gfp, int *nid);
+
 static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
-			unsigned long *handle)
+			unsigned long *handle, int *nid)
 {
-	*handle = zs_malloc(pool, size, gfp);
+	*handle = zs_malloc_node(pool, size, gfp, nid);
 
 	if (IS_ERR_VALUE(*handle))
 		return PTR_ERR((void *)*handle);
@@ -1044,7 +1052,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
  */
 static struct zspage *alloc_zspage(struct zs_pool *pool,
 					struct size_class *class,
-					gfp_t gfp)
+					gfp_t gfp, int *nid)
 {
 	int i;
 	struct zpdesc *zpdescs[ZS_MAX_PAGES_PER_ZSPAGE];
@@ -1061,7 +1069,7 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
 	for (i = 0; i < class->pages_per_zspage; i++) {
 		struct zpdesc *zpdesc;
 
-		zpdesc = alloc_zpdesc(gfp);
+		zpdesc = alloc_zpdesc(gfp, nid);
 		if (!zpdesc) {
 			while (--i >= 0) {
 				zpdesc_dec_zone_page_state(zpdescs[i]);
@@ -1342,6 +1350,12 @@ static unsigned long obj_malloc(struct zs_pool *pool,
  * Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.
  */
 unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
+{
+	return zs_malloc_node(pool, size, gfp, NULL);
+}
+
+static unsigned long zs_malloc_node(struct zs_pool *pool, size_t size,
+				gfp_t gfp, int *nid)
 {
 	unsigned long handle;
 	struct size_class *class;
@@ -1376,7 +1390,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
 
 	spin_unlock(&class->lock);
 
-	zspage = alloc_zspage(pool, class, gfp);
+	zspage = alloc_zspage(pool, class, gfp, nid);
 	if (!zspage) {
 		cache_free_handle(pool, handle);
 		return (unsigned long)ERR_PTR(-ENOMEM);
diff --git a/mm/zswap.c b/mm/zswap.c
index 204fb59da33c..89b6d4ade4cd 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -981,7 +981,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
 
 	zpool = pool->zpool;
 	gfp = GFP_NOWAIT | __GFP_NORETRY | __GFP_HIGHMEM | __GFP_MOVABLE;
-	alloc_ret = zpool_malloc(zpool, dlen, gfp, &handle);
+	alloc_ret = zpool_malloc(zpool, dlen, gfp, &handle, NULL);
 	if (alloc_ret)
 		goto unlock;
 
-- 
2.47.1



