linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Sergey Senozhatsky <senozhatsky@chromium.org>
To: Andrew Morton <akpm@linux-foundation.org>,
	Minchan Kim <minchan@kernel.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Yosry Ahmed <yosry.ahmed@linux.dev>,
	Nhat Pham <nphamcs@gmail.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Sergey Senozhatsky <senozhatsky@chromium.org>
Subject: [RFC PATCH 2/6] zsmalloc: make zspage lock preemptible
Date: Mon, 27 Jan 2025 16:59:27 +0900	[thread overview]
Message-ID: <20250127080254.1302026-3-senozhatsky@chromium.org> (raw)
In-Reply-To: <20250127080254.1302026-1-senozhatsky@chromium.org>

Switch over from rwlock_t to an atomic_t variable that takes
a negative value when the page is under migration, or positive
values when the page is used by zsmalloc users (object map,
etc.)  Using a rwsem per-zspage is a little too memory heavy,
a simple atomic_t should suffice, after all we only need to
mark zspage as either used-for-write or used-for-read.  This
is needed to make zsmalloc preemptible in the future.

Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
---
 mm/zsmalloc.c | 112 +++++++++++++++++++++++++++++---------------------
 1 file changed, 66 insertions(+), 46 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 817626a351f8..28a75bfbeaa6 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -257,6 +257,9 @@ static inline void free_zpdesc(struct zpdesc *zpdesc)
 	__free_page(page);
 }
 
+#define ZS_PAGE_UNLOCKED	0
+#define ZS_PAGE_WRLOCKED	-1
+
 struct zspage {
 	struct {
 		unsigned int huge:HUGE_BITS;
@@ -269,7 +272,7 @@ struct zspage {
 	struct zpdesc *first_zpdesc;
 	struct list_head list; /* fullness list */
 	struct zs_pool *pool;
-	rwlock_t lock;
+	atomic_t lock;
 };
 
 struct mapping_area {
@@ -290,11 +293,53 @@ static bool ZsHugePage(struct zspage *zspage)
 	return zspage->huge;
 }
 
-static void migrate_lock_init(struct zspage *zspage);
-static void migrate_read_lock(struct zspage *zspage);
-static void migrate_read_unlock(struct zspage *zspage);
-static void migrate_write_lock(struct zspage *zspage);
-static void migrate_write_unlock(struct zspage *zspage);
+static void zspage_lock_init(struct zspage *zspage)
+{
+	atomic_set(&zspage->lock, ZS_PAGE_UNLOCKED);
+}
+
+static void zspage_read_lock(struct zspage *zspage)
+{
+	atomic_t *lock = &zspage->lock;
+	int old;
+
+	while (1) {
+		old = atomic_read(lock);
+		if (old == ZS_PAGE_WRLOCKED) {
+			cpu_relax();
+			continue;
+		}
+
+		if (atomic_cmpxchg(lock, old, old + 1) == old)
+			return;
+
+		cpu_relax();
+	}
+}
+
+static void zspage_read_unlock(struct zspage *zspage)
+{
+	atomic_dec(&zspage->lock);
+}
+
+static void zspage_write_lock(struct zspage *zspage)
+{
+	atomic_t *lock = &zspage->lock;
+	int old;
+
+	while (1) {
+		old = atomic_cmpxchg(lock, ZS_PAGE_UNLOCKED, ZS_PAGE_WRLOCKED);
+		if (old == ZS_PAGE_UNLOCKED)
+			return;
+
+		cpu_relax();
+	}
+}
+
+static void zspage_write_unlock(struct zspage *zspage)
+{
+	atomic_set(&zspage->lock, ZS_PAGE_UNLOCKED);
+}
 
 #ifdef CONFIG_COMPACTION
 static void kick_deferred_free(struct zs_pool *pool);
@@ -992,7 +1037,7 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
 		return NULL;
 
 	zspage->magic = ZSPAGE_MAGIC;
-	migrate_lock_init(zspage);
+	zspage_lock_init(zspage);
 
 	for (i = 0; i < class->pages_per_zspage; i++) {
 		struct zpdesc *zpdesc;
@@ -1217,7 +1262,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 	 * zs_unmap_object API so delegate the locking from class to zspage
 	 * which is smaller granularity.
 	 */
-	migrate_read_lock(zspage);
+	zspage_read_lock(zspage);
 	read_unlock(&pool->migrate_lock);
 
 	class = zspage_class(pool, zspage);
@@ -1277,7 +1322,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 	}
 	local_unlock(&zs_map_area.lock);
 
-	migrate_read_unlock(zspage);
+	zspage_read_unlock(zspage);
 }
 EXPORT_SYMBOL_GPL(zs_unmap_object);
 
@@ -1671,18 +1716,18 @@ static void lock_zspage(struct zspage *zspage)
 	/*
 	 * Pages we haven't locked yet can be migrated off the list while we're
 	 * trying to lock them, so we need to be careful and only attempt to
-	 * lock each page under migrate_read_lock(). Otherwise, the page we lock
+	 * lock each page under zspage_read_lock(). Otherwise, the page we lock
 	 * may no longer belong to the zspage. This means that we may wait for
 	 * the wrong page to unlock, so we must take a reference to the page
-	 * prior to waiting for it to unlock outside migrate_read_lock().
+	 * prior to waiting for it to unlock outside zspage_read_lock().
 	 */
 	while (1) {
-		migrate_read_lock(zspage);
+		zspage_read_lock(zspage);
 		zpdesc = get_first_zpdesc(zspage);
 		if (zpdesc_trylock(zpdesc))
 			break;
 		zpdesc_get(zpdesc);
-		migrate_read_unlock(zspage);
+		zspage_read_unlock(zspage);
 		zpdesc_wait_locked(zpdesc);
 		zpdesc_put(zpdesc);
 	}
@@ -1693,41 +1738,16 @@ static void lock_zspage(struct zspage *zspage)
 			curr_zpdesc = zpdesc;
 		} else {
 			zpdesc_get(zpdesc);
-			migrate_read_unlock(zspage);
+			zspage_read_unlock(zspage);
 			zpdesc_wait_locked(zpdesc);
 			zpdesc_put(zpdesc);
-			migrate_read_lock(zspage);
+			zspage_read_lock(zspage);
 		}
 	}
-	migrate_read_unlock(zspage);
+	zspage_read_unlock(zspage);
 }
 #endif /* CONFIG_COMPACTION */
 
-static void migrate_lock_init(struct zspage *zspage)
-{
-	rwlock_init(&zspage->lock);
-}
-
-static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock)
-{
-	read_lock(&zspage->lock);
-}
-
-static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
-{
-	read_unlock(&zspage->lock);
-}
-
-static void migrate_write_lock(struct zspage *zspage)
-{
-	write_lock(&zspage->lock);
-}
-
-static void migrate_write_unlock(struct zspage *zspage)
-{
-	write_unlock(&zspage->lock);
-}
-
 #ifdef CONFIG_COMPACTION
 
 static const struct movable_operations zsmalloc_mops;
@@ -1803,8 +1823,8 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 	 * the class lock protects zpage alloc/free in the zspage.
 	 */
 	spin_lock(&class->lock);
-	/* the migrate_write_lock protects zpage access via zs_map_object */
-	migrate_write_lock(zspage);
+	/* the zspage_write_lock protects zpage access via zs_map_object */
+	zspage_write_lock(zspage);
 
 	offset = get_first_obj_offset(zpdesc);
 	s_addr = kmap_local_zpdesc(zpdesc);
@@ -1835,7 +1855,7 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 	 */
 	write_unlock(&pool->migrate_lock);
 	spin_unlock(&class->lock);
-	migrate_write_unlock(zspage);
+	zspage_write_unlock(zspage);
 
 	zpdesc_get(newzpdesc);
 	if (zpdesc_zone(newzpdesc) != zpdesc_zone(zpdesc)) {
@@ -1971,9 +1991,9 @@ static unsigned long __zs_compact(struct zs_pool *pool,
 		if (!src_zspage)
 			break;
 
-		migrate_write_lock(src_zspage);
+		zspage_write_lock(src_zspage);
 		migrate_zspage(pool, src_zspage, dst_zspage);
-		migrate_write_unlock(src_zspage);
+		zspage_write_unlock(src_zspage);
 
 		fg = putback_zspage(class, src_zspage);
 		if (fg == ZS_INUSE_RATIO_0) {
-- 
2.48.1.262.g85cc9f2d1e-goog



  parent reply	other threads:[~2025-01-27  8:03 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-27  7:59 [RFC PATCH 0/6] zsmalloc: make zsmalloc preemptible Sergey Senozhatsky
2025-01-27  7:59 ` [RFC PATCH 1/6] zram: deffer slot free notification Sergey Senozhatsky
2025-01-27  7:59 ` Sergey Senozhatsky [this message]
2025-01-27 20:23   ` [RFC PATCH 2/6] zsmalloc: make zspage lock preemptible Uros Bizjak
2025-01-28  0:29     ` Sergey Senozhatsky
2025-01-27  7:59 ` [RFC PATCH 3/6] zsmalloc: convert to sleepable pool lock Sergey Senozhatsky
2025-01-27  7:59 ` [RFC PATCH 4/6] zsmalloc: make class lock sleepable Sergey Senozhatsky
2025-01-27  7:59 ` [RFC PATCH 5/6] zsmalloc: introduce handle mapping API Sergey Senozhatsky
2025-01-27 21:26   ` Yosry Ahmed
2025-01-28  0:37     ` Sergey Senozhatsky
2025-01-28  0:49       ` Yosry Ahmed
2025-01-28  1:13         ` Sergey Senozhatsky
2025-01-27 21:58   ` Yosry Ahmed
2025-01-28  0:59     ` Sergey Senozhatsky
2025-01-28  1:36       ` Yosry Ahmed
2025-01-28  5:29         ` Sergey Senozhatsky
2025-01-28  9:38           ` Sergey Senozhatsky
2025-01-28 17:21             ` Yosry Ahmed
2025-01-29  3:32               ` Sergey Senozhatsky
2025-01-28 11:10           ` Sergey Senozhatsky
2025-01-28 17:22             ` Yosry Ahmed
2025-01-28 23:01               ` Sergey Senozhatsky
2025-01-29  5:40         ` Sergey Senozhatsky
2025-01-27  7:59 ` [RFC PATCH 6/6] zram: switch over to zshandle " Sergey Senozhatsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250127080254.1302026-3-senozhatsky@chromium.org \
    --to=senozhatsky@chromium.org \
    --cc=akpm@linux-foundation.org \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=minchan@kernel.org \
    --cc=nphamcs@gmail.com \
    --cc=yosry.ahmed@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox