linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
To: Minchan Kim <minchan@kernel.org>
Cc: linux-mm@kvack.org, Andrew Morton <akpm@linux-foundation.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Mike Galbraith <umgwanakikbuti@gmail.com>,
	Mike Galbraith <efault@gmx.de>
Subject: [PATCH] mm/zsmalloc: Replace bit spinlock and get_cpu_var() usage.
Date: Tue, 28 Sep 2021 10:44:19 +0200	[thread overview]
Message-ID: <20210928084419.mkfu62barwrsvflq@linutronix.de> (raw)
In-Reply-To: <YU0IkoeWYLdll6/+@google.com>

From: Mike Galbraith <umgwanakikbuti@gmail.com>

For efficiency reasons, zsmalloc is using a slim `handle'. The value is
the address of a memory allocation of 4 or 8 bytes depending on the size
of the long data type. The lowest bit in that allocated memory is used
as a bit spin lock.
The usage of the bit spin lock is problematic because with the bit spin
lock held zsmalloc acquires a rwlock_t and spinlock_t which are both
sleeping locks on PREEMPT_RT and therefore must not be acquired with
disabled preemption.

Extend the handle to struct zsmalloc_handle which holds the old handle as
addr and a spinlock_t which replaces the bit spinlock. Replace all the
wrapper functions accordingly.

The usage of get_cpu_var() in zs_map_object() is problematic because
it disables preemption and makes it impossible to acquire any sleeping
lock on PREEMPT_RT such as a spinlock_t.
Replace the get_cpu_var() usage with a local_lock_t which is embedded
struct mapping_area. It ensures that the access the struct is
synchronized against all users on the same CPU.

This survived LTP testing.

Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[bigeasy: replace the bitspin_lock() with a mutex, get_locked_var() and
 patch description. Mike then fixed the size magic and made handle lock
 spinlock_t.]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---

Minchan, this is the replacement we have so far. There is something
similar for block/zram. By disabling zsmalloc we also have zram disabled
so no fallout.
To my best knowledge, the only usage/testing is LTP based.

 mm/zsmalloc.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 78 insertions(+), 6 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 68e8831068f4b..22c18ac605b5f 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -57,6 +57,7 @@
 #include <linux/wait.h>
 #include <linux/pagemap.h>
 #include <linux/fs.h>
+#include <linux/local_lock.h>
 
 #define ZSPAGE_MAGIC	0x58
 
@@ -77,6 +78,20 @@
 
 #define ZS_HANDLE_SIZE (sizeof(unsigned long))
 
+#ifdef CONFIG_PREEMPT_RT
+
+struct zsmalloc_handle {
+	unsigned long addr;
+	spinlock_t lock;
+};
+
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
+
+#else
+
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
+#endif
+
 /*
  * Object location (<PFN>, <obj_idx>) is encoded as
  * a single (unsigned long) handle value.
@@ -293,6 +308,7 @@ struct zspage {
 };
 
 struct mapping_area {
+	local_lock_t lock;
 	char *vm_buf; /* copy buffer for objects that span pages */
 	char *vm_addr; /* address of kmap_atomic()'ed pages */
 	enum zs_mapmode vm_mm; /* mapping mode */
@@ -322,7 +338,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
 
 static int create_cache(struct zs_pool *pool)
 {
-	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
+	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
 					0, 0, NULL);
 	if (!pool->handle_cachep)
 		return 1;
@@ -346,10 +362,27 @@ static void destroy_cache(struct zs_pool *pool)
 
 static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
 {
-	return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
-			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+	void *p;
+
+	p = kmem_cache_alloc(pool->handle_cachep,
+			     gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+#ifdef CONFIG_PREEMPT_RT
+	if (p) {
+		struct zsmalloc_handle *zh = p;
+
+		spin_lock_init(&zh->lock);
+	}
+#endif
+	return (unsigned long)p;
 }
 
+#ifdef CONFIG_PREEMPT_RT
+static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
+{
+	return (void *)(handle & ~((1 << OBJ_TAG_BITS) - 1));
+}
+#endif
+
 static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
 {
 	kmem_cache_free(pool->handle_cachep, (void *)handle);
@@ -368,12 +401,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
 
 static void record_obj(unsigned long handle, unsigned long obj)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	WRITE_ONCE(zh->addr, obj);
+#else
 	/*
 	 * lsb of @obj represents handle lock while other bits
 	 * represent object value the handle is pointing so
 	 * updating shouldn't do store tearing.
 	 */
 	WRITE_ONCE(*(unsigned long *)handle, obj);
+#endif
 }
 
 /* zpool driver */
@@ -455,7 +494,9 @@ MODULE_ALIAS("zpool-zsmalloc");
 #endif /* CONFIG_ZPOOL */
 
 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
-static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
+static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
+	.lock	= INIT_LOCAL_LOCK(lock),
+};
 
 static bool is_zspage_isolated(struct zspage *zspage)
 {
@@ -862,7 +903,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx)
 
 static unsigned long handle_to_obj(unsigned long handle)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return zh->addr;
+#else
 	return *(unsigned long *)handle;
+#endif
 }
 
 static unsigned long obj_to_head(struct page *page, void *obj)
@@ -876,22 +923,46 @@ static unsigned long obj_to_head(struct page *page, void *obj)
 
 static inline int testpin_tag(unsigned long handle)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return spin_is_locked(&zh->lock);
+#else
 	return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
 }
 
 static inline int trypin_tag(unsigned long handle)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return spin_trylock(&zh->lock);
+#else
 	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
 }
 
 static void pin_tag(unsigned long handle) __acquires(bitlock)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return spin_lock(&zh->lock);
+#else
 	bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
 }
 
 static void unpin_tag(unsigned long handle) __releases(bitlock)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return spin_unlock(&zh->lock);
+#else
 	bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
 }
 
 static void reset_page(struct page *page)
@@ -1274,7 +1345,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 	class = pool->size_class[class_idx];
 	off = (class->size * obj_idx) & ~PAGE_MASK;
 
-	area = &get_cpu_var(zs_map_area);
+	local_lock(&zs_map_area.lock);
+	area = this_cpu_ptr(&zs_map_area);
 	area->vm_mm = mm;
 	if (off + class->size <= PAGE_SIZE) {
 		/* this object is contained entirely within a page */
@@ -1328,7 +1400,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 
 		__zs_unmap_object(area, pages, off, class->size);
 	}
-	put_cpu_var(zs_map_area);
+	local_unlock(&zs_map_area.lock);
 
 	migrate_read_unlock(zspage);
 	unpin_tag(handle);
-- 
2.33.0


  parent reply	other threads:[~2021-09-28  8:44 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-23 17:01 [PATCH] mm: Disable zsmalloc on PREEMPT_RT Sebastian Andrzej Siewior
2021-09-23 23:06 ` Minchan Kim
2021-09-24  7:08   ` Sebastian Andrzej Siewior
2021-09-28  8:44   ` Sebastian Andrzej Siewior [this message]
2021-09-28 22:47     ` [PATCH] mm/zsmalloc: Replace bit spinlock and get_cpu_var() usage Andrew Morton
2021-09-29  2:11       ` Mike Galbraith
2021-09-29  7:23       ` Sebastian Andrzej Siewior
2021-09-29 19:09         ` Minchan Kim
2021-09-30  6:42           ` Sebastian Andrzej Siewior

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210928084419.mkfu62barwrsvflq@linutronix.de \
    --to=bigeasy@linutronix.de \
    --cc=akpm@linux-foundation.org \
    --cc=efault@gmx.de \
    --cc=linux-mm@kvack.org \
    --cc=minchan@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=umgwanakikbuti@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox