linux-mm.kvack.org archive mirror
From: Hillf Danton <hdanton@sina.com>
To: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Yosry Ahmed <yosry.ahmed@linux.dev>,
	Kairui Song <ryncsn@gmail.com>, Minchan Kim <minchan@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v5 12/18] zsmalloc: make zspage lock preemptible
Date: Thu, 13 Feb 2025 19:32:47 +0800	[thread overview]
Message-ID: <20250213113248.2225-1-hdanton@sina.com> (raw)
In-Reply-To: <20250212063153.179231-13-senozhatsky@chromium.org>

On Wed, 12 Feb 2025 15:27:10 +0900 Sergey Senozhatsky
> Switch over from rwlock_t to an atomic_t variable that takes a negative
> value when the page is under migration, or a positive value when the
> page is used by zsmalloc users (object map, etc.).  Using an rwsem
> per zspage is a little too memory heavy; a simple atomic_t should
> suffice.
> 
> The zspage lock is a leaf lock for zs_map_object(), where it is
> read-acquired.  Since this lock now permits preemption, extra care needs
> to be taken when it is write-acquired - all writers grab it in atomic
> context, so they cannot spin and wait for a (potentially preempted)
> reader to unlock the zspage.  There are only two writers at the moment -
> migration and compaction.  In both cases we use write-trylock and bail
> out if the zspage is read-locked.  Writers, on the other hand, never get
> preempted, so readers can spin waiting for the writer to unlock the
> zspage.
> 
> With this we can implement a preemptible object mapping.
> 
> Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
> Cc: Yosry Ahmed <yosry.ahmed@linux.dev>
> ---
>  mm/zsmalloc.c | 183 +++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 128 insertions(+), 55 deletions(-)
> 
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index c82c24b8e6a4..80261bb78cf8 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -226,6 +226,9 @@ struct zs_pool {
>  	/* protect page/zspage migration */
>  	rwlock_t lock;
>  	atomic_t compaction_in_progress;
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +	struct lock_class_key lockdep_key;
> +#endif
>  };
>  
>  static void pool_write_unlock(struct zs_pool *pool)
> @@ -292,6 +295,9 @@ static inline void free_zpdesc(struct zpdesc *zpdesc)
>  	__free_page(page);
>  }
>  
> +#define ZS_PAGE_UNLOCKED	0
> +#define ZS_PAGE_WRLOCKED	-1
> +
>  struct zspage {
>  	struct {
>  		unsigned int huge:HUGE_BITS;
> @@ -304,7 +310,11 @@ struct zspage {
>  	struct zpdesc *first_zpdesc;
>  	struct list_head list; /* fullness list */
>  	struct zs_pool *pool;
> -	rwlock_t lock;
> +	atomic_t lock;
> +
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +	struct lockdep_map lockdep_map;
> +#endif
>  };
>  
>  struct mapping_area {
> @@ -314,6 +324,88 @@ struct mapping_area {
>  	enum zs_mapmode vm_mm; /* mapping mode */
>  };
>  
> +static void zspage_lock_init(struct zspage *zspage)
> +{
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +	lockdep_init_map(&zspage->lockdep_map, "zsmalloc-page",
> +			 &zspage->pool->lockdep_key, 0);
> +#endif
> +
> +	atomic_set(&zspage->lock, ZS_PAGE_UNLOCKED);
> +}
> +
> +/*
> + * zspage locking rules:
> + *
> + * 1) writer-lock is exclusive
> + *
> + * 2) writer-lock owner cannot sleep
> + *
> + * 3) writer-lock owner cannot spin waiting for the lock
> + *   - caller (e.g. compaction and migration) must check return value and
> + *     handle locking failures
 > + *   - there is only a TRY variant of the writer-lock function
> + *
> + * 4) reader-lock owners (multiple) can sleep
> + *
> + * 5) reader-lock owners can spin waiting for the lock, in any context
> + *   - existing readers (even preempted ones) don't block new readers
> + *   - writer-lock owners never sleep, always unlock at some point
> + */
> +static void zspage_read_lock(struct zspage *zspage)
> +{
> +	atomic_t *lock = &zspage->lock;
> +	int old = atomic_read_acquire(lock);
> +
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +	rwsem_acquire_read(&zspage->lockdep_map, 0, 0, _RET_IP_);
> +#endif
> +
> +	do {
> +		if (old == ZS_PAGE_WRLOCKED) {
> +			cpu_relax();
> +			old = atomic_read_acquire(lock);
> +			continue;
> +		}
> +	} while (!atomic_try_cmpxchg_acquire(lock, &old, old + 1));

Given mcs_spinlock, inventing a new spinlock in 2025 does not sound like
a good idea. See below for a spinlock-based version.
> +}
> +
> +static void zspage_read_unlock(struct zspage *zspage)
> +{
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +	rwsem_release(&zspage->lockdep_map, _RET_IP_);
> +#endif
> +	atomic_dec_return_release(&zspage->lock);
> +}
> +
> +static __must_check bool zspage_try_write_lock(struct zspage *zspage)
> +{
> +	atomic_t *lock = &zspage->lock;
> +	int old = ZS_PAGE_UNLOCKED;
> +
> +	WARN_ON_ONCE(preemptible());
> +
> +	preempt_disable();
> +	if (atomic_try_cmpxchg_acquire(lock, &old, ZS_PAGE_WRLOCKED)) {
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +		rwsem_acquire(&zspage->lockdep_map, 0, 1, _RET_IP_);
> +#endif
> +		return true;
> +	}
> +
> +	preempt_enable();
> +	return false;
> +}
> +
> +static void zspage_write_unlock(struct zspage *zspage)
> +{
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +	rwsem_release(&zspage->lockdep_map, _RET_IP_);
> +#endif
> +	atomic_set_release(&zspage->lock, ZS_PAGE_UNLOCKED);
> +	preempt_enable();
> +}

struct zspage_lock {
	spinlock_t	lock;
	int		cnt;	/* ZS_PAGE_UNLOCKED, or number of readers */
	struct lockdep_map lockdep_map;
};

static __must_check bool zspage_write_trylock(struct zspage_lock *zl)
{
	spin_lock(&zl->lock);
	if (zl->cnt == ZS_PAGE_UNLOCKED) {
		/*
		 * The spinlock is held until zspage_write_unlock(), so no
		 * new readers can come in and zl->cnt does not need to be
		 * set to ZS_PAGE_WRLOCKED.
		 */
		// zl->cnt = ZS_PAGE_WRLOCKED;
		rwsem_acquire(&zl->lockdep_map, 0, 1, _RET_IP_);
		return true;
	}
	spin_unlock(&zl->lock);
	return false;
}

static void zspage_write_unlock(struct zspage_lock *zl)
{
	rwsem_release(&zl->lockdep_map, _RET_IP_);
	/* drops the spinlock taken in zspage_write_trylock() */
	spin_unlock(&zl->lock);
}

static void zspage_read_lock(struct zspage_lock *zl)
{
	rwsem_acquire_read(&zl->lockdep_map, 0, 0, _RET_IP_);

	/* the spinlock is held only to update the reader count */
	spin_lock(&zl->lock);
	zl->cnt++;
	spin_unlock(&zl->lock);
}

static void zspage_read_unlock(struct zspage_lock *zl)
{
	rwsem_release(&zl->lockdep_map, _RET_IP_);

	spin_lock(&zl->lock);
	zl->cnt--;
	spin_unlock(&zl->lock);
}
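
For clarity, here is a minimal caller-side sketch of the trylock/bail-out
protocol described in the changelog, assuming the zspage embeds the lock
as zspage->zsl; the function name and the migration details below are
hypothetical and only illustrate the calling convention, not the actual
zsmalloc migration code:

/*
 * Hypothetical caller sketch (not actual zsmalloc code): writers run in
 * atomic context and must bail out instead of spinning when the zspage
 * is read-locked.
 */
static int zs_page_migrate_sketch(struct zspage *zspage)
{
	if (!zspage_write_trylock(&zspage->zsl))
		return -EAGAIN;	/* readers present, let the caller retry */

	/* ... migrate objects, update first_zpdesc, etc ... */

	zspage_write_unlock(&zspage->zsl);
	return 0;
}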


Thread overview: 40+ messages
2025-02-12  6:26 [PATCH v5 00/18] zsmalloc/zram: there be preemption Sergey Senozhatsky
2025-02-12  6:26 ` [PATCH v5 01/18] zram: sleepable entry locking Sergey Senozhatsky
2025-02-13  0:08   ` Andrew Morton
2025-02-13  0:52     ` Sergey Senozhatsky
2025-02-13  1:42       ` Sergey Senozhatsky
2025-02-13  8:49         ` Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 02/18] zram: permit preemption with active compression stream Sergey Senozhatsky
2025-02-12 16:01   ` Yosry Ahmed
2025-02-13  1:04     ` Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 03/18] zram: remove crypto include Sergey Senozhatsky
2025-02-12 16:13   ` Yosry Ahmed
2025-02-13  0:53     ` Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 04/18] zram: remove max_comp_streams device attr Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 05/18] zram: remove two-staged handle allocation Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 06/18] zram: remove writestall zram_stats member Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 07/18] zram: limit max recompress prio to num_active_comps Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 08/18] zram: filter out recomp targets based on priority Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 09/18] zram: rework recompression loop Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 10/18] zsmalloc: factor out pool locking helpers Sergey Senozhatsky
2025-02-12 16:18   ` Yosry Ahmed
2025-02-12 16:19     ` Yosry Ahmed
2025-02-13  0:57     ` Sergey Senozhatsky
2025-02-13  1:12       ` Yosry Ahmed
2025-02-13  2:54         ` Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 11/18] zsmalloc: factor out size-class " Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 12/18] zsmalloc: make zspage lock preemptible Sergey Senozhatsky
2025-02-12 17:14   ` Yosry Ahmed
2025-02-13  1:20     ` Sergey Senozhatsky
2025-02-13  1:31       ` Yosry Ahmed
2025-02-13  1:53         ` Sergey Senozhatsky
2025-02-13 11:32   ` Hillf Danton [this message]
2025-02-13 12:29     ` Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 13/18] zsmalloc: introduce new object mapping API Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 14/18] zram: switch to new zsmalloc " Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 15/18] zram: permit reclaim in zstd custom allocator Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 16/18] zram: do not leak page on recompress_store error path Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 17/18] zram: do not leak page on writeback_store " Sergey Senozhatsky
2025-02-12  6:27 ` [PATCH v5 18/18] zram: add might_sleep to zcomp API Sergey Senozhatsky
2025-02-13  0:09 ` [PATCH v5 00/18] zsmalloc/zram: there be preemption Andrew Morton
2025-02-13  0:51   ` Sergey Senozhatsky
