linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Fedorov Nikita <fedorov.nikita@h-partners.com>
To: Catalin Marinas <catalin.marinas@arm.com>,
	Will Deacon <will@kernel.org>, Thomas Gleixner <tglx@kernel.org>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>, <x86@kernel.org>,
	<hpa@zytor.com>, Juergen Gross <jgross@suse.com>,
	Ajay Kaher <ajay.kaher@broadcom.com>,
	Alexey Makhalov <alexey.makhalov@broadcom.com>,
	<bcm-kernel-feedback-list@broadcom.com>,
	Arnd Bergmann <arnd@arndb.de>,
	Peter Zijlstra <peterz@infradead.org>,
	Boqun Feng <boqun@kernel.org>, Waiman Long <longman@redhat.com>,
	Darren Hart <dvhart@infradead.org>,
	Davidlohr Bueso <dave@stgolabs.net>, <andrealmeid@igalia.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Hildenbrand <david@kernel.org>, Zi Yan <ziy@nvidia.com>,
	Matthew Brost <matthew.brost@intel.com>,
	Joshua Hahn <joshua.hahnjy@gmail.com>,
	Rakie Kim <rakie.kim@sk.com>, <byungchul@sk.com>,
	Gregory Price <gourry@gourry.net>,
	Ying Huang <ying.huang@linux.alibaba.com>,
	Alistair Popple <apopple@nvidia.com>,
	Anatoly Stepanov <stepanov.anatoly@huawei.com>
Cc: Nikita Fedorov <fedorov.nikita@h-partners.com>,
	<linux-arm-kernel@lists.infradead.org>,
	<linux-kernel@vger.kernel.org>, <virtualization@lists.linux.dev>,
	<linux-arch@vger.kernel.org>, <linux-mm@kvack.org>,
	<guohanjun@huawei.com>, <wangkefeng.wang@huawei.com>,
	<weiyongjun1@huawei.com>, <yusongping@huawei.com>,
	<leijitang@huawei.com>, <artem.kuzin@huawei.com>,
	<kang.sun@huawei.com>, <chenjieping3@huawei.com>
Subject: [RFC PATCH v3 3/7] hq-spinlock: add contention detection
Date: Thu, 16 Apr 2026 00:44:55 +0800	[thread overview]
Message-ID: <20260415164459.2904963-4-fedorov.nikita@h-partners.com> (raw)
In-Reply-To: <20260415164459.2904963-1-fedorov.nikita@h-partners.com>

The hierarchical slowpath is needed for locks that experience
sustained cross-node contention. Enabling it unconditionally is
undesirable for lightly contended locks and may decrease performance.

Add a simple contention detection scheme that tracks remote handoffs
separately from overall handoff activity and enables HQ mode only when
the observed handoff pattern indicates that cross-node contention is
high enough to benefit from NUMA-aware queueing.

The HQ lock type can be turned on if remote_handoffs exceeds
`hqlock_remote_handoffs_turn_numa`. A lock in HQ mode is turned back into
QSPINLOCK mode if the number of remote handoffs falls below
`hqlock_remote_handoffs_keep_numa`.

The remote handoffs counter is increased only if the number of local
handoffs since its previous increase exceeds
`hqlock_local_handoffs_to_increase_remotes`.

Additional locktorture reruns showed no degradation in low-contention
configurations after adding contention-based switching, while maintaining
practically the same performance improvement in high-contention cases.

Co-developed-by: Anatoly Stepanov <stepanov.anatoly@huawei.com>
Signed-off-by: Anatoly Stepanov <stepanov.anatoly@huawei.com>
Co-developed-by: Nikita Fedorov <fedorov.nikita@h-partners.com>
Signed-off-by: Nikita Fedorov <fedorov.nikita@h-partners.com>
---
 kernel/locking/hqlock_core.h  | 57 +++++++++++++++++++++++++++++++++--
 kernel/locking/hqlock_meta.h  |  4 +++
 kernel/locking/hqlock_types.h |  8 +++--
 3 files changed, 65 insertions(+), 4 deletions(-)

diff --git a/kernel/locking/hqlock_core.h b/kernel/locking/hqlock_core.h
index 7322199228..e2ba09d758 100644
--- a/kernel/locking/hqlock_core.h
+++ b/kernel/locking/hqlock_core.h
@@ -450,6 +450,23 @@ static inline void hqlock_handoff(struct qspinlock *lock,
 					 struct mcs_spinlock *next, u32 tail,
 					 int handoff_info);
 
+/*
+ * In low_contention_mcs_lock_handoff we want to help the processor
+ * optimise its writes and avoid an extra read of our cpu cacheline
+ * (reading our qnode->numa_node), so the previous contender has saved
+ * its numa node in our prev_numa_node, and now we need to update the
+ * remote_handoffs counter ourselves.
+ */
+static __always_inline void update_counters_qspinlock(struct numa_qnode *qnode)
+{
+	if (qnode->numa_node != qnode->prev_numa_node) {
+		if ((qnode->general_handoffs - qnode->prev_general_handoffs)
+		    > hqlock_local_handoffs_to_increase_remotes) {
+			qnode->remote_handoffs++;
+		}
+
+		qnode->prev_general_handoffs = qnode->general_handoffs;
+	}
+}
 
 /*
 * Check if contention has risen and if we need to set NUMA-aware mode
@@ -458,8 +475,13 @@ static __always_inline bool determine_contention_qspinlock_mode(struct mcs_spinl
 {
 	struct numa_qnode *qnode = (void *)node;
 
-	if (qnode->general_handoffs > READ_ONCE(hqlock_general_handoffs_turn_numa))
+	unsigned long general_handoffs = (unsigned long) qnode->general_handoffs;
+	unsigned long remote_handoffs = (unsigned long) qnode->remote_handoffs;
+
+	if ((general_handoffs > hqlock_general_handoffs_turn_numa) &&
+		(remote_handoffs > hqlock_remote_handoffs_turn_numa))
 		return true;
+
 	return false;
 }
 
@@ -485,7 +507,14 @@ static __always_inline bool low_contention_try_clear_tail(struct qspinlock *lock
 	else
 		update_val |= _Q_LOCK_INVALID_TAIL;
 
-	return atomic_try_cmpxchg_relaxed(&lock->val, &val, update_val);
+	bool ret = atomic_try_cmpxchg_relaxed(&lock->val, &val, update_val);
+
+#ifdef CONFIG_HQSPINLOCKS_DEBUG
+	if (ret && high_contention)
+		atomic_inc(&transitions_from_qspinlock_to_hq);
+#endif
+
+	return ret;
 }
 
 static __always_inline void low_contention_mcs_lock_handoff(struct mcs_spinlock *node,
@@ -502,6 +531,17 @@ static __always_inline void low_contention_mcs_lock_handoff(struct mcs_spinlock
 		general_handoffs++;
 
 	qnext->general_handoffs = general_handoffs;
+	qnext->remote_handoffs = qnode->remote_handoffs;
+	qnext->prev_general_handoffs = qnode->prev_general_handoffs;
+
+	/*
+	 * Show the next contender our numa node and assume it will update
+	 * the remote_handoffs counter in update_counters_qspinlock itself,
+	 * instead of reading its numa_node and updating remote_handoffs here,
+	 * to avoid an extra cacheline transfer and help the processor
+	 * optimise the several writes done here.
+	 */
+	qnext->prev_numa_node = qnode->numa_node;
+
 	arch_mcs_spin_unlock_contended(&next->locked);
 }
 
@@ -557,6 +597,10 @@ static inline void hqlock_init_node(struct mcs_spinlock *node)
 	qnode->numa_node = numa_node_id() + 1;
 	qnode->lock_id = 0;
 	qnode->wrong_fallback_tail = 0;
+
+	qnode->remote_handoffs = 0;
+	qnode->prev_numa_node = 0;
+	qnode->prev_general_handoffs = 0;
 }
 
 static inline void reset_handoff_counter(struct numa_qnode *qnode)
@@ -580,6 +624,8 @@ static inline void handoff_local(struct mcs_spinlock *node,
 
 	qnext->general_handoffs = general_handoffs;
 
+	qnext->remote_handoffs = qnode->remote_handoffs;
+
 	u16 wrong_fallback_tail = qnode->wrong_fallback_tail;
 
 	if (wrong_fallback_tail != 0 && wrong_fallback_tail != (tail >> _Q_TAIL_OFFSET)) {
@@ -641,6 +687,13 @@ static inline void handoff_remote(struct qspinlock *lock,
 
 	mcs_head = (void *) qhead;
 
+	u16 remote_handoffs = qnode->remote_handoffs;
+
+	if (qnode->general_handoffs > hqlock_local_handoffs_to_increase_remotes)
+		remote_handoffs++;
+
+	qhead->remote_handoffs = remote_handoffs;
+
 	/* arch_mcs_spin_unlock_contended implies smp-barrier */
 	arch_mcs_spin_unlock_contended(&mcs_head->locked);
 }
diff --git a/kernel/locking/hqlock_meta.h b/kernel/locking/hqlock_meta.h
index 5b54801326..561d5a5fd0 100644
--- a/kernel/locking/hqlock_meta.h
+++ b/kernel/locking/hqlock_meta.h
@@ -307,6 +307,10 @@ static inline void release_lock_meta(struct qspinlock *lock,
 			goto do_rollback;
 	}
 
+	if (qnode->remote_handoffs < hqlock_remote_handoffs_keep_numa) {
+		upd_val |= _Q_LOCK_MODE_QSPINLOCK_VAL;
+	}
+
 	/*
 	 * We need wait until pending is gone.
 	 * Otherwise, clearing pending can erase a mode we will set here
diff --git a/kernel/locking/hqlock_types.h b/kernel/locking/hqlock_types.h
index 32d06f2755..40061f11a1 100644
--- a/kernel/locking/hqlock_types.h
+++ b/kernel/locking/hqlock_types.h
@@ -37,9 +37,13 @@ struct numa_qnode {
 
 	u16 lock_id;
 	u16 wrong_fallback_tail;
-	u16 general_handoffs;
-
 	u16 numa_node;
+
+
+	u16 general_handoffs;
+	u16 remote_handoffs;
+	u16 prev_general_handoffs;
+	u16 prev_numa_node;
 };
 
 struct numa_queue {
-- 
2.34.1



  parent reply	other threads:[~2026-04-15 16:47 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-15 16:44 [RFC PATCH v3 0/7] Fedorov Nikita
2026-04-15 16:44 ` [RFC PATCH v3 1/7] kernel: add hq-spinlock types Fedorov Nikita
2026-04-15 16:44 ` [RFC PATCH v3 2/7] hq-spinlock: implement inner logic Fedorov Nikita
2026-04-15 16:44 ` Fedorov Nikita [this message]
2026-04-15 16:44 ` [RFC PATCH v3 4/7] hq-spinlock: add hq-spinlock tunables and debug statistics Fedorov Nikita
2026-04-15 16:44 ` [RFC PATCH v3 5/7] kernel: introduce general hq-spinlock support Fedorov Nikita
2026-04-15 16:44 ` [RFC PATCH v3 6/7] lockref: use hq-spinlock Fedorov Nikita
2026-04-15 16:44 ` [RFC PATCH v3 7/7] futex: use hq-spinlock for hash buckets Fedorov Nikita

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260415164459.2904963-4-fedorov.nikita@h-partners.com \
    --to=fedorov.nikita@h-partners.com \
    --cc=ajay.kaher@broadcom.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexey.makhalov@broadcom.com \
    --cc=andrealmeid@igalia.com \
    --cc=apopple@nvidia.com \
    --cc=arnd@arndb.de \
    --cc=artem.kuzin@huawei.com \
    --cc=bcm-kernel-feedback-list@broadcom.com \
    --cc=boqun@kernel.org \
    --cc=bp@alien8.de \
    --cc=byungchul@sk.com \
    --cc=catalin.marinas@arm.com \
    --cc=chenjieping3@huawei.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=dave@stgolabs.net \
    --cc=david@kernel.org \
    --cc=dvhart@infradead.org \
    --cc=gourry@gourry.net \
    --cc=guohanjun@huawei.com \
    --cc=hpa@zytor.com \
    --cc=jgross@suse.com \
    --cc=joshua.hahnjy@gmail.com \
    --cc=kang.sun@huawei.com \
    --cc=leijitang@huawei.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=longman@redhat.com \
    --cc=matthew.brost@intel.com \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rakie.kim@sk.com \
    --cc=stepanov.anatoly@huawei.com \
    --cc=tglx@kernel.org \
    --cc=virtualization@lists.linux.dev \
    --cc=wangkefeng.wang@huawei.com \
    --cc=weiyongjun1@huawei.com \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    --cc=ying.huang@linux.alibaba.com \
    --cc=yusongping@huawei.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox