linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Baoquan He <bhe@redhat.com>
To: Kairui Song <kasong@tencent.com>
Cc: linux-mm@kvack.org, Andrew Morton <akpm@linux-foundation.org>,
	Chris Li <chrisl@kernel.org>, Barry Song <v-songbaohua@oppo.com>,
	Ryan Roberts <ryan.roberts@arm.com>,
	Hugh Dickins <hughd@google.com>,
	Yosry Ahmed <yosryahmed@google.com>,
	"Huang, Ying" <ying.huang@linux.alibaba.com>,
	Nhat Pham <nphamcs@gmail.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Kalesh Singh <kaleshsingh@google.com>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v3 04/13] mm, swap: use cluster lock for HDD
Date: Thu, 9 Jan 2025 12:07:43 +0800	[thread overview]
Message-ID: <Z39Lj5Vr4sfb0IlA@MiWiFi-R3L-srv> (raw)
In-Reply-To: <20241230174621.61185-5-ryncsn@gmail.com>

On 12/31/24 at 01:46am, Kairui Song wrote:
> From: Kairui Song <kasong@tencent.com>
> 
> Cluster lock (ci->lock) was introduce to reduce contention for certain
                              ~~~~~~~~~ typo, introduced.

> operations. Using cluster lock for HDD is not helpful as HDD have a poor
> performance, so locking isn't the bottleneck. But having different set
> of locks for HDD / non-HDD prevents further rework of device lock
> (si->lock).
> 
> This commit just changed all lock_cluster_or_swap_info to lock_cluster,
> which is a safe and straight conversion since cluster info is always
> allocated now, also removed all cluster_info related checks.
> 
> Suggested-by: Chris Li <chrisl@kernel.org>
> Signed-off-by: Kairui Song <kasong@tencent.com>
> ---
>  mm/swapfile.c | 107 ++++++++++++++++----------------------------------
>  1 file changed, 34 insertions(+), 73 deletions(-)

Other than the nit in the patch log, LGTM.

Reviewed-by: Baoquan He <bhe@redhat.com>

> 
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index fca58d43b836..d0e5b9fa0c48 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -58,10 +58,9 @@ static void swap_entry_range_free(struct swap_info_struct *si, swp_entry_t entry
>  static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,
>  			     unsigned int nr_entries);
>  static bool folio_swapcache_freeable(struct folio *folio);
> -static struct swap_cluster_info *lock_cluster_or_swap_info(
> -		struct swap_info_struct *si, unsigned long offset);
> -static void unlock_cluster_or_swap_info(struct swap_info_struct *si,
> -					struct swap_cluster_info *ci);
> +static struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
> +					      unsigned long offset);
> +static void unlock_cluster(struct swap_cluster_info *ci);
>  
>  static DEFINE_SPINLOCK(swap_lock);
>  static unsigned int nr_swapfiles;
> @@ -222,9 +221,9 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
>  	 * swap_map is HAS_CACHE only, which means the slots have no page table
>  	 * reference or pending writeback, and can't be allocated to others.
>  	 */
> -	ci = lock_cluster_or_swap_info(si, offset);
> +	ci = lock_cluster(si, offset);
>  	need_reclaim = swap_is_has_cache(si, offset, nr_pages);
> -	unlock_cluster_or_swap_info(si, ci);
> +	unlock_cluster(ci);
>  	if (!need_reclaim)
>  		goto out_unlock;
>  
> @@ -404,45 +403,15 @@ static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si
>  {
>  	struct swap_cluster_info *ci;
>  
> -	ci = si->cluster_info;
> -	if (ci) {
> -		ci += offset / SWAPFILE_CLUSTER;
> -		spin_lock(&ci->lock);
> -	}
> -	return ci;
> -}
> -
> -static inline void unlock_cluster(struct swap_cluster_info *ci)
> -{
> -	if (ci)
> -		spin_unlock(&ci->lock);
> -}
> -
> -/*
> - * Determine the locking method in use for this device.  Return
> - * swap_cluster_info if SSD-style cluster-based locking is in place.
> - */
> -static inline struct swap_cluster_info *lock_cluster_or_swap_info(
> -		struct swap_info_struct *si, unsigned long offset)
> -{
> -	struct swap_cluster_info *ci;
> -
> -	/* Try to use fine-grained SSD-style locking if available: */
> -	ci = lock_cluster(si, offset);
> -	/* Otherwise, fall back to traditional, coarse locking: */
> -	if (!ci)
> -		spin_lock(&si->lock);
> +	ci = &si->cluster_info[offset / SWAPFILE_CLUSTER];
> +	spin_lock(&ci->lock);
>  
>  	return ci;
>  }
>  
> -static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si,
> -					       struct swap_cluster_info *ci)
> +static inline void unlock_cluster(struct swap_cluster_info *ci)
>  {
> -	if (ci)
> -		unlock_cluster(ci);
> -	else
> -		spin_unlock(&si->lock);
> +	spin_unlock(&ci->lock);
>  }
>  
>  /* Add a cluster to discard list and schedule it to do discard */
> @@ -558,9 +527,6 @@ static void inc_cluster_info_page(struct swap_info_struct *si,
>  	unsigned long idx = page_nr / SWAPFILE_CLUSTER;
>  	struct swap_cluster_info *ci;
>  
> -	if (!cluster_info)
> -		return;
> -
>  	ci = cluster_info + idx;
>  	ci->count++;
>  
> @@ -576,9 +542,6 @@ static void inc_cluster_info_page(struct swap_info_struct *si,
>  static void dec_cluster_info_page(struct swap_info_struct *si,
>  				  struct swap_cluster_info *ci, int nr_pages)
>  {
> -	if (!si->cluster_info)
> -		return;
> -
>  	VM_BUG_ON(ci->count < nr_pages);
>  	VM_BUG_ON(cluster_is_free(ci));
>  	lockdep_assert_held(&si->lock);
> @@ -1007,8 +970,6 @@ static int cluster_alloc_swap(struct swap_info_struct *si,
>  {
>  	int n_ret = 0;
>  
> -	VM_BUG_ON(!si->cluster_info);
> -
>  	si->flags += SWP_SCANNING;
>  
>  	while (n_ret < nr) {
> @@ -1052,10 +1013,10 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
>  		}
>  
>  		/*
> -		 * Swapfile is not block device or not using clusters so unable
> +		 * Swapfile is not block device so unable
>  		 * to allocate large entries.
>  		 */
> -		if (!(si->flags & SWP_BLKDEV) || !si->cluster_info)
> +		if (!(si->flags & SWP_BLKDEV))
>  			return 0;
>  	}
>  
> @@ -1295,9 +1256,9 @@ static unsigned char __swap_entry_free(struct swap_info_struct *si,
>  	unsigned long offset = swp_offset(entry);
>  	unsigned char usage;
>  
> -	ci = lock_cluster_or_swap_info(si, offset);
> +	ci = lock_cluster(si, offset);
>  	usage = __swap_entry_free_locked(si, offset, 1);
> -	unlock_cluster_or_swap_info(si, ci);
> +	unlock_cluster(ci);
>  	if (!usage)
>  		free_swap_slot(entry);
>  
> @@ -1320,14 +1281,14 @@ static bool __swap_entries_free(struct swap_info_struct *si,
>  	if (nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER)
>  		goto fallback;
>  
> -	ci = lock_cluster_or_swap_info(si, offset);
> +	ci = lock_cluster(si, offset);
>  	if (!swap_is_last_map(si, offset, nr, &has_cache)) {
> -		unlock_cluster_or_swap_info(si, ci);
> +		unlock_cluster(ci);
>  		goto fallback;
>  	}
>  	for (i = 0; i < nr; i++)
>  		WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);
> -	unlock_cluster_or_swap_info(si, ci);
> +	unlock_cluster(ci);
>  
>  	if (!has_cache) {
>  		for (i = 0; i < nr; i++)
> @@ -1383,7 +1344,7 @@ static void cluster_swap_free_nr(struct swap_info_struct *si,
>  	DECLARE_BITMAP(to_free, BITS_PER_LONG) = { 0 };
>  	int i, nr;
>  
> -	ci = lock_cluster_or_swap_info(si, offset);
> +	ci = lock_cluster(si, offset);
>  	while (nr_pages) {
>  		nr = min(BITS_PER_LONG, nr_pages);
>  		for (i = 0; i < nr; i++) {
> @@ -1391,18 +1352,18 @@ static void cluster_swap_free_nr(struct swap_info_struct *si,
>  				bitmap_set(to_free, i, 1);
>  		}
>  		if (!bitmap_empty(to_free, BITS_PER_LONG)) {
> -			unlock_cluster_or_swap_info(si, ci);
> +			unlock_cluster(ci);
>  			for_each_set_bit(i, to_free, BITS_PER_LONG)
>  				free_swap_slot(swp_entry(si->type, offset + i));
>  			if (nr == nr_pages)
>  				return;
>  			bitmap_clear(to_free, 0, BITS_PER_LONG);
> -			ci = lock_cluster_or_swap_info(si, offset);
> +			ci = lock_cluster(si, offset);
>  		}
>  		offset += nr;
>  		nr_pages -= nr;
>  	}
> -	unlock_cluster_or_swap_info(si, ci);
> +	unlock_cluster(ci);
>  }
>  
>  /*
> @@ -1441,9 +1402,9 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
>  	if (!si)
>  		return;
>  
> -	ci = lock_cluster_or_swap_info(si, offset);
> +	ci = lock_cluster(si, offset);
>  	if (size > 1 && swap_is_has_cache(si, offset, size)) {
> -		unlock_cluster_or_swap_info(si, ci);
> +		unlock_cluster(ci);
>  		spin_lock(&si->lock);
>  		swap_entry_range_free(si, entry, size);
>  		spin_unlock(&si->lock);
> @@ -1451,14 +1412,14 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
>  	}
>  	for (int i = 0; i < size; i++, entry.val++) {
>  		if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) {
> -			unlock_cluster_or_swap_info(si, ci);
> +			unlock_cluster(ci);
>  			free_swap_slot(entry);
>  			if (i == size - 1)
>  				return;
> -			lock_cluster_or_swap_info(si, offset);
> +			lock_cluster(si, offset);
>  		}
>  	}
> -	unlock_cluster_or_swap_info(si, ci);
> +	unlock_cluster(ci);
>  }
>  
>  static int swp_entry_cmp(const void *ent1, const void *ent2)
> @@ -1522,9 +1483,9 @@ int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
>  	struct swap_cluster_info *ci;
>  	int count;
>  
> -	ci = lock_cluster_or_swap_info(si, offset);
> +	ci = lock_cluster(si, offset);
>  	count = swap_count(si->swap_map[offset]);
> -	unlock_cluster_or_swap_info(si, ci);
> +	unlock_cluster(ci);
>  	return count;
>  }
>  
> @@ -1547,7 +1508,7 @@ int swp_swapcount(swp_entry_t entry)
>  
>  	offset = swp_offset(entry);
>  
> -	ci = lock_cluster_or_swap_info(si, offset);
> +	ci = lock_cluster(si, offset);
>  
>  	count = swap_count(si->swap_map[offset]);
>  	if (!(count & COUNT_CONTINUED))
> @@ -1570,7 +1531,7 @@ int swp_swapcount(swp_entry_t entry)
>  		n *= (SWAP_CONT_MAX + 1);
>  	} while (tmp_count & COUNT_CONTINUED);
>  out:
> -	unlock_cluster_or_swap_info(si, ci);
> +	unlock_cluster(ci);
>  	return count;
>  }
>  
> @@ -1585,8 +1546,8 @@ static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
>  	int i;
>  	bool ret = false;
>  
> -	ci = lock_cluster_or_swap_info(si, offset);
> -	if (!ci || nr_pages == 1) {
> +	ci = lock_cluster(si, offset);
> +	if (nr_pages == 1) {
>  		if (swap_count(map[roffset]))
>  			ret = true;
>  		goto unlock_out;
> @@ -1598,7 +1559,7 @@ static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
>  		}
>  	}
>  unlock_out:
> -	unlock_cluster_or_swap_info(si, ci);
> +	unlock_cluster(ci);
>  	return ret;
>  }
>  
> @@ -3428,7 +3389,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
>  	offset = swp_offset(entry);
>  	VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER);
>  	VM_WARN_ON(usage == 1 && nr > 1);
> -	ci = lock_cluster_or_swap_info(si, offset);
> +	ci = lock_cluster(si, offset);
>  
>  	err = 0;
>  	for (i = 0; i < nr; i++) {
> @@ -3483,7 +3444,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
>  	}
>  
>  unlock_out:
> -	unlock_cluster_or_swap_info(si, ci);
> +	unlock_cluster(ci);
>  	return err;
>  }
>  
> -- 
> 2.47.1
> 
> 



  reply	other threads:[~2025-01-09  4:08 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-30 17:46 [PATCH v3 00/13] mm, swap: rework of swap allocator locks Kairui Song
2024-12-30 17:46 ` [PATCH v3 01/13] mm, swap: minor clean up for swap entry allocation Kairui Song
2025-01-09  4:04   ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 02/13] mm, swap: fold swap_info_get_cont in the only caller Kairui Song
2025-01-09  4:05   ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 03/13] mm, swap: remove old allocation path for HDD Kairui Song
2025-01-09  4:06   ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 04/13] mm, swap: use cluster lock " Kairui Song
2025-01-09  4:07   ` Baoquan He [this message]
2024-12-30 17:46 ` [PATCH v3 05/13] mm, swap: clean up device availability check Kairui Song
2025-01-09  4:08   ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 06/13] mm, swap: clean up plist removal and adding Kairui Song
2025-01-02  8:59   ` Baoquan He
2025-01-03  8:07     ` Kairui Song
2024-12-30 17:46 ` [PATCH v3 07/13] mm, swap: hold a reference during scan and cleanup flag usage Kairui Song
2025-01-04  5:46   ` Baoquan He
2025-01-13  5:34     ` Kairui Song
2025-01-20  2:39       ` Baoquan He
2025-01-27  9:19         ` Kairui Song
2025-02-05  9:18           ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 08/13] mm, swap: use an enum to define all cluster flags and wrap flags changes Kairui Song
2025-01-06  8:43   ` Baoquan He
2025-01-13  5:49     ` Kairui Song
2024-12-30 17:46 ` [PATCH v3 09/13] mm, swap: reduce contention on device lock Kairui Song
2025-01-06 10:12   ` Baoquan He
2025-01-08 11:09   ` Baoquan He
2025-01-09  2:15     ` Kairui Song
2025-01-10 11:23       ` Baoquan He
2025-01-13  6:33         ` Kairui Song
2025-01-13  8:07           ` Kairui Song
2024-12-30 17:46 ` [PATCH v3 10/13] mm, swap: simplify percpu cluster updating Kairui Song
2025-01-09  2:07   ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 11/13] mm, swap: introduce a helper for retrieving cluster from offset Kairui Song
2024-12-30 17:46 ` [PATCH v3 12/13] mm, swap: use a global swap cluster for non-rotation devices Kairui Song
2024-12-30 17:46 ` [PATCH v3 13/13] mm, swap_slots: remove slot cache for freeing path Kairui Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Z39Lj5Vr4sfb0IlA@MiWiFi-R3L-srv \
    --to=bhe@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=chrisl@kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=kaleshsingh@google.com \
    --cc=kasong@tencent.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=nphamcs@gmail.com \
    --cc=ryan.roberts@arm.com \
    --cc=v-songbaohua@oppo.com \
    --cc=ying.huang@linux.alibaba.com \
    --cc=yosryahmed@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox