linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Yosry Ahmed <yosryahmed@google.com>
To: Usama Arif <usamaarif642@gmail.com>
Cc: akpm@linux-foundation.org, hannes@cmpxchg.org,
	shakeel.butt@linux.dev,  david@redhat.com, ying.huang@intel.com,
	hughd@google.com, willy@infradead.org,  nphamcs@gmail.com,
	chengming.zhou@linux.dev, linux-mm@kvack.org,
	 linux-kernel@vger.kernel.org, kernel-team@meta.com
Subject: Re: [PATCH v4 1/2] mm: store zero pages to be swapped out in a bitmap
Date: Wed, 12 Jun 2024 20:13:51 +0000	[thread overview]
Message-ID: <ZmoBf6RPJzC2RaqM@google.com> (raw)
In-Reply-To: <20240612124750.2220726-2-usamaarif642@gmail.com>

On Wed, Jun 12, 2024 at 01:43:35PM +0100, Usama Arif wrote:
[..]

Hi Usama,

A few more comments/questions, sorry for not looking closely earlier.

> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index f1e559e216bd..48d8dca0b94b 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -453,6 +453,8 @@ static unsigned int cluster_list_del_first(struct swap_cluster_list *list,
>  static void swap_cluster_schedule_discard(struct swap_info_struct *si,
>  		unsigned int idx)
>  {
> +	unsigned int i;
> +
>  	/*
>  	 * If scan_swap_map_slots() can't find a free cluster, it will check
>  	 * si->swap_map directly. To make sure the discarding cluster isn't
> @@ -461,6 +463,13 @@ static void swap_cluster_schedule_discard(struct swap_info_struct *si,
>  	 */
>  	memset(si->swap_map + idx * SWAPFILE_CLUSTER,
>  			SWAP_MAP_BAD, SWAPFILE_CLUSTER);
> +	/*
> +	 * zeromap can see updates from concurrent swap_writepage() and swap_read_folio()
> +	 * call on other slots, hence use atomic clear_bit for zeromap instead of the
> +	 * non-atomic bitmap_clear.
> +	 */

I don't think this is accurate. swap_read_folio() does not update the
zeromap. I think the need for an atomic operation here is because we may
be updating adjacent bits simulatenously, so we may cause lost updates
otherwise (i.e. corrupting adjacent bits).

> +	for (i = 0; i < SWAPFILE_CLUSTER; i++)
> +		clear_bit(idx * SWAPFILE_CLUSTER + i, si->zeromap);

Could you explain why we need to clear the zeromap here?

swap_cluster_schedule_discard() is called from:
- swap_free_cluster() -> free_cluster()

This is already covered below.

- swap_entry_free() -> dec_cluster_info_page() -> free_cluster()

Each entry in the cluster should have its zeromap bit cleared in
swap_entry_free() before the entire cluster is free and we call
free_cluster().

Am I missing something?

>  
>  	cluster_list_add_tail(&si->discard_clusters, si->cluster_info, idx);
>  
> @@ -482,7 +491,7 @@ static void __free_cluster(struct swap_info_struct *si, unsigned long idx)
>  static void swap_do_scheduled_discard(struct swap_info_struct *si)
>  {
>  	struct swap_cluster_info *info, *ci;
> -	unsigned int idx;
> +	unsigned int idx, i;
>  
>  	info = si->cluster_info;
>  
> @@ -498,6 +507,8 @@ static void swap_do_scheduled_discard(struct swap_info_struct *si)
>  		__free_cluster(si, idx);
>  		memset(si->swap_map + idx * SWAPFILE_CLUSTER,
>  				0, SWAPFILE_CLUSTER);
> +		for (i = 0; i < SWAPFILE_CLUSTER; i++)
> +			clear_bit(idx * SWAPFILE_CLUSTER + i, si->zeromap);

Same here. I didn't look into the specific code paths, but shouldn't the
cluster be unused (and hence its zeromap bits already cleared?).

>  		unlock_cluster(ci);
>  	}
>  }
> @@ -1059,9 +1070,12 @@ static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx)
>  {
>  	unsigned long offset = idx * SWAPFILE_CLUSTER;
>  	struct swap_cluster_info *ci;
> +	unsigned int i;
>  
>  	ci = lock_cluster(si, offset);
>  	memset(si->swap_map + offset, 0, SWAPFILE_CLUSTER);
> +	for (i = 0; i < SWAPFILE_CLUSTER; i++)
> +		clear_bit(offset + i, si->zeromap);
>  	cluster_set_count_flag(ci, 0, 0);
>  	free_cluster(si, idx);
>  	unlock_cluster(ci);
> @@ -1336,6 +1350,7 @@ static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry)
>  	count = p->swap_map[offset];
>  	VM_BUG_ON(count != SWAP_HAS_CACHE);
>  	p->swap_map[offset] = 0;
> +	clear_bit(offset, p->zeromap);

I think instead of clearing the zeromap in swap_free_cluster() and here
separately, we can just do it in swap_range_free(). I suspect this may
be the only place we really need to clear the zero in the swapfile code.

>  	dec_cluster_info_page(p, p->cluster_info, offset);
>  	unlock_cluster(ci);
>  
> @@ -2597,6 +2612,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
>  	free_percpu(p->cluster_next_cpu);
>  	p->cluster_next_cpu = NULL;
>  	vfree(swap_map);
> +	bitmap_free(p->zeromap);
>  	kvfree(cluster_info);
>  	/* Destroy swap account information */
>  	swap_cgroup_swapoff(p->type);
> @@ -3123,6 +3139,12 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
>  		goto bad_swap_unlock_inode;
>  	}
>  
> +	p->zeromap = bitmap_zalloc(maxpages, GFP_KERNEL);
> +	if (!p->zeromap) {
> +		error = -ENOMEM;
> +		goto bad_swap_unlock_inode;
> +	}
> +
>  	if (p->bdev && bdev_stable_writes(p->bdev))
>  		p->flags |= SWP_STABLE_WRITES;
>  
> -- 
> 2.43.0
> 


  reply	other threads:[~2024-06-12 20:13 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-12 12:43 [PATCH v4 0/2] " Usama Arif
2024-06-12 12:43 ` [PATCH v4 1/2] " Usama Arif
2024-06-12 20:13   ` Yosry Ahmed [this message]
2024-06-13 11:37     ` Usama Arif
2024-06-13 16:38       ` Yosry Ahmed
2024-06-13 19:21         ` Usama Arif
2024-06-13 19:26           ` Yosry Ahmed
2024-06-13 19:38             ` Usama Arif
2024-09-04  5:55   ` Barry Song
2024-09-04  7:12     ` Yosry Ahmed
2024-09-04  7:17       ` Barry Song
2024-09-04  7:22         ` Yosry Ahmed
2024-09-04  7:54           ` Barry Song
2024-09-04 17:40             ` Yosry Ahmed
2024-09-05  7:03               ` Barry Song
2024-09-05  7:55                 ` Yosry Ahmed
2024-09-05  8:49                   ` Barry Song
2024-09-05 10:10                     ` Barry Song
2024-09-05 10:33                       ` Barry Song
2024-09-05 10:53                         ` Usama Arif
2024-09-05 11:00                           ` Barry Song
2024-09-05 19:19                             ` Usama Arif
2024-09-05 17:36                         ` Yosry Ahmed
2024-09-05 19:28                         ` Yosry Ahmed
2024-09-06 10:22                           ` Barry Song
2024-09-05 10:37                       ` Usama Arif
2024-09-05 10:42                         ` Barry Song
2024-09-05 10:50                           ` Usama Arif
2024-09-04 11:14     ` Usama Arif
2024-09-04 23:44       ` Barry Song
2024-09-04 23:47         ` Barry Song
2024-09-04 23:57         ` Yosry Ahmed
2024-09-05  0:29           ` Barry Song
2024-09-05  7:38             ` Yosry Ahmed
2024-06-12 12:43 ` [PATCH v4 2/2] mm: remove code to handle same filled pages Usama Arif
2024-06-12 15:09   ` Nhat Pham
2024-06-12 16:34     ` Usama Arif

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZmoBf6RPJzC2RaqM@google.com \
    --to=yosryahmed@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=chengming.zhou@linux.dev \
    --cc=david@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=nphamcs@gmail.com \
    --cc=shakeel.butt@linux.dev \
    --cc=usamaarif642@gmail.com \
    --cc=willy@infradead.org \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox