Re: [PATCH v5 2/6] ksm: support unsharing zero pages placed by KSM

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: David Hildenbrand <david@redhat.com>
To: yang.yang29@zte.com.cn, akpm@linux-foundation.org
Cc: imbrenda@linux.ibm.com, jiang.xuexin@zte.com.cn,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	ran.xiaokai@zte.com.cn, xu.xin.sc@gmail.com, xu.xin16@zte.com.cn
Subject: Re: [PATCH v5 2/6] ksm: support unsharing zero pages placed by KSM
Date: Wed, 18 Jan 2023 15:10:53 +0100	[thread overview]
Message-ID: <60125c55-9ab7-7531-e124-cb676943fea6@redhat.com> (raw)
In-Reply-To: <202212300913573751808@zte.com.cn>

On 30.12.22 02:13, yang.yang29@zte.com.cn wrote:
> From: xu xin <xu.xin16@zte.com.cn>
> 
> use_zero_pages may be very useful, not just because of cache colouring
> as described in doc, but also because use_zero_pages can accelerate
> merging empty pages when there are plenty of empty pages (full of zeros)
> as the time of page-by-page comparisons (unstable_tree_search_insert) is
> saved.
> 
> But when enabling use_zero_pages, madvise(addr, len, MADV_UNMERGEABLE) and
> other ways (like write 2 to /sys/kernel/mm/ksm/run) to trigger unsharing
> will *not* actually unshare the shared zeropage as placed by KSM (which is
> against the MADV_UNMERGEABLE documentation). As these KSM-placed zero pages
> are out of the control of KSM, the related counts of ksm pages don't expose
> how many zero pages are placed by KSM (these special zero pages are different
> from those initially mapped zero pages, because the zero pages mapped to
> MADV_UNMERGEABLE areas are expected to be a complete and unshared page)
> 
> To not blindly unshare all shared zero_pages in applicable VMAs, the patch
> introduces a dedicated flag ZERO_PAGE_FLAG to mark the rmap_items of those
> shared zero_pages. and guarantee that these rmap_items will be not freed
> during the time of zero_pages not being writing, so we can only unshare
> the *KSM-placed* zero_pages.
> 
> The patch will not degrade the performance of use_zero_pages as it doesn't
> change the way of merging empty pages in use_zero_pages's feature.
> 
> Signed-off-by: xu xin <xu.xin16@zte.com.cn>
> Reported-by: David Hildenbrand <david@redhat.com>
> Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
> Cc: Xuexin Jiang <jiang.xuexin@zte.com.cn>
> Reviewed-by: Xiaokai Ran <ran.xiaokai@zte.com.cn>
> Reviewed-by: Yang Yang <yang.yang29@zte.com.cn>

[...]

> 
>   /* The stable and unstable tree heads */
>   static struct rb_root one_stable_tree[1] = { RB_ROOT };
> @@ -420,6 +421,11 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
>   	return atomic_read(&mm->mm_users) == 0;
>   }
> 
> +enum break_ksm_pmd_entry_return_flag {
> +	HAVE_KSM_PAGE = 1,
> +	HAVE_ZERO_PAGE
> +};

Why use flags if they both conditions are mutually exclusive?

> +
>   static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next,
>   			struct mm_walk *walk)
>   {
> @@ -427,6 +433,7 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
>   	spinlock_t *ptl;
>   	pte_t *pte;
>   	int ret;
> +	bool is_zero_page = false;
> 
>   	if (pmd_leaf(*pmd) || !pmd_present(*pmd))
>   		return 0;
> @@ -434,6 +441,8 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
>   	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
>   	if (pte_present(*pte)) {
>   		page = vm_normal_page(walk->vma, addr, *pte);
> +		if (!page)
> +			is_zero_page = is_zero_pfn(pte_pfn(*pte));
>   	} else if (!pte_none(*pte)) {
>   		swp_entry_t entry = pte_to_swp_entry(*pte);
> 
> @@ -444,7 +453,14 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
>   		if (is_migration_entry(entry))
>   			page = pfn_swap_entry_to_page(entry);
>   	}
> -	ret = page && PageKsm(page);
> +
> +	if (page && PageKsm(page))
> +		ret = HAVE_KSM_PAGE;
> +	else if (is_zero_page)
> +		ret = HAVE_ZERO_PAGE;
> +	else
> +		ret = 0;
> +
>   	pte_unmap_unlock(pte, ptl);
>   	return ret;
>   }
> @@ -466,19 +482,22 @@ static const struct mm_walk_ops break_ksm_ops = {
>    * of the process that owns 'vma'.  We also do not want to enforce
>    * protection keys here anyway.
>    */
> -static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
> +static int break_ksm(struct vm_area_struct *vma, unsigned long addr,
> +				     bool unshare_zero_page)
>   {
>   	vm_fault_t ret = 0;
> 
>   	do {
> -		int ksm_page;
> +		int walk_result;
> 
>   		cond_resched();
> -		ksm_page = walk_page_range_vma(vma, addr, addr + 1,
> +		walk_result = walk_page_range_vma(vma, addr, addr + 1,
>   					       &break_ksm_ops, NULL);
> -		if (WARN_ON_ONCE(ksm_page < 0))
> -			return ksm_page;
> -		if (!ksm_page)
> +		if (WARN_ON_ONCE(walk_result < 0))
> +			return walk_result;
> +		if (!walk_result)
> +			return 0;
> +		if (walk_result == HAVE_ZERO_PAGE && !unshare_zero_page)
>   			return 0;
>   		ret = handle_mm_fault(vma, addr,
>   				      FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
> @@ -539,7 +558,7 @@ static void break_cow(struct ksm_rmap_item *rmap_item)
>   	mmap_read_lock(mm);
>   	vma = find_mergeable_vma(mm, addr);
>   	if (vma)
> -		break_ksm(vma, addr);
> +		break_ksm(vma, addr, false);
>   	mmap_read_unlock(mm);
>   }
> 
> @@ -764,6 +783,30 @@ static struct page *get_ksm_page(struct ksm_stable_node *stable_node,
>   	return NULL;
>   }
> 
> +/*
> + * Cleaning the rmap_item's ZERO_PAGE_FLAG
> + * This function will be called when unshare or writing on zero pages.
> + */
> +static inline void clean_rmap_item_zero_flag(struct ksm_rmap_item *rmap_item)
> +{
> +	if (rmap_item->address & ZERO_PAGE_FLAG)
> +		rmap_item->address &= PAGE_MASK;
> +}
> +
> +/* Only called when rmap_item is going to be freed */
> +static inline void unshare_zero_pages(struct ksm_rmap_item *rmap_item)
> +{
> +	struct vm_area_struct *vma;
> +
> +	if (rmap_item->address & ZERO_PAGE_FLAG) {
> +		vma = vma_lookup(rmap_item->mm, rmap_item->address);
> +		if (vma && !ksm_test_exit(rmap_item->mm))
> +			break_ksm(vma, rmap_item->address, true);
> +	}
> +	/* Put at last. */
> +	clean_rmap_item_zero_flag(rmap_item);
> +}
> +
>   /*
>    * Removing rmap_item from stable or unstable tree.
>    * This function will clean the information from the stable/unstable tree.
> @@ -824,6 +867,7 @@ static void remove_trailing_rmap_items(struct ksm_rmap_item **rmap_list)
>   		struct ksm_rmap_item *rmap_item = *rmap_list;
>   		*rmap_list = rmap_item->rmap_list;
>   		remove_rmap_item_from_tree(rmap_item);
> +		unshare_zero_pages(rmap_item);
>   		free_rmap_item(rmap_item);
>   	}
>   }
> @@ -853,7 +897,7 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
>   		if (signal_pending(current))
>   			err = -ERESTARTSYS;
>   		else
> -			err = break_ksm(vma, addr);
> +			err = break_ksm(vma, addr, false);
>   	}

MADV_UNMERGEABLE -> unmerge_ksm_pages() will never unshare the shared 
zeropage? I thought the patch description mentions that that is one of 
the goals?

-- 
Thanks,

David / dhildenb

next prev parent reply	other threads:[~2023-01-18 14:11 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-30  1:13 yang.yang29
2023-01-18 14:10 ` David Hildenbrand [this message]
2023-02-04  6:18   ` yang.yang29
2023-02-13 12:44     ` David Hildenbrand
2023-03-11  5:37 xu xin
2023-03-13 12:14 ` David Hildenbrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=60125c55-9ab7-7531-e124-cb676943fea6@redhat.com \
    --to=david@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=imbrenda@linux.ibm.com \
    --cc=jiang.xuexin@zte.com.cn \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ran.xiaokai@zte.com.cn \
    --cc=xu.xin.sc@gmail.com \
    --cc=xu.xin16@zte.com.cn \
    --cc=yang.yang29@zte.com.cn \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox