linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Chih-En Lin <shiyn.lin@gmail.com>
To: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	David Hildenbrand <david@kernel.org>,
	Vishal Moola <vishal.moola@gmail.com>,
	linux-mm@kvack.org
Subject: Re: [PATCH 1/4] mm: Use frozen pages for page tables
Date: Wed, 19 Nov 2025 10:46:54 -0500	[thread overview]
Message-ID: <20251119154654.GA606021@gmail.com> (raw)
In-Reply-To: <20251113140448.1814860-2-willy@infradead.org>

On Thu, Nov 13, 2025 at 02:04:43PM +0000, Matthew Wilcox (Oracle) wrote:
> Page tables do not use the reference count.  That means we can avoid
> two atomic operations (one on alloc, one on free) by allocating frozen
> pages here.  This does not interfere with compaction as page tables are
> non-movable allocations.
> 
> pagetable_alloc() and pagetable_free() need to move out of line to make
> this work as alloc_frozen_page() and free_frozen_page() are not exported
> outside the mm for now.  We'll want them out of line anyway soon.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  include/linux/mm.h   | 53 +++++---------------------------------------
>  mm/memory.c          | 34 ++++++++++++++++++++++++++++
>  mm/pgtable-generic.c |  3 ++-
>  3 files changed, 42 insertions(+), 48 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 5087deecdd9c..e168ee23091e 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2995,58 +2995,17 @@ static inline void ptdesc_clear_kernel(struct ptdesc *ptdesc)
>   */
>  static inline bool ptdesc_test_kernel(const struct ptdesc *ptdesc)
>  {
> +#ifdef CONFIG_ASYNC_KERNEL_PGTABLE_FREE
>  	return test_bit(PT_kernel, &ptdesc->pt_flags.f);
> +#else
> +	return false;
> +#endif
>  }
>  
> -/**
> - * pagetable_alloc - Allocate pagetables
> - * @gfp:    GFP flags
> - * @order:  desired pagetable order
> - *
> - * pagetable_alloc allocates memory for page tables as well as a page table
> - * descriptor to describe that memory.
> - *
> - * Return: The ptdesc describing the allocated page tables.
> - */
> -static inline struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int order)
> -{
> -	struct page *page = alloc_pages_noprof(gfp | __GFP_COMP, order);
> -
> -	return page_ptdesc(page);
> -}
> +struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int order);
>  #define pagetable_alloc(...)	alloc_hooks(pagetable_alloc_noprof(__VA_ARGS__))
> -
> -static inline void __pagetable_free(struct ptdesc *pt)
> -{
> -	struct page *page = ptdesc_page(pt);
> -
> -	__free_pages(page, compound_order(page));
> -}
> -
> -#ifdef CONFIG_ASYNC_KERNEL_PGTABLE_FREE
> +void pagetable_free(struct ptdesc *pt);
>  void pagetable_free_kernel(struct ptdesc *pt);
> -#else
> -static inline void pagetable_free_kernel(struct ptdesc *pt)
> -{
> -	__pagetable_free(pt);
> -}
> -#endif
> -/**
> - * pagetable_free - Free pagetables
> - * @pt:	The page table descriptor
> - *
> - * pagetable_free frees the memory of all page tables described by a page
> - * table descriptor and the memory for the descriptor itself.
> - */
> -static inline void pagetable_free(struct ptdesc *pt)
> -{
> -	if (ptdesc_test_kernel(pt)) {
> -		ptdesc_clear_kernel(pt);
> -		pagetable_free_kernel(pt);
> -	} else {
> -		__pagetable_free(pt);
> -	}
> -}
>  
>  #if defined(CONFIG_SPLIT_PTE_PTLOCKS)
>  #if ALLOC_SPLIT_PTLOCKS
> diff --git a/mm/memory.c b/mm/memory.c
> index 1c66ee83a7ab..781cd7f607f7 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -7338,6 +7338,40 @@ long copy_folio_from_user(struct folio *dst_folio,
>  }
>  #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
>  
> +/**
> + * pagetable_alloc - Allocate pagetables
> + * @gfp:    GFP flags
> + * @order:  desired pagetable order
> + *
> + * pagetable_alloc allocates memory for page tables as well as a page table
> + * descriptor to describe that memory.
> + *
> + * Return: The ptdesc describing the allocated page tables.
> + */
> +struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int order)
> +{
> +	struct page *page = alloc_frozen_pages_noprof(gfp | __GFP_COMP, order);
> +
> +	return page_ptdesc(page);
> +}
> +
> +/**
> + * pagetable_free - Free pagetables
> + * @pt:	The page table descriptor
> + *
> + * pagetable_free frees the memory of all page tables described by a page
> + * table descriptor and the memory for the descriptor itself.
> + */
> +void pagetable_free(struct ptdesc *pt)
> +{
> +	struct page *page = ptdesc_page(pt);
> +
> +	if (ptdesc_test_kernel(pt))
> +		pagetable_free_kernel(pt);

Should we use test_and_clear_bit() here to prevent a double free?
Or is it unnecessary because the caller guarantees that no other
thread will free the same page tables?

> +	else
> +		free_frozen_pages(page, compound_order(page));
> +}
> +
>  #if defined(CONFIG_SPLIT_PTE_PTLOCKS) && ALLOC_SPLIT_PTLOCKS
>  
>  static struct kmem_cache *page_ptl_cachep;
> diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
> index d3aec7a9926a..597049e21ac1 100644
> --- a/mm/pgtable-generic.c
> +++ b/mm/pgtable-generic.c
> @@ -434,11 +434,12 @@ static void kernel_pgtable_work_func(struct work_struct *work)
>  
>  	iommu_sva_invalidate_kva_range(PAGE_OFFSET, TLB_FLUSH_ALL);
>  	list_for_each_entry_safe(pt, next, &page_list, pt_list)
> -		__pagetable_free(pt);
> +		pagetable_free(pt);
>  }
>  
>  void pagetable_free_kernel(struct ptdesc *pt)
>  {
> +	ptdesc_clear_kernel(pt);
>  	spin_lock(&kernel_pgtable_work.lock);
>  	list_add(&pt->pt_list, &kernel_pgtable_work.list);
>  	spin_unlock(&kernel_pgtable_work.lock);
> -- 
> 2.47.2
> 
>

Thanks,
Chih-En Lin


  parent reply	other threads:[~2025-11-19 15:47 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-13 14:04 [PATCH 0/4] Convert pgtable to use frozen pages Matthew Wilcox (Oracle)
2025-11-13 14:04 ` [PATCH 1/4] mm: Use frozen pages for page tables Matthew Wilcox (Oracle)
2025-11-13 18:24   ` Vishal Moola (Oracle)
2025-11-13 19:14     ` Vishal Moola (Oracle)
2025-11-14 13:45       ` Matthew Wilcox
2025-11-14 14:31       ` Will Deacon
2025-11-17 14:38   ` kernel test robot
2025-11-18  0:44     ` Vishal Moola (Oracle)
2025-11-19 15:46   ` Chih-En Lin [this message]
2025-11-20 13:55     ` David Hildenbrand (Red Hat)
2025-11-13 14:04 ` [PATCH 2/4] mm: Account pagetable memory when allocated Matthew Wilcox (Oracle)
2025-11-13 19:39   ` Vishal Moola (Oracle)
2025-11-13 14:04 ` [PATCH 3/4] mm: Mark " Matthew Wilcox (Oracle)
2025-11-18 17:00   ` David Hildenbrand (Red Hat)
2025-11-13 14:04 ` [PATCH 4/4] pgtable: Remove uses of page->lru Matthew Wilcox (Oracle)
2025-11-20 13:56   ` David Hildenbrand (Red Hat)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251119154654.GA606021@gmail.com \
    --to=shiyn.lin@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=vishal.moola@gmail.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.