* [PATCH] mm/hugetlb: Convert &folio->page to folio_page(folio, 0)
From: nifan.cxl @ 2025-04-09 0:49 UTC
To: muchun.song, willy
Cc: mcgrof, a.manzanares, dave, akpm, david, linux-mm, linux-kernel, Fan Ni
From: Fan Ni <fan.ni@samsung.com>
Convert the use of &folio->page to folio_page(folio, 0) where a struct
folio is already available. This is part of the effort to move some
fields out of struct page to reduce its size.
Signed-off-by: Fan Ni <fan.ni@samsung.com>
---
mm/huge_memory.c | 30 +++++++++++++++++-------------
mm/hugetlb.c | 20 +++++++++++---------
mm/hugetlb_vmemmap.c | 12 ++++++------
mm/khugepaged.c | 17 ++++++++++-------
4 files changed, 44 insertions(+), 35 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 28c87e0e036f..4c91fc594b6c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1492,7 +1492,7 @@ vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio,
ptl = pmd_lock(mm, vmf->pmd);
if (pmd_none(*vmf->pmd)) {
folio_get(folio);
- folio_add_file_rmap_pmd(folio, &folio->page, vma);
+ folio_add_file_rmap_pmd(folio, folio_page(folio, 0), vma);
add_mm_counter(mm, mm_counter_file(folio), HPAGE_PMD_NR);
}
error = insert_pfn_pmd(vma, addr, vmf->pmd,
@@ -1620,7 +1620,7 @@ vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio,
*/
if (pud_none(*vmf->pud)) {
folio_get(folio);
- folio_add_file_rmap_pud(folio, &folio->page, vma);
+ folio_add_file_rmap_pud(folio, folio_page(folio, 0), vma);
add_mm_counter(mm, mm_counter_file(folio), HPAGE_PUD_NR);
}
insert_pfn_pud(vma, addr, vmf->pud, pfn_to_pfn_t(folio_pfn(folio)),
@@ -2262,8 +2262,10 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
}
spin_unlock(ptl);
- if (flush_needed)
- tlb_remove_page_size(tlb, &folio->page, HPAGE_PMD_SIZE);
+ if (flush_needed) {
+ tlb_remove_page_size(tlb, folio_page(folio, 0),
+ HPAGE_PMD_SIZE);
+ }
}
return 1;
}
@@ -2630,7 +2632,7 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
}
if (src_folio) {
if (folio_maybe_dma_pinned(src_folio) ||
- !PageAnonExclusive(&src_folio->page)) {
+ !PageAnonExclusive(folio_page(src_folio, 0))) {
err = -EBUSY;
goto unlock_ptls;
}
@@ -3316,7 +3318,7 @@ static void __split_folio_to_order(struct folio *folio, int old_order,
* the flags from the original folio.
*/
for (i = new_nr_pages; i < nr_pages; i += new_nr_pages) {
- struct page *new_head = &folio->page + i;
+ struct page *new_head = folio_page(folio, i);
/*
* Careful: new_folio is not a "real" folio before we cleared PageTail.
@@ -3403,7 +3405,7 @@ static void __split_folio_to_order(struct folio *folio, int old_order,
if (new_order)
folio_set_order(folio, new_order);
else
- ClearPageCompound(&folio->page);
+ ClearPageCompound(folio_page(folio, 0));
}
/*
@@ -3528,7 +3530,7 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
}
folio_split_memcg_refs(folio, old_order, split_order);
- split_page_owner(&folio->page, old_order, split_order);
+ split_page_owner(folio_page(folio, 0), old_order, split_order);
pgalloc_tag_split(folio, old_order, split_order);
__split_folio_to_order(folio, old_order, split_order);
@@ -3640,7 +3642,7 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
* requires taking the lru_lock so we do the put_page
* of the tail pages after the split is complete.
*/
- free_page_and_swap_cache(&new_folio->page);
+ free_page_and_swap_cache(folio_page(new_folio, 0));
}
return ret;
}
@@ -3971,7 +3973,8 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
{
struct folio *folio = page_folio(page);
- return __folio_split(folio, new_order, &folio->page, page, list, true);
+ return __folio_split(folio, new_order, folio_page(folio, 0), page,
+ list, true);
}
/*
@@ -3999,8 +4002,8 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
int folio_split(struct folio *folio, unsigned int new_order,
struct page *split_at, struct list_head *list)
{
- return __folio_split(folio, new_order, split_at, &folio->page, list,
- false);
+ return __folio_split(folio, new_order, split_at, folio_page(folio, 0),
+ list, false);
}
int min_order_for_split(struct folio *folio)
@@ -4024,7 +4027,8 @@ int split_folio_to_list(struct folio *folio, struct list_head *list)
if (ret < 0)
return ret;
- return split_huge_page_to_list_to_order(&folio->page, list, ret);
+ return split_huge_page_to_list_to_order(folio_page(folio, 0), list,
+ ret);
}
/*
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 73417bea33f5..efbf5013986e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1306,7 +1306,7 @@ static struct folio *dequeue_hugetlb_folio_node_exact(struct hstate *h,
if (folio_test_hwpoison(folio))
continue;
- if (is_migrate_isolate_page(&folio->page))
+ if (is_migrate_isolate_page(folio_page(folio, 0)))
continue;
list_move(&folio->lru, &h->hugepage_activelist);
@@ -1840,7 +1840,7 @@ void free_huge_folio(struct folio *folio)
hugetlb_set_folio_subpool(folio, NULL);
if (folio_test_anon(folio))
- __ClearPageAnonExclusive(&folio->page);
+ __ClearPageAnonExclusive(folio_page(folio, 0));
folio->mapping = NULL;
restore_reserve = folio_test_hugetlb_restore_reserve(folio);
folio_clear_hugetlb_restore_reserve(folio);
@@ -4050,7 +4050,8 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
list_del(&folio->lru);
- split_page_owner(&folio->page, huge_page_order(src), huge_page_order(dst));
+ split_page_owner(folio_page(folio, 0), huge_page_order(src),
+ huge_page_order(dst));
pgalloc_tag_split(folio, huge_page_order(src), huge_page_order(dst));
for (i = 0; i < pages_per_huge_page(src); i += pages_per_huge_page(dst)) {
@@ -6185,9 +6186,9 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
* be leaks between processes, for example, with FOLL_GET users.
*/
if (folio_mapcount(old_folio) == 1 && folio_test_anon(old_folio)) {
- if (!PageAnonExclusive(&old_folio->page)) {
+ if (!PageAnonExclusive(folio_page(old_folio, 0))) {
folio_move_anon_rmap(old_folio, vma);
- SetPageAnonExclusive(&old_folio->page);
+ SetPageAnonExclusive(folio_page(old_folio, 0));
}
if (likely(!unshare))
set_huge_ptep_maybe_writable(vma, vmf->address,
@@ -6197,7 +6198,8 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
return 0;
}
VM_BUG_ON_PAGE(folio_test_anon(old_folio) &&
- PageAnonExclusive(&old_folio->page), &old_folio->page);
+ PageAnonExclusive(folio_page(old_folio, 0)),
+ folio_page(old_folio, 0));
/*
* If the process that created a MAP_PRIVATE mapping is about to
@@ -6249,8 +6251,8 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
hugetlb_vma_unlock_read(vma);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- unmap_ref_private(mm, vma, &old_folio->page,
- vmf->address);
+ unmap_ref_private(mm, vma, folio_page(old_folio, 0),
+ vmf->address);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
hugetlb_vma_lock_read(vma);
@@ -7832,7 +7834,7 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int re
struct hstate *h = folio_hstate(old_folio);
hugetlb_cgroup_migrate(old_folio, new_folio);
- set_page_owner_migrate_reason(&new_folio->page, reason);
+ set_page_owner_migrate_reason(folio_page(new_folio, 0), reason);
/*
* transfer temporary state of the new hugetlb folio. This is
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 9a99dfa3c495..97a7a67515cd 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -454,7 +454,7 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
struct folio *folio, unsigned long flags)
{
int ret;
- unsigned long vmemmap_start = (unsigned long)&folio->page, vmemmap_end;
+ unsigned long vmemmap_start = (unsigned long)folio_page(folio, 0), vmemmap_end;
unsigned long vmemmap_reuse;
VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio);
@@ -566,7 +566,7 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
unsigned long flags)
{
int ret = 0;
- unsigned long vmemmap_start = (unsigned long)&folio->page, vmemmap_end;
+ unsigned long vmemmap_start = (unsigned long)folio_page(folio, 0), vmemmap_end;
unsigned long vmemmap_reuse;
VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio);
@@ -632,7 +632,7 @@ void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio)
static int hugetlb_vmemmap_split_folio(const struct hstate *h, struct folio *folio)
{
- unsigned long vmemmap_start = (unsigned long)&folio->page, vmemmap_end;
+ unsigned long vmemmap_start = (unsigned long)folio_page(folio, 0), vmemmap_end;
unsigned long vmemmap_reuse;
if (!vmemmap_should_optimize_folio(h, folio))
@@ -668,13 +668,13 @@ static void __hugetlb_vmemmap_optimize_folios(struct hstate *h,
* Already optimized by pre-HVO, just map the
* mirrored tail page structs RO.
*/
- spfn = (unsigned long)&folio->page;
+ spfn = (unsigned long)folio_page(folio, 0);
epfn = spfn + pages_per_huge_page(h);
vmemmap_wrprotect_hvo(spfn, epfn, folio_nid(folio),
HUGETLB_VMEMMAP_RESERVE_SIZE);
register_page_bootmem_memmap(pfn_to_section_nr(spfn),
- &folio->page,
- HUGETLB_VMEMMAP_RESERVE_SIZE);
+ folio_page(folio, 0),
+ HUGETLB_VMEMMAP_RESERVE_SIZE);
static_branch_inc(&hugetlb_optimize_vmemmap_key);
continue;
}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b8838ba8207a..79d18ff5cda5 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -696,14 +696,16 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
result = SCAN_LACK_REFERENCED_PAGE;
} else {
result = SCAN_SUCCEED;
- trace_mm_collapse_huge_page_isolate(&folio->page, none_or_zero,
- referenced, writable, result);
+ trace_mm_collapse_huge_page_isolate(folio_page(folio, 0),
+ none_or_zero, referenced,
+ writable, result);
return result;
}
out:
release_pte_pages(pte, _pte, compound_pagelist);
- trace_mm_collapse_huge_page_isolate(&folio->page, none_or_zero,
- referenced, writable, result);
+ trace_mm_collapse_huge_page_isolate(folio_page(folio, 0),
+ none_or_zero, referenced,
+ writable, result);
return result;
}
@@ -1435,8 +1437,9 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
*mmap_locked = false;
}
out:
- trace_mm_khugepaged_scan_pmd(mm, &folio->page, writable, referenced,
- none_or_zero, result, unmapped);
+ trace_mm_khugepaged_scan_pmd(mm, folio_page(folio, 0), writable,
+ referenced, none_or_zero, result,
+ unmapped);
return result;
}
@@ -1689,7 +1692,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
maybe_install_pmd:
/* step 5: install pmd entry */
result = install_pmd
- ? set_huge_pmd(vma, haddr, pmd, &folio->page)
+ ? set_huge_pmd(vma, haddr, pmd, folio_page(folio, 0))
: SCAN_SUCCEED;
goto drop_folio;
abort:
--
2.47.2
* Re: [PATCH] mm/hugetlb: Convert &folio->page to folio_page(folio, 0)
From: Matthew Wilcox @ 2025-04-09 3:15 UTC
To: nifan.cxl
Cc: muchun.song, mcgrof, a.manzanares, dave, akpm, david, linux-mm,
linux-kernel, Fan Ni
On Tue, Apr 08, 2025 at 05:49:10PM -0700, nifan.cxl@gmail.com wrote:
> From: Fan Ni <fan.ni@samsung.com>
>
> Convert the use of &folio->page to folio_page(folio, 0) where a struct
> folio is already available. This is part of the effort to move some
> fields out of struct page to reduce its size.
Thanks for sending the patch. You've mixed together quite a few things;
I'd suggest focusing on one API at a time.
> folio_get(folio);
> - folio_add_file_rmap_pmd(folio, &folio->page, vma);
> + folio_add_file_rmap_pmd(folio, folio_page(folio, 0), vma);
> add_mm_counter(mm, mm_counter_file(folio), HPAGE_PMD_NR);
I think this is fine, but would defer to David Hildenbrand.
> folio_get(folio);
> - folio_add_file_rmap_pud(folio, &folio->page, vma);
> + folio_add_file_rmap_pud(folio, folio_page(folio, 0), vma);
> add_mm_counter(mm, mm_counter_file(folio), HPAGE_PUD_NR);
If that is fine, then so is this (put them in the same patchset).
> spin_unlock(ptl);
> - if (flush_needed)
> - tlb_remove_page_size(tlb, &folio->page, HPAGE_PMD_SIZE);
> + if (flush_needed) {
> + tlb_remove_page_size(tlb, folio_page(folio, 0),
> + HPAGE_PMD_SIZE);
> + }
You don't need to add the extra braces here. I haven't looked into this
family of APIs; not sure if we should keep passing the page here or
if tlb_remove_page_size() should take a folio argument.
> if (folio_maybe_dma_pinned(src_folio) ||
> - !PageAnonExclusive(&src_folio->page)) {
> + !PageAnonExclusive(folio_page(src_folio, 0))) {
> err = -EBUSY;
mmm. Another David question.
> for (i = new_nr_pages; i < nr_pages; i += new_nr_pages) {
> - struct page *new_head = &folio->page + i;
> + struct page *new_head = folio_page(folio, i);
>
This is definitely the right thing to do.
> @@ -3403,7 +3405,7 @@ static void __split_folio_to_order(struct folio *folio, int old_order,
> if (new_order)
> folio_set_order(folio, new_order);
> else
> - ClearPageCompound(&folio->page);
> + ClearPageCompound(folio_page(folio, 0));
> }
I might be inclined to leave this one alone; this whole function needs
to be rewritten as part of the folio split.
> folio_split_memcg_refs(folio, old_order, split_order);
> - split_page_owner(&folio->page, old_order, split_order);
> + split_page_owner(folio_page(folio, 0), old_order, split_order);
> pgalloc_tag_split(folio, old_order, split_order);
Not sure if split_folio_owner is something that should exist. Haven't
looked into it.
> */
> - free_page_and_swap_cache(&new_folio->page);
> + free_page_and_swap_cache(folio_page(new_folio, 0));
> }
free_page_and_swap_cache() should be converted to be
free_folio_and_swap_cache().
>
> - return __folio_split(folio, new_order, &folio->page, page, list, true);
> + return __folio_split(folio, new_order, folio_page(folio, 0), page,
> + list, true);
> }
Probably right.
> {
> - return __folio_split(folio, new_order, split_at, &folio->page, list,
> - false);
> + return __folio_split(folio, new_order, split_at, folio_page(folio, 0),
> + list, false);
> }
Ditto.
>
> - return split_huge_page_to_list_to_order(&folio->page, list, ret);
> + return split_huge_page_to_list_to_order(folio_page(folio, 0), list,
> + ret);
> }
Ditto.
>
> - if (is_migrate_isolate_page(&folio->page))
> + if (is_migrate_isolate_page(folio_page(folio, 0)))
> continue;
I think we need an is_migrate_isolate_folio() instead of this.
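Something like this, perhaps (untested sketch; the name is invented
here, not an existing helper):

	/* Hypothetical helper, sketched for illustration only: */
	static inline bool is_migrate_isolate_folio(struct folio *folio)
	{
		return is_migrate_isolate_page(folio_page(folio, 0));
	}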
> if (folio_test_anon(folio))
> - __ClearPageAnonExclusive(&folio->page);
> + __ClearPageAnonExclusive(folio_page(folio, 0));
> folio->mapping = NULL;
... David.
>
> - split_page_owner(&folio->page, huge_page_order(src), huge_page_order(dst));
> + split_page_owner(folio_page(folio, 0), huge_page_order(src),
> + huge_page_order(dst));
See earlier.
> if (folio_mapcount(old_folio) == 1 && folio_test_anon(old_folio)) {
> - if (!PageAnonExclusive(&old_folio->page)) {
> + if (!PageAnonExclusive(folio_page(old_folio, 0))) {
> folio_move_anon_rmap(old_folio, vma);
> - SetPageAnonExclusive(&old_folio->page);
> + SetPageAnonExclusive(folio_page(old_folio, 0));
> }
David.
> }
> VM_BUG_ON_PAGE(folio_test_anon(old_folio) &&
> - PageAnonExclusive(&old_folio->page), &old_folio->page);
> + PageAnonExclusive(folio_page(old_folio, 0)),
> + folio_page(old_folio, 0));
The PageAnonExclusive() part of this change is for David to comment on,
but this should be a VM_BUG_ON_FOLIO() instead of calling folio_page()
to keep this a VM_BUG_ON_PAGE().
>
> - unmap_ref_private(mm, vma, &old_folio->page,
> - vmf->address);
> + unmap_ref_private(mm, vma, folio_page(old_folio, 0),
> + vmf->address);
unmap_ref_private() only has one caller (this one), so make that take a
folio. This is a whole series, all by itself.
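i.e. end up with something shaped like this (sketch of the converted
signature, not tested):

	/* Hypothetical signature after the conversion: */
	static void unmap_ref_private(struct mm_struct *mm,
				      struct vm_area_struct *vma,
				      struct folio *folio,
				      unsigned long address);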
> hugetlb_cgroup_migrate(old_folio, new_folio);
> - set_page_owner_migrate_reason(&new_folio->page, reason);
> + set_page_owner_migrate_reason(folio_page(new_folio, 0), reason);
>
See earlier about page owner being folio or page based.
> int ret;
> - unsigned long vmemmap_start = (unsigned long)&folio->page, vmemmap_end;
> + unsigned long vmemmap_start = (unsigned long)folio_page(folio, 0), vmemmap_end;
> unsigned long vmemmap_reuse;
Probably right.
> int ret = 0;
> - unsigned long vmemmap_start = (unsigned long)&folio->page, vmemmap_end;
> + unsigned long vmemmap_start = (unsigned long)folio_page(folio, 0), vmemmap_end;
> unsigned long vmemmap_reuse;
Ditto.
> - unsigned long vmemmap_start = (unsigned long)&folio->page, vmemmap_end;
> + unsigned long vmemmap_start = (unsigned long)folio_page(folio, 0), vmemmap_end;
> unsigned long vmemmap_reuse;
Ditto.
> */
> - spfn = (unsigned long)&folio->page;
> + spfn = (unsigned long)folio_page(folio, 0);
Ditto.
> register_page_bootmem_memmap(pfn_to_section_nr(spfn),
> - &folio->page,
> - HUGETLB_VMEMMAP_RESERVE_SIZE);
> + folio_page(folio, 0),
> + HUGETLB_VMEMMAP_RESERVE_SIZE);
Don't change the indentation, but looks right.
> result = SCAN_SUCCEED;
> - trace_mm_collapse_huge_page_isolate(&folio->page, none_or_zero,
> - referenced, writable, result);
> + trace_mm_collapse_huge_page_isolate(folio_page(folio, 0),
> + none_or_zero, referenced,
> + writable, result);
> return result;
trace_mm_collapse_huge_page_isolate() should take a folio.
> release_pte_pages(pte, _pte, compound_pagelist);
> - trace_mm_collapse_huge_page_isolate(&folio->page, none_or_zero,
> - referenced, writable, result);
> + trace_mm_collapse_huge_page_isolate(folio_page(folio, 0),
> + none_or_zero, referenced,
> + writable, result);
> return result;
ditto.
> out:
> - trace_mm_khugepaged_scan_pmd(mm, &folio->page, writable, referenced,
> - none_or_zero, result, unmapped);
> + trace_mm_khugepaged_scan_pmd(mm, folio_page(folio, 0), writable,
> + referenced, none_or_zero, result,
> + unmapped);
> return result;
ditto,
> result = install_pmd
> - ? set_huge_pmd(vma, haddr, pmd, &folio->page)
> + ? set_huge_pmd(vma, haddr, pmd, folio_page(folio, 0))
> : SCAN_SUCCEED;
I feel that set_huge_pmd() should take a folio.
* Re: [PATCH] mm/hugetlb: Convert &folio->page to folio_page(folio, 0)
From: David Hildenbrand @ 2025-04-09 9:03 UTC
To: Matthew Wilcox, nifan.cxl
Cc: muchun.song, mcgrof, a.manzanares, dave, akpm, linux-mm,
linux-kernel, Fan Ni
On 09.04.25 05:15, Matthew Wilcox wrote:
> On Tue, Apr 08, 2025 at 05:49:10PM -0700, nifan.cxl@gmail.com wrote:
>> From: Fan Ni <fan.ni@samsung.com>
>>
>> Convert the use of &folio->page to folio_page(folio, 0) where a struct
>> folio is already available. This is part of the effort to move some
>> fields out of struct page to reduce its size.
>
> Thanks for sending the patch. You've mixed together quite a few things;
> I'd suggest focusing on one API at a time.
Agreed.
>
>> folio_get(folio);
>> - folio_add_file_rmap_pmd(folio, &folio->page, vma);
>> + folio_add_file_rmap_pmd(folio, folio_page(folio, 0), vma);
>> add_mm_counter(mm, mm_counter_file(folio), HPAGE_PMD_NR);
>
> I think this is fine, but would defer to David Hildenbrand.
For now this should be fine. We want a pointer to the actual first page.
In some future (with folios spanning multiple PMDs), this will not be
correct.
But the THP changes should *absolutely* not be included in this hugetlb
patch. I was severely confused staring at the usage of
folio_add_file_rmap_pmd() in a hugetlb context.
Actually, having to go back and fix up my comments below now that I see
that the diffstat is
mm/huge_memory.c | 30 +++++++++++++++++-------------
mm/hugetlb.c | 20 +++++++++++---------
mm/hugetlb_vmemmap.c | 12 ++++++------
Makes me angry.
>
>> folio_get(folio);
>> - folio_add_file_rmap_pud(folio, &folio->page, vma);
>> + folio_add_file_rmap_pud(folio, folio_page(folio, 0), vma);
>> add_mm_counter(mm, mm_counter_file(folio), HPAGE_PUD_NR);
>
> If that is fine, then so is this (put them in the same patchset).
>
>> spin_unlock(ptl);
>> - if (flush_needed)
>> - tlb_remove_page_size(tlb, &folio->page, HPAGE_PMD_SIZE);
>> + if (flush_needed) {
>> + tlb_remove_page_size(tlb, folio_page(folio, 0),
>> + HPAGE_PMD_SIZE);
>> + }
>
> You don't need to add the extra braces here. I haven't looked into this
> family of APIs; not sure if we should be passing the folio here or
> if it should be taking a folio argument.
>
>> if (folio_maybe_dma_pinned(src_folio) ||
>> - !PageAnonExclusive(&src_folio->page)) {
>> + !PageAnonExclusive(folio_page(src_folio, 0))) {
>> err = -EBUSY;
>
> mmm. Another David question.
For now this should be correct. (first page mapped by the PMD stores the
flag)
>
>> for (i = new_nr_pages; i < nr_pages; i += new_nr_pages) {
>> - struct page *new_head = &folio->page + i;
>> + struct page *new_head = folio_page(folio, i);
>>
>
> This is definitely the right thing to do.
>
>> @@ -3403,7 +3405,7 @@ static void __split_folio_to_order(struct folio *folio, int old_order,
>> if (new_order)
>> folio_set_order(folio, new_order);
>> else
>> - ClearPageCompound(&folio->page);
>> + ClearPageCompound(folio_page(folio, 0));
>> }
>
> I might be inclined to leave this one alone; this whole function needs
> to be rewritten as part of the folio split.
>
>> folio_split_memcg_refs(folio, old_order, split_order);
>> - split_page_owner(&folio->page, old_order, split_order);
>> + split_page_owner(folio_page(folio, 0), old_order, split_order);
>> pgalloc_tag_split(folio, old_order, split_order);
>
> Not sure if split_folio_owner is something that should exist. Haven't
> looked into it.
>
>> */
>> - free_page_and_swap_cache(&new_folio->page);
>> + free_page_and_swap_cache(folio_page(new_folio, 0));
>> }
>
> free_page_and_swap_cache() should be converted to be
> free_folio_and_swap_cache().
>
>>
>> - return __folio_split(folio, new_order, &folio->page, page, list, true);
>> + return __folio_split(folio, new_order, folio_page(folio, 0), page,
>> + list, true);
>> }
>
> Probably right.
>
>> {
>> - return __folio_split(folio, new_order, split_at, &folio->page, list,
>> - false);
>> + return __folio_split(folio, new_order, split_at, folio_page(folio, 0),
>> + list, false);
>> }
>
> Ditto.
>
>>
>> - return split_huge_page_to_list_to_order(&folio->page, list, ret);
>> + return split_huge_page_to_list_to_order(folio_page(folio, 0), list,
>> + ret);
>> }
>
> Ditto.
>
>>
>> - if (is_migrate_isolate_page(&folio->page))
>> + if (is_migrate_isolate_page(folio_page(folio, 0)))
>> continue;
>
> I think we need an is_migrate_isolate_folio() instead of this.
>
>> if (folio_test_anon(folio))
>> - __ClearPageAnonExclusive(&folio->page);
>> + __ClearPageAnonExclusive(folio_page(folio, 0));
>> folio->mapping = NULL;
>
> ... David.
See above.
>
>>
>> - split_page_owner(&folio->page, huge_page_order(src), huge_page_order(dst));
>> + split_page_owner(folio_page(folio, 0), huge_page_order(src),
>> + huge_page_order(dst));
>
> See earlier.
>
>> if (folio_mapcount(old_folio) == 1 && folio_test_anon(old_folio)) {
>> - if (!PageAnonExclusive(&old_folio->page)) {
>> + if (!PageAnonExclusive(folio_page(old_folio, 0))) {
>> folio_move_anon_rmap(old_folio, vma);
>> - SetPageAnonExclusive(&old_folio->page);
>> + SetPageAnonExclusive(folio_page(old_folio, 0));
>> }
>
> David.
See above.
>
>> }
>> VM_BUG_ON_PAGE(folio_test_anon(old_folio) &&
>> - PageAnonExclusive(&old_folio->page), &old_folio->page);
>> + PageAnonExclusive(folio_page(old_folio, 0)),
>> + folio_page(old_folio, 0));
>
> The PageAnonExclusive() part of this change is for David to comment on,
> but this should be a VM_BUG_ON_FOLIO() instead of calling folio_page()
> to keep this a VM_BUG_ON_PAGE().
Agreed.
--
Cheers,
David / dhildenb
* Re: [PATCH] mm/hugetlb: Convert &folio->page to folio_page(folio, 0)
From: Zi Yan @ 2025-04-09 17:27 UTC
To: Matthew Wilcox, nifan.cxl
Cc: muchun.song, mcgrof, a.manzanares, dave, akpm, david, linux-mm,
linux-kernel, Fan Ni
On Tue Apr 8, 2025 at 11:15 PM EDT, Matthew Wilcox wrote:
> On Tue, Apr 08, 2025 at 05:49:10PM -0700, nifan.cxl@gmail.com wrote:
>> From: Fan Ni <fan.ni@samsung.com>
>>
>> Convert the use of &folio->page to folio_page(folio, 0) where a struct
>> folio is already available. This is part of the effort to move some
>> fields out of struct page to reduce its size.
>
> Thanks for sending the patch. You've mixed together quite a few things;
> I'd suggest focusing on one API at a time.
>
<snip>
>> @@ -3403,7 +3405,7 @@ static void __split_folio_to_order(struct folio *folio, int old_order,
>> if (new_order)
>> folio_set_order(folio, new_order);
>> else
>> - ClearPageCompound(&folio->page);
>> + ClearPageCompound(folio_page(folio, 0));
>> }
>
> I might be inclined to leave this one alone; this whole function needs
> to be rewritten as part of the folio split.
You mean __split_folio_to_order() needs a rewrite, or that we need a
folio version of ClearPageCompound()? Something like
__folio_clear_compound()?
>
>> folio_split_memcg_refs(folio, old_order, split_order);
>> - split_page_owner(&folio->page, old_order, split_order);
>> + split_page_owner(folio_page(folio, 0), old_order, split_order);
>> pgalloc_tag_split(folio, old_order, split_order);
>
> Not sure if split_folio_owner is something that should exist. Haven't
> looked into it.
page owner info seems to be per page, but it should be OK to have
split_folio_owner() and do page level operations inside.
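Something like this could work (untested; the name is made up for
illustration):

	/* Hypothetical wrapper, untested: */
	static inline void split_folio_owner(struct folio *folio,
			int old_order, int new_order)
	{
		split_page_owner(folio_page(folio, 0), old_order, new_order);
	}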
>
>> */
>> - free_page_and_swap_cache(&new_folio->page);
>> + free_page_and_swap_cache(folio_page(new_folio, 0));
>> }
>
> free_page_and_swap_cache() should be converted to be
> free_folio_and_swap_cache().
>
>>
>> - return __folio_split(folio, new_order, &folio->page, page, list, true);
>> + return __folio_split(folio, new_order, folio_page(folio, 0), page,
>> + list, true);
>> }
>
> Probably right.
Yes, this is the uniform split; using the first page as split_at works.
>
>> {
>> - return __folio_split(folio, new_order, split_at, &folio->page, list,
>> - false);
>> + return __folio_split(folio, new_order, split_at, folio_page(folio, 0),
>> + list, false);
>> }
>
> Ditto.
Right. For a non-uniform split, the caller holds the folio lock, so
lock_at, which tells __folio_split() which folio to keep locked when
returning to the caller, is the first page.
>
>>
>> - return split_huge_page_to_list_to_order(&folio->page, list, ret);
>> + return split_huge_page_to_list_to_order(folio_page(folio, 0), list,
>> + ret);
>> }
>
> Ditto.
Yes. And there are two additional instances in include/linux/huge_mm.h
(Yeah, the header file of mm/huge_memory.c is huge_mm.h).
--
Best Regards,
Yan, Zi
* Re: [PATCH] mm/hugetlb: Convert &folio->page to folio_page(folio, 0)
From: Fan Ni @ 2025-04-09 22:07 UTC
To: Matthew Wilcox
Cc: nifan.cxl, muchun.song, mcgrof, a.manzanares, dave, akpm, david,
linux-mm, linux-kernel
On Wed, Apr 09, 2025 at 04:15:30AM +0100, Matthew Wilcox wrote:
> On Tue, Apr 08, 2025 at 05:49:10PM -0700, nifan.cxl@gmail.com wrote:
> > From: Fan Ni <fan.ni@samsung.com>
> >
> > Convert the use of &folio->page to folio_page(folio, 0) where a struct
> > folio is already available. This is part of the effort to move some
> > fields out of struct page to reduce its size.
>
> Thanks for sending the patch. You've mixed together quite a few things;
> I'd suggest focusing on one API at a time.
...
> > */
> > - free_page_and_swap_cache(&new_folio->page);
> > + free_page_and_swap_cache(folio_page(new_folio, 0));
> > }
>
> free_page_and_swap_cache() should be converted to be
> free_folio_and_swap_cache().
While looking into this function, I see it is defined as a macro in
swap.h as below,
#define free_page_and_swap_cache(page) \
put_page(page)
While checking put_page() in include/linux/mm.h, I see it always converts
the page to a folio; is there a reason why we do not take a "folio" directly?
static inline void put_page(struct page *page)
{
struct folio *folio = page_folio(page);
if (folio_test_slab(folio))
return;
folio_put(folio);
}
Fan
>
>
> >
> > - return __folio_split(folio, new_order, &folio->page, page, list, true);
> > + return __folio_split(folio, new_order, folio_page(folio, 0), page,
> > + list, true);
> > }
>
> Probably right.
>
> > {
> > - return __folio_split(folio, new_order, split_at, &folio->page, list,
> > - false);
> > + return __folio_split(folio, new_order, split_at, folio_page(folio, 0),
> > + list, false);
> > }
>
> Ditto.
>
> >
> > - return split_huge_page_to_list_to_order(&folio->page, list, ret);
> > + return split_huge_page_to_list_to_order(folio_page(folio, 0), list,
> > + ret);
> > }
>
> Ditto.
>
> >
> > - if (is_migrate_isolate_page(&folio->page))
> > + if (is_migrate_isolate_page(folio_page(folio, 0)))
> > continue;
>
> I think we need an is_migrate_isolate_folio() instead of this.
>
> > if (folio_test_anon(folio))
> > - __ClearPageAnonExclusive(&folio->page);
> > + __ClearPageAnonExclusive(folio_page(folio, 0));
> > folio->mapping = NULL;
>
> ... David.
>
> >
> > - split_page_owner(&folio->page, huge_page_order(src), huge_page_order(dst));
> > + split_page_owner(folio_page(folio, 0), huge_page_order(src),
> > + huge_page_order(dst));
>
> See earlier.
>
> > if (folio_mapcount(old_folio) == 1 && folio_test_anon(old_folio)) {
> > - if (!PageAnonExclusive(&old_folio->page)) {
> > + if (!PageAnonExclusive(folio_page(old_folio, 0))) {
> > folio_move_anon_rmap(old_folio, vma);
> > - SetPageAnonExclusive(&old_folio->page);
> > + SetPageAnonExclusive(folio_page(old_folio, 0));
> > }
>
> David.
>
> > }
> > VM_BUG_ON_PAGE(folio_test_anon(old_folio) &&
> > - PageAnonExclusive(&old_folio->page), &old_folio->page);
> > + PageAnonExclusive(folio_page(old_folio, 0)),
> > + folio_page(old_folio, 0));
>
> The PageAnonExclusive() part of this change is for David to comment on,
> but this should be a VM_BUG_ON_FOLIO() instead of calling folio_page()
> to keep this a VM_BUG_ON_PAGE().
>
> >
> > - unmap_ref_private(mm, vma, &old_folio->page,
> > - vmf->address);
> > + unmap_ref_private(mm, vma, folio_page(old_folio, 0),
> > + vmf->address);
>
> unmap_ref_private() only has one caller (this one), so make that take a
> folio. This is a whole series, all by itself.
>
> > hugetlb_cgroup_migrate(old_folio, new_folio);
> > - set_page_owner_migrate_reason(&new_folio->page, reason);
> > + set_page_owner_migrate_reason(folio_page(new_folio, 0), reason);
> >
>
> See earlier about page owner being folio or page based.
>
> > int ret;
> > - unsigned long vmemmap_start = (unsigned long)&folio->page, vmemmap_end;
> > + unsigned long vmemmap_start = (unsigned long)folio_page(folio, 0), vmemmap_end;
> > unsigned long vmemmap_reuse;
>
> Probably right.
>
> > int ret = 0;
> > - unsigned long vmemmap_start = (unsigned long)&folio->page, vmemmap_end;
> > + unsigned long vmemmap_start = (unsigned long)folio_page(folio, 0), vmemmap_end;
> > unsigned long vmemmap_reuse;
>
> Ditto.
>
> > - unsigned long vmemmap_start = (unsigned long)&folio->page, vmemmap_end;
> > + unsigned long vmemmap_start = (unsigned long)folio_page(folio, 0), vmemmap_end;
> > unsigned long vmemmap_reuse;
>
> Ditto.
>
> > */
> > - spfn = (unsigned long)&folio->page;
> > + spfn = (unsigned long)folio_page(folio, 0);
>
> Ditto.
>
> > register_page_bootmem_memmap(pfn_to_section_nr(spfn),
> > - &folio->page,
> > - HUGETLB_VMEMMAP_RESERVE_SIZE);
> > + folio_page(folio, 0),
> > + HUGETLB_VMEMMAP_RESERVE_SIZE);
>
> Don't change the indentation, but looks right.
>
> > result = SCAN_SUCCEED;
> > - trace_mm_collapse_huge_page_isolate(&folio->page, none_or_zero,
> > - referenced, writable, result);
> > + trace_mm_collapse_huge_page_isolate(folio_page(folio, 0),
> > + none_or_zero, referenced,
> > + writable, result);
> > return result;
>
> trace_mm_collapse_huge_page_isolate() should take a folio.
>
> > release_pte_pages(pte, _pte, compound_pagelist);
> > - trace_mm_collapse_huge_page_isolate(&folio->page, none_or_zero,
> > - referenced, writable, result);
> > + trace_mm_collapse_huge_page_isolate(folio_page(folio, 0),
> > + none_or_zero, referenced,
> > + writable, result);
> > return result;
>
> ditto.
>
> > out:
> > - trace_mm_khugepaged_scan_pmd(mm, &folio->page, writable, referenced,
> > - none_or_zero, result, unmapped);
> > + trace_mm_khugepaged_scan_pmd(mm, folio_page(folio, 0), writable,
> > + referenced, none_or_zero, result,
> > + unmapped);
> > return result;
>
> ditto,
>
> > result = install_pmd
> > - ? set_huge_pmd(vma, haddr, pmd, &folio->page)
> > + ? set_huge_pmd(vma, haddr, pmd, folio_page(folio, 0))
> > : SCAN_SUCCEED;
>
> I feel that set_huge_pmd() should take a folio.
* Re: [PATCH] mm/hugetlb: Convert &folio->page to folio_page(folio, 0)
From: Matthew Wilcox @ 2025-04-10 3:48 UTC
To: Fan Ni
Cc: muchun.song, mcgrof, a.manzanares, dave, akpm, david, linux-mm,
linux-kernel
On Wed, Apr 09, 2025 at 03:07:50PM -0700, Fan Ni wrote:
> On Wed, Apr 09, 2025 at 04:15:30AM +0100, Matthew Wilcox wrote:
> > > */
> > > - free_page_and_swap_cache(&new_folio->page);
> > > + free_page_and_swap_cache(folio_page(new_folio, 0));
> > > }
> >
> > free_page_and_swap_cache() should be converted to be
> > free_folio_and_swap_cache().
>
> While looking into this function, I see it is defined as a macro in
> swap.h as below,
> #define free_page_and_swap_cache(page) \
> put_page(page)
>
> While checking put_page() in include/linux.mm.h, it always converts page
> to folio, is there a reason why we do not take "folio" directly?
There are a lot of places to convert!
$ git grep '[^a-zA-Z_>]put_page(' |wc -l
472
... and that's after four years of doing folio conversions.
free_folio_and_swap_cache() should call folio_put() in the !CONFIG_SWAP
case.
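Something along these lines for the !CONFIG_SWAP side (sketch only;
the CONFIG_SWAP side is where the real conversion work is):

	/* Sketch of the !CONFIG_SWAP definition: */
	#define free_folio_and_swap_cache(folio) \
		folio_put(folio)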
* Re: [PATCH] mm/hugetlb: Convert &folio->page to folio_page(folio, 0)
From: Fan Ni @ 2025-04-18 22:42 UTC
To: Matthew Wilcox
Cc: nifan.cxl, muchun.song, mcgrof, a.manzanares, dave, akpm, david,
linux-mm, linux-kernel
On Wed, Apr 09, 2025 at 04:15:30AM +0100, Matthew Wilcox wrote:
> On Tue, Apr 08, 2025 at 05:49:10PM -0700, nifan.cxl@gmail.com wrote:
> > From: Fan Ni <fan.ni@samsung.com>
> >
> > Convert the use of &folio->page to folio_page(folio, 0) where a struct
> > folio is already available. This is part of the effort to move some
> > fields out of struct page to reduce its size.
>
> Thanks for sending the patch. You've mixed together quite a few things;
> I'd suggest focusing on one API at a time.
>
...
> > out:
> > - trace_mm_khugepaged_scan_pmd(mm, &folio->page, writable, referenced,
> > - none_or_zero, result, unmapped);
> > + trace_mm_khugepaged_scan_pmd(mm, folio_page(folio, 0), writable,
> > + referenced, none_or_zero, result,
> > + unmapped);
> > return result;
>
> ditto,
>
> > result = install_pmd
> > - ? set_huge_pmd(vma, haddr, pmd, &folio->page)
> > + ? set_huge_pmd(vma, haddr, pmd, folio_page(folio, 0))
> > : SCAN_SUCCEED;
>
> I feel that set_huge_pmd() should take a folio.
There is a patch on the mailing list for it,
https://lore.kernel.org/linux-mm/20240817095122.2460977-5-wangkefeng.wang@huawei.com/
If the above patch is needed, do_set_pmd() should be converted to use a
folio as well.
Fan
* Re: [PATCH] mm/hugetlb: Convert &folio->page to folio_page(folio, 0)
From: Matthew Wilcox @ 2025-04-23 18:50 UTC
To: Fan Ni
Cc: muchun.song, mcgrof, a.manzanares, dave, akpm, david, linux-mm,
linux-kernel
On Fri, Apr 18, 2025 at 03:42:45PM -0700, Fan Ni wrote:
> > > result = install_pmd
> > > - ? set_huge_pmd(vma, haddr, pmd, &folio->page)
> > > + ? set_huge_pmd(vma, haddr, pmd, folio_page(folio, 0))
> > > : SCAN_SUCCEED;
> >
> > I feel that set_huge_pmd() should take a folio.
> There is a patch on the mailing list for it,
> https://lore.kernel.org/linux-mm/20240817095122.2460977-5-wangkefeng.wang@huawei.com/
>
> If the above patch is needed, do_set_pmd() should be converted to use folio.
Maybe? I think we'll eventually want to support folios larger than PMD
size. So I don't want to pass in a folio here unless we can calculate the
precise PMD-size chunk we want to map from this folio given the
information available in vmf. I know that today the implementation
does this:
if (folio_order(folio) != HPAGE_PMD_ORDER)
return ret;
page = &folio->page;
but eventually we should do better than that. And I don't want to
lose the information about which page in the folio we really want
to map. So have a think about what the right interface should be
(passing in a page? folio + offset-in-number-of-pages? Something
involving the PFN?)
The obvious hurdle is folio_add_file_rmap_pmd() which today takes both a
folio and a page.
* Re: [PATCH] mm/hugetlb: Convert &folio->page to folio_page(folio, 0)
From: David Hildenbrand @ 2025-04-23 19:16 UTC
To: Matthew Wilcox, Fan Ni
Cc: muchun.song, mcgrof, a.manzanares, dave, akpm, linux-mm, linux-kernel
On 23.04.25 20:50, Matthew Wilcox wrote:
> On Fri, Apr 18, 2025 at 03:42:45PM -0700, Fan Ni wrote:
>>>> result = install_pmd
>>>> - ? set_huge_pmd(vma, haddr, pmd, &folio->page)
>>>> + ? set_huge_pmd(vma, haddr, pmd, folio_page(folio, 0))
>>>> : SCAN_SUCCEED;
>>>
>>> I feel that set_huge_pmd() should take a folio.
>> There is a patch on the mailing list for it,
>> https://lore.kernel.org/linux-mm/20240817095122.2460977-5-wangkefeng.wang@huawei.com/
>>
>> If the above patch is needed, do_set_pmd() should be converted to use folio.
>
> Maybe? I think we'll eventually want to support folios larger than PMD
> size. So I don't want to pass in a folio here unless we can calculate the
> precise PMD-size chunk we want to map from this folio given the
> information available in vmf. I know that today the implementation
> does this:
>
> if (folio_order(folio) != HPAGE_PMD_ORDER)
> return ret;
> page = &folio->page;
>
> but eventually we should do better than that. And I don't want to
> lose the information about which page in the folio we really want
> to map. So have a think about what the right interface should be
> (passing in a page? folio + offset-in-number-of-pages? Something
> involving the PFN?)
>
> The obvious hurdle is folio_add_file_rmap_pmd() which today takes both a
> folio and a page.
The interface is prepared for that use case: I decided to pass in folio
+ page to allow for folios that span multiple PMDs. (Obviously, the
internal implementation does not allow for that yet.)
We could later change it to folio + offset, but should do so for all
rmap functions consistently.
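Purely for illustration, a folio + offset variant might look like this
(invented signature, just to show the shape):

	/* Invented variant, shape only: */
	void folio_add_file_rmap_pmd(struct folio *folio, unsigned long idx,
				     struct vm_area_struct *vma);

with callers passing folio_page_idx(folio, page) instead of the page
pointer.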
--
Cheers,
David / dhildenb