On 06/08/25 3:20 pm, Lorenzo Stoakes wrote:
On Wed, Aug 06, 2025 at 03:07:49PM +0530, Dev Jain wrote:
You mean in _this_ PTE of the batch, right? As we're invoking these on each
part of the PTE table.

I mean I guess we can simply do:

    struct page *first_page = pte_page(ptent);

Right?
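(A minimal sketch of that quick fix in context, hypothetical and untested —
it assumes ptent maps the first page of the batch, so pte_page() replaces
the folio_page(folio, 0) lookup in commit_anon_folio_batch(), shown in the
diff below:)

    /* Derive the batch's first page straight from the first PTE,
     * instead of assuming the folio starts at the batch. */
    struct page *first_page = pte_page(ptent);

    expected_anon_exclusive = PageAnonExclusive(first_page + sub_batch_idx);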
Yes, but we should forward the result from vm_normal_page(), which does
exactly that for you, and increment the page accordingly as required,
just like with the pte we are processing.
Makes sense, so I guess I will have to change the signature of
prot_numa_skip() to pass a double pointer to a page instead of a folio,
derive the folio in the caller, and pass down both the folio and the page
to set_write_prot_commit_flush_ptes().
I already don't love how we pass the folio back from there for very dubious
benefit. I really hate the idea of having a struct page ** parameter...

I wonder if we should just have a quick fixup for the hotfix, and refine this
more later?

I foresee some debate otherwise...

Yup, I would personally prefer that. Although if you would like to see the
churn, here it is (based on mm-hotfixes-unstable, untested):

---
mm/mprotect.c | 27 ++++++++++++++++-----------
1 file changed, 16 insertions(+), 11 deletions(-)
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 78bded7acf79..0735870e89ab 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -120,9 +120,10 @@ static int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep,
static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
pte_t oldpte, pte_t *pte, int target_node,
- struct folio **foliop)
+ struct page **pagep)
{
struct folio *folio = NULL;
+ struct page *page = NULL;
bool ret = true;
bool toptier;
int nid;
@@ -131,7 +132,9 @@ static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
if (pte_protnone(oldpte))
goto skip;
- folio = vm_normal_folio(vma, addr, oldpte);
+ page = vm_normal_page(vma, addr, oldpte);
+ if (page)
+ folio = page_folio(page);
if (!folio)
goto skip;
@@ -173,7 +176,7 @@ static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
folio_xchg_access_time(folio, jiffies_to_msecs(jiffies));
skip:
- *foliop = folio;
+ *pagep = page;
return ret;
}
@@ -231,18 +234,17 @@ static int page_anon_exclusive_sub_batch(int start_idx, int max_len,
* retrieve sub-batches.
*/
static void commit_anon_folio_batch(struct vm_area_struct *vma,
- struct folio *folio, unsigned long addr, pte_t *ptep,
+ struct folio *folio, struct page *page, unsigned long addr, pte_t *ptep,
pte_t oldpte, pte_t ptent, int nr_ptes, struct mmu_gather *tlb)
{
- struct page *first_page = folio_page(folio, 0);
bool expected_anon_exclusive;
int sub_batch_idx = 0;
int len;
while (nr_ptes) {
- expected_anon_exclusive = PageAnonExclusive(first_page + sub_batch_idx);
+ expected_anon_exclusive = PageAnonExclusive(nth_page(page, sub_batch_idx));
len = page_anon_exclusive_sub_batch(sub_batch_idx, nr_ptes,
- first_page, expected_anon_exclusive);
+ page, expected_anon_exclusive);
prot_commit_flush_ptes(vma, addr, ptep, oldpte, ptent, len,
sub_batch_idx, expected_anon_exclusive, tlb);
sub_batch_idx += len;
@@ -251,7 +253,7 @@ static void commit_anon_folio_batch(struct vm_area_struct *vma,
}
static void set_write_prot_commit_flush_ptes(struct vm_area_struct *vma,
- struct folio *folio, unsigned long addr, pte_t *ptep,
+ struct folio *folio, struct page *page, unsigned long addr, pte_t *ptep,
pte_t oldpte, pte_t ptent, int nr_ptes, struct mmu_gather *tlb)
{
bool set_write;
@@ -270,7 +272,7 @@ static void set_write_prot_commit_flush_ptes(struct vm_area_struct *vma,
/* idx = */ 0, set_write, tlb);
return;
}
- commit_anon_folio_batch(vma, folio, addr, ptep, oldpte, ptent, nr_ptes, tlb);
+ commit_anon_folio_batch(vma, folio, page, addr, ptep, oldpte, ptent, nr_ptes, tlb);
}
static long change_pte_range(struct mmu_gather *tlb,
@@ -305,6 +307,7 @@ static long change_pte_range(struct mmu_gather *tlb,
const fpb_t flags = FPB_RESPECT_SOFT_DIRTY | FPB_RESPECT_WRITE;
int max_nr_ptes = (end - addr) >> PAGE_SHIFT;
struct folio *folio = NULL;
+ struct page *page = NULL;
pte_t ptent;
/*
@@ -313,7 +316,9 @@ static long change_pte_range(struct mmu_gather *tlb,
*/
if (prot_numa) {
int ret = prot_numa_skip(vma, addr, oldpte, pte,
- target_node, &folio);
+ target_node, &page);
+ if (page)
+ folio = page_folio(page);
if (ret) {
/* determine batch to skip */
@@ -351,7 +356,7 @@ static long change_pte_range(struct mmu_gather *tlb,
*/
if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) &&
!pte_write(ptent))
- set_write_prot_commit_flush_ptes(vma, folio,
+ set_write_prot_commit_flush_ptes(vma, folio, page,
addr, pte, oldpte, ptent, nr_ptes, tlb);
else
prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent,
--
2.30.2
I think the really correct way here is to pass a double pointer to both the
folio and the page, since the way I do it now, I potentially do a
page_folio() twice, which kind of defeats the purpose of the trouble we took
to save on a vm_normal_folio() :)
So I would prefer the pte_page(ptent) thingy for the fix patch.
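(For completeness, a sketch of that double-pointer variant — hypothetical
and untested, with names matching the diff above:)

    static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
            pte_t oldpte, pte_t *pte, int target_node,
            struct folio **foliop, struct page **pagep)
    {
        struct folio *folio = NULL;
        struct page *page = NULL;
        bool ret = true;

        page = vm_normal_page(vma, addr, oldpte);
        if (page)
            folio = page_folio(page);
        /* ... NUMA skip checks on folio, as in the diff above ... */

        /* Hand both back so the caller never repeats page_folio(). */
        *foliop = folio;
        *pagep = page;
        return ret;
    }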

...

+            else
+                prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent,
+                    nr_ptes, /* idx = */ 0, /* set_write =
*/ false, tlb);
Semi-broken indentation.
Because of the else and then the 2 lines after?
prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent,
               nr_ptes, /* idx = */ 0, /* set_write = */ false, tlb);

Is what I would have expected.


I think a smart man once said that if you need more than one line per
statement in an if/else clause, a set of {} can aid readability. But I don't
particularly care :)
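(i.e. something like this, illustrative only:)

    if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pte_write(ptent)) {
        set_write_prot_commit_flush_ptes(vma, folio, page,
                addr, pte, oldpte, ptent, nr_ptes, tlb);
    } else {
        prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent,
                nr_ptes, /* idx = */ 0, /* set_write = */ false, tlb);
    }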

Can do this in a follow-up too.