From: Qi Zheng <zhengqi.arch@bytedance.com>
To: david@redhat.com, hughd@google.com, willy@infradead.org,
mgorman@suse.de, muchun.song@linux.dev, vbabka@kernel.org,
akpm@linux-foundation.org, zokeefe@google.com,
rientjes@google.com
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
Qi Zheng <zhengqi.arch@bytedance.com>
Subject: [RFC PATCH v2 3/7] mm: pass address information to pmd_install()
Date: Mon, 5 Aug 2024 20:55:07 +0800 [thread overview]
Message-ID: <095dc55b68ef4650e2eaf66ad7dd2feabe87f89e.1722861064.git.zhengqi.arch@bytedance.com> (raw)
In-Reply-To: <cover.1722861064.git.zhengqi.arch@bytedance.com>
The subsequent implementation of freeing empty page table pages needs
the address information in order to flush the TLB, so pass the address
to pmd_install() (and to its callers pte_alloc() and __pte_alloc()) in
advance.
No functional changes.
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
include/linux/hugetlb.h | 2 +-
include/linux/mm.h | 9 +++++----
mm/debug_vm_pgtable.c | 2 +-
mm/filemap.c | 2 +-
mm/gup.c | 2 +-
mm/internal.h | 3 ++-
mm/memory.c | 15 ++++++++-------
mm/migrate_device.c | 2 +-
mm/mprotect.c | 8 ++++----
mm/mremap.c | 2 +-
mm/userfaultfd.c | 6 +++---
11 files changed, 28 insertions(+), 25 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index a76db143bffee..fcdcef367fffe 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -189,7 +189,7 @@ static inline pte_t *pte_offset_huge(pmd_t *pmd, unsigned long address)
static inline pte_t *pte_alloc_huge(struct mm_struct *mm, pmd_t *pmd,
unsigned long address)
{
- return pte_alloc(mm, pmd) ? NULL : pte_offset_huge(pmd, address);
+ return pte_alloc(mm, pmd, address) ? NULL : pte_offset_huge(pmd, address);
}
#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b1ef2afe620c5..f0b821dcb085b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2758,7 +2758,7 @@ static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
#endif
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long addr);
int __pte_alloc_kernel(pmd_t *pmd);
#if defined(CONFIG_MMU)
@@ -2945,13 +2945,14 @@ pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd, pmd_t *pmdvalp,
pte_unmap(pte); \
} while (0)
-#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
+#define pte_alloc(mm, pmd, addr) \
+ (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, addr))
#define pte_alloc_map(mm, pmd, address) \
- (pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address))
+ (pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address))
#define pte_alloc_map_lock(mm, pmd, address, ptlp) \
- (pte_alloc(mm, pmd) ? \
+ (pte_alloc(mm, pmd, address) ? \
NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
#define pte_alloc_kernel(pmd, address) \
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index e4969fb54da34..18375744e1845 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -1246,7 +1246,7 @@ static int __init init_args(struct pgtable_debug_args *args)
args->start_pmdp = pmd_offset(args->pudp, 0UL);
WARN_ON(!args->start_pmdp);
- if (pte_alloc(args->mm, args->pmdp)) {
+ if (pte_alloc(args->mm, args->pmdp, args->vaddr)) {
pr_err("Failed to allocate pte entries\n");
ret = -ENOMEM;
goto error;
diff --git a/mm/filemap.c b/mm/filemap.c
index 3285dffb64cf8..efcb8ae3f235f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3453,7 +3453,7 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct folio *folio,
}
if (pmd_none(*vmf->pmd) && vmf->prealloc_pte)
- pmd_install(mm, vmf->pmd, &vmf->prealloc_pte);
+ pmd_install(mm, vmf->pmd, vmf->address, &vmf->prealloc_pte);
return false;
}
diff --git a/mm/gup.c b/mm/gup.c
index d19884e097fd2..53c3b73810150 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -972,7 +972,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
spin_unlock(ptl);
split_huge_pmd(vma, pmd, address);
/* If pmd was left empty, stuff a page table in there quickly */
- return pte_alloc(mm, pmd) ? ERR_PTR(-ENOMEM) :
+ return pte_alloc(mm, pmd, address) ? ERR_PTR(-ENOMEM) :
follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
}
page = follow_huge_pmd(vma, address, pmd, flags, ctx);
diff --git a/mm/internal.h b/mm/internal.h
index 52f7fc4e8ac30..dfc992de01115 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -325,7 +325,8 @@ void folio_activate(struct folio *folio);
void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
struct vm_area_struct *start_vma, unsigned long floor,
unsigned long ceiling, bool mm_wr_locked);
-void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
+void pmd_install(struct mm_struct *mm, pmd_t *pmd, unsigned long addr,
+ pgtable_t *pte);
struct zap_details;
void unmap_page_range(struct mmu_gather *tlb,
diff --git a/mm/memory.c b/mm/memory.c
index afd8a967fb953..fef1e425e4702 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -417,7 +417,8 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
} while (vma);
}
-void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte)
+void pmd_install(struct mm_struct *mm, pmd_t *pmd, unsigned long addr,
+ pgtable_t *pte)
{
spinlock_t *ptl = pmd_lock(mm, pmd);
@@ -443,13 +444,13 @@ void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte)
spin_unlock(ptl);
}
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
{
pgtable_t new = pte_alloc_one(mm);
if (!new)
return -ENOMEM;
- pmd_install(mm, pmd, &new);
+ pmd_install(mm, pmd, addr, &new);
if (new)
pte_free(mm, new);
return 0;
@@ -2115,7 +2116,7 @@ static int insert_pages(struct vm_area_struct *vma, unsigned long addr,
/* Allocate the PTE if necessary; takes PMD lock once only. */
ret = -ENOMEM;
- if (pte_alloc(mm, pmd))
+ if (pte_alloc(mm, pmd, addr))
goto out;
while (pages_to_write_in_pmd) {
@@ -4686,7 +4687,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
* Use pte_alloc() instead of pte_alloc_map(), so that OOM can
* be distinguished from a transient failure of pte_offset_map().
*/
- if (pte_alloc(vma->vm_mm, vmf->pmd))
+ if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
return VM_FAULT_OOM;
/* Use the zero-page for reads */
@@ -5033,8 +5034,8 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
}
if (vmf->prealloc_pte)
- pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte);
- else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
+ pmd_install(vma->vm_mm, vmf->pmd, vmf->address, &vmf->prealloc_pte);
+ else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address)))
return VM_FAULT_OOM;
}
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 6d66dc1c6ffa0..e4d2e19e6611d 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -598,7 +598,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
goto abort;
if (pmd_trans_huge(*pmdp) || pmd_devmap(*pmdp))
goto abort;
- if (pte_alloc(mm, pmdp))
+ if (pte_alloc(mm, pmdp, addr))
goto abort;
if (unlikely(anon_vma_prepare(vma)))
goto abort;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 37cf8d249405d..7b58db622f825 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -329,11 +329,11 @@ pgtable_populate_needed(struct vm_area_struct *vma, unsigned long cp_flags)
* allocation failures during page faults by kicking OOM and returning
* error.
*/
-#define change_pmd_prepare(vma, pmd, cp_flags) \
+#define change_pmd_prepare(vma, pmd, addr, cp_flags) \
({ \
long err = 0; \
if (unlikely(pgtable_populate_needed(vma, cp_flags))) { \
- if (pte_alloc(vma->vm_mm, pmd)) \
+ if (pte_alloc(vma->vm_mm, pmd, addr)) \
err = -ENOMEM; \
} \
err; \
@@ -374,7 +374,7 @@ static inline long change_pmd_range(struct mmu_gather *tlb,
again:
next = pmd_addr_end(addr, end);
- ret = change_pmd_prepare(vma, pmd, cp_flags);
+ ret = change_pmd_prepare(vma, pmd, addr, cp_flags);
if (ret) {
pages = ret;
break;
@@ -401,7 +401,7 @@ static inline long change_pmd_range(struct mmu_gather *tlb,
* cleared; make sure pmd populated if
* necessary, then fall-through to pte level.
*/
- ret = change_pmd_prepare(vma, pmd, cp_flags);
+ ret = change_pmd_prepare(vma, pmd, addr, cp_flags);
if (ret) {
pages = ret;
break;
diff --git a/mm/mremap.c b/mm/mremap.c
index f672d0218a6fe..7723d11e77cd2 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -628,7 +628,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
}
if (pmd_none(*old_pmd))
continue;
- if (pte_alloc(new_vma->vm_mm, new_pmd))
+ if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
break;
if (move_ptes(vma, old_pmd, old_addr, old_addr + extent,
new_vma, new_pmd, new_addr, need_rmap_locks) < 0)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index aa3c9cc51cc36..41d659bd2589c 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -796,7 +796,7 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
break;
}
if (unlikely(pmd_none(dst_pmdval)) &&
- unlikely(__pte_alloc(dst_mm, dst_pmd))) {
+ unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
err = -ENOMEM;
break;
}
@@ -1713,13 +1713,13 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
err = -ENOENT;
break;
}
- if (unlikely(__pte_alloc(mm, src_pmd))) {
+ if (unlikely(__pte_alloc(mm, src_pmd, src_addr))) {
err = -ENOMEM;
break;
}
}
- if (unlikely(pte_alloc(mm, dst_pmd))) {
+ if (unlikely(pte_alloc(mm, dst_pmd, dst_addr))) {
err = -ENOMEM;
break;
}
--
2.20.1
next prev parent reply other threads:[~2024-08-05 12:56 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-05 12:55 [RFC PATCH v2 0/7] synchronously scan and reclaim empty user PTE pages Qi Zheng
2024-08-05 12:55 ` [RFC PATCH v2 1/7] mm: pgtable: make pte_offset_map_nolock() return pmdval Qi Zheng
2024-08-05 14:43 ` David Hildenbrand
2024-08-06 2:40 ` Qi Zheng
2024-08-06 14:16 ` David Hildenbrand
[not found] ` <f6c05526-5ac9-4597-9e80-099ea22fa0ae@bytedance.com>
2024-08-09 16:54 ` David Hildenbrand
2024-08-12 6:21 ` Qi Zheng
2024-08-16 8:59 ` David Hildenbrand
2024-08-16 9:21 ` Qi Zheng
2024-08-05 12:55 ` [RFC PATCH v2 2/7] mm: introduce CONFIG_PT_RECLAIM Qi Zheng
2024-08-06 14:25 ` David Hildenbrand
2024-08-05 12:55 ` Qi Zheng [this message]
2024-08-05 12:55 ` [RFC PATCH v2 4/7] mm: pgtable: try to reclaim empty PTE pages in zap_page_range_single() Qi Zheng
2024-08-06 14:40 ` David Hildenbrand
[not found] ` <42942b4d-153e-43e2-bfb1-43db49f87e50@bytedance.com>
2024-08-16 9:22 ` David Hildenbrand
2024-08-16 10:01 ` Qi Zheng
2024-08-16 10:03 ` David Hildenbrand
2024-08-16 10:07 ` Qi Zheng
2024-08-05 12:55 ` [RFC PATCH v2 5/7] x86: mm: free page table pages by RCU instead of semi RCU Qi Zheng
2024-08-05 12:55 ` [RFC PATCH v2 6/7] x86: mm: define arch_flush_tlb_before_set_huge_page Qi Zheng
2024-08-05 12:55 ` [RFC PATCH v2 7/7] x86: select ARCH_SUPPORTS_PT_RECLAIM if X86_64 Qi Zheng
2024-08-05 13:14 ` [RFC PATCH v2 0/7] synchronously scan and reclaim empty user PTE pages Qi Zheng
2024-08-06 3:31 ` Qi Zheng
2024-08-16 2:55 ` Qi Zheng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=095dc55b68ef4650e2eaf66ad7dd2feabe87f89e.1722861064.git.zhengqi.arch@bytedance.com \
--to=zhengqi.arch@bytedance.com \
--cc=akpm@linux-foundation.org \
--cc=david@redhat.com \
--cc=hughd@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@suse.de \
--cc=muchun.song@linux.dev \
--cc=rientjes@google.com \
--cc=vbabka@kernel.org \
--cc=willy@infradead.org \
--cc=zokeefe@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox