linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Anthony Yznaga <anthony.yznaga@oracle.com>
To: linux-mm@kvack.org
Cc: akpm@linux-foundation.org, andreyknvl@gmail.com, arnd@arndb.de,
	bp@alien8.de, brauner@kernel.org, bsegall@google.com,
	corbet@lwn.net, dave.hansen@linux.intel.com, david@redhat.com,
	dietmar.eggemann@arm.com, ebiederm@xmission.com, hpa@zytor.com,
	jakub.wartak@mailbox.org, jannh@google.com,
	juri.lelli@redhat.com, khalid@kernel.org,
	liam.howlett@oracle.com, linyongting@bytedance.com,
	lorenzo.stoakes@oracle.com, luto@kernel.org,
	markhemm@googlemail.com, maz@kernel.org, mhiramat@kernel.org,
	mgorman@suse.de, mhocko@suse.com, mingo@redhat.com,
	muchun.song@linux.dev, neilb@suse.de, osalvador@suse.de,
	pcc@google.com, peterz@infradead.org, pfalcato@suse.de,
	rostedt@goodmis.org, rppt@kernel.org, shakeel.butt@linux.dev,
	surenb@google.com, tglx@linutronix.de, vasily.averin@linux.dev,
	vbabka@suse.cz, vincent.guittot@linaro.org,
	viro@zeniv.linux.org.uk, vschneid@redhat.com,
	willy@infradead.org, x86@kernel.org, xhao@linux.alibaba.com,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-arch@vger.kernel.org
Subject: [PATCH v3 12/22] mm: introduce PUD page table shared count
Date: Tue, 19 Aug 2025 18:04:05 -0700	[thread overview]
Message-ID: <20250820010415.699353-13-anthony.yznaga@oracle.com> (raw)
In-Reply-To: <20250820010415.699353-1-anthony.yznaga@oracle.com>

Once an mshare shared page table has been linked with one or more
process page tables, it becomes necessary to ensure that the shared
page table is not completely freed when objects in it are unmapped,
in order to avoid a potential use-after-free (UAF) bug. To do this,
introduce and use a reference count (pt_share_count) for PUD pages.

Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
---
 include/linux/mm.h       |  1 +
 include/linux/mm_types.h | 36 ++++++++++++++++++++++++++++++++++--
 mm/memory.c              | 21 +++++++++++++++++++--
 3 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 96440082a633..c8dfa5c6e7d4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3217,6 +3217,7 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
 
 static inline void pagetable_pud_ctor(struct ptdesc *ptdesc)
 {
+	ptdesc_pud_pts_init(ptdesc);
 	__pagetable_ctor(ptdesc);
 }
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c8f4d2a2c60b..da5a7a31a81d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -537,7 +537,7 @@ FOLIO_MATCH(compound_head, _head_3);
  * @pt_index:         Used for s390 gmap.
  * @pt_mm:            Used for x86 pgds.
  * @pt_frag_refcount: For fragmented page table tracking. Powerpc only.
- * @pt_share_count:   Used for HugeTLB PMD page table share count.
+ * @pt_share_count:   Used for HugeTLB PMD or Mshare PUD page table share count.
  * @_pt_pad_2:        Padding to ensure proper alignment.
  * @ptl:              Lock for the page table.
  * @__page_type:      Same as page->page_type. Unused for page tables.
@@ -564,7 +564,7 @@ struct ptdesc {
 		pgoff_t pt_index;
 		struct mm_struct *pt_mm;
 		atomic_t pt_frag_refcount;
-#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
+#if defined(CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING) || defined(CONFIG_MSHARE)
 		atomic_t pt_share_count;
 #endif
 	};
@@ -638,6 +638,38 @@ static inline void ptdesc_pmd_pts_init(struct ptdesc *ptdesc)
 }
 #endif
 
+#ifdef CONFIG_MSHARE
+static inline void ptdesc_pud_pts_init(struct ptdesc *ptdesc)
+{
+	atomic_set(&ptdesc->pt_share_count, 0);
+}
+
+static inline void ptdesc_pud_pts_inc(struct ptdesc *ptdesc)
+{
+	atomic_inc(&ptdesc->pt_share_count);
+}
+
+static inline void ptdesc_pud_pts_dec(struct ptdesc *ptdesc)
+{
+	atomic_dec(&ptdesc->pt_share_count);
+}
+
+static inline int ptdesc_pud_pts_count(struct ptdesc *ptdesc)
+{
+	return atomic_read(&ptdesc->pt_share_count);
+}
+#else
+static inline void ptdesc_pud_pts_init(struct ptdesc *ptdesc)
+{
+}
+
+static inline int ptdesc_pud_pts_count(struct ptdesc *ptdesc)
+{
+	return 0;
+}
+#endif
+
+
 /*
  * Used for sizing the vmemmap region on some architectures
  */
diff --git a/mm/memory.c b/mm/memory.c
index dbc299aa82c2..4e3bb49b95e2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -228,9 +228,18 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 	mm_dec_nr_pmds(tlb->mm);
 }
 
+static inline bool pud_range_is_shared(pud_t *pud)
+{
+	if (ptdesc_pud_pts_count(virt_to_ptdesc(pud)))
+		return true;
+
+	return false;
+}
+
 static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
 				unsigned long addr, unsigned long end,
-				unsigned long floor, unsigned long ceiling)
+				unsigned long floor, unsigned long ceiling,
+				bool *pud_is_shared)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -257,6 +266,10 @@ static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
 		return;
 
 	pud = pud_offset(p4d, start);
+	if (unlikely(pud_range_is_shared(pud))) {
+		*pud_is_shared = true;
+		return;
+	}
 	p4d_clear(p4d);
 	pud_free_tlb(tlb, pud, start);
 	mm_dec_nr_puds(tlb->mm);
@@ -269,6 +282,7 @@ static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd,
 	p4d_t *p4d;
 	unsigned long next;
 	unsigned long start;
+	bool pud_is_shared = false;
 
 	start = addr;
 	p4d = p4d_offset(pgd, addr);
@@ -276,7 +290,8 @@ static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd,
 		next = p4d_addr_end(addr, end);
 		if (p4d_none_or_clear_bad(p4d))
 			continue;
-		free_pud_range(tlb, p4d, addr, next, floor, ceiling);
+		free_pud_range(tlb, p4d, addr, next, floor, ceiling,
+				&pud_is_shared);
 	} while (p4d++, addr = next, addr != end);
 
 	start &= PGDIR_MASK;
@@ -290,6 +305,8 @@ static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd,
 	if (end - 1 > ceiling - 1)
 		return;
 
+	if (unlikely(pud_is_shared))
+		return;
 	p4d = p4d_offset(pgd, start);
 	pgd_clear(pgd);
 	p4d_free_tlb(tlb, p4d, start);
-- 
2.47.1



  parent reply	other threads:[~2025-08-20  1:05 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-20  1:03 [PATCH v3 00/22] Add support for shared PTEs across processes Anthony Yznaga
2025-08-20  1:03 ` [PATCH v3 01/22] mm: Add msharefs filesystem Anthony Yznaga
2025-09-08 18:29   ` Liam R. Howlett
2025-09-08 19:09     ` Anthony Yznaga
2025-09-10 12:14   ` Pedro Falcato
2025-09-10 12:46     ` David Hildenbrand
2025-08-20  1:03 ` [PATCH v3 02/22] mm/mshare: pre-populate msharefs with information file Anthony Yznaga
2025-08-20  1:03 ` [PATCH v3 03/22] mm/mshare: make msharefs writable and support directories Anthony Yznaga
2025-08-20  1:03 ` [PATCH v3 04/22] mm/mshare: allocate an mm_struct for msharefs files Anthony Yznaga
2025-08-20  1:03 ` [PATCH v3 05/22] mm/mshare: add ways to set the size of an mshare region Anthony Yznaga
2025-08-20  1:03 ` [PATCH v3 06/22] mm/mshare: Add a vma flag to indicate " Anthony Yznaga
2025-09-08 18:45   ` David Hildenbrand
2025-09-08 18:56     ` Anthony Yznaga
2025-09-08 19:02       ` David Hildenbrand
2025-09-08 19:03         ` Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 07/22] mm/mshare: Add mmap support Anthony Yznaga
2025-08-20 19:02   ` kernel test robot
2025-08-20  1:04 ` [PATCH v3 08/22] mm/mshare: flush all TLBs when updating PTEs in an mshare range Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 09/22] sched/numa: do not scan msharefs vmas Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 10/22] mm: add mmap_read_lock_killable_nested() Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 11/22] mm: add and use unmap_page_range vm_ops hook Anthony Yznaga
2025-08-21 15:40   ` kernel test robot
2025-08-20  1:04 ` Anthony Yznaga [this message]
2025-08-20  1:04 ` [PATCH v3 13/22] mm/mshare: prepare for page table sharing support Anthony Yznaga
2025-09-15 15:27   ` Lorenzo Stoakes
2025-08-20  1:04 ` [PATCH v3 14/22] x86/mm: enable page table sharing Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 15/22] mm: create __do_mmap() to take an mm_struct * arg Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 16/22] mm: pass the mm in vma_munmap_struct Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 17/22] sched/mshare: mshare ownership Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 18/22] mm/mshare: Add an ioctl for mapping objects in an mshare region Anthony Yznaga
2025-08-20 20:36   ` kernel test robot
2025-08-20  1:04 ` [PATCH v3 19/22] mm/mshare: Add an ioctl for unmapping " Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 20/22] mm/mshare: support mapping files and anon hugetlb " Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 21/22] mm/mshare: provide a way to identify an mm as an mshare host mm Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 22/22] mm/mshare: charge fault handling allocations to the mshare owner Anthony Yznaga
2025-09-08 18:50   ` David Hildenbrand
2025-09-08 19:21     ` Anthony Yznaga
2025-09-08 20:28       ` David Hildenbrand
2025-09-08 20:55         ` Anthony Yznaga
2025-09-08 20:32 ` [PATCH v3 00/22] Add support for shared PTEs across processes David Hildenbrand
2025-09-08 20:59   ` Matthew Wilcox
2025-09-08 21:14     ` Anthony Yznaga
2025-09-09  7:53       ` David Hildenbrand
2025-09-09 18:29         ` Anthony Yznaga
2025-09-09 19:06         ` Lorenzo Stoakes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250820010415.699353-13-anthony.yznaga@oracle.com \
    --to=anthony.yznaga@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=andreyknvl@gmail.com \
    --cc=arnd@arndb.de \
    --cc=bp@alien8.de \
    --cc=brauner@kernel.org \
    --cc=bsegall@google.com \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@redhat.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=ebiederm@xmission.com \
    --cc=hpa@zytor.com \
    --cc=jakub.wartak@mailbox.org \
    --cc=jannh@google.com \
    --cc=juri.lelli@redhat.com \
    --cc=khalid@kernel.org \
    --cc=liam.howlett@oracle.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linyongting@bytedance.com \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=luto@kernel.org \
    --cc=markhemm@googlemail.com \
    --cc=maz@kernel.org \
    --cc=mgorman@suse.de \
    --cc=mhiramat@kernel.org \
    --cc=mhocko@suse.com \
    --cc=mingo@redhat.com \
    --cc=muchun.song@linux.dev \
    --cc=neilb@suse.de \
    --cc=osalvador@suse.de \
    --cc=pcc@google.com \
    --cc=peterz@infradead.org \
    --cc=pfalcato@suse.de \
    --cc=rostedt@goodmis.org \
    --cc=rppt@kernel.org \
    --cc=shakeel.butt@linux.dev \
    --cc=surenb@google.com \
    --cc=tglx@linutronix.de \
    --cc=vasily.averin@linux.dev \
    --cc=vbabka@suse.cz \
    --cc=vincent.guittot@linaro.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=vschneid@redhat.com \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    --cc=xhao@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox