linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Chih-En Lin <shiyn.lin@gmail.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	Qi Zheng <zhengqi.arch@bytedance.com>,
	David Hildenbrand <david@redhat.com>,
	"Matthew Wilcox (Oracle)" <willy@infradead.org>,
	Christophe Leroy <christophe.leroy@csgroup.eu>,
	John Hubbard <jhubbard@nvidia.com>, Nadav Amit <namit@vmware.com>,
	Barry Song <baohua@kernel.org>,
	Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Mark Rutland <mark.rutland@arm.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@kernel.org>, Namhyung Kim <namhyung@kernel.org>,
	Ian Rogers <irogers@google.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Yu Zhao <yuzhao@google.com>, Steven Barrett <steven@liquorix.net>,
	Juergen Gross <jgross@suse.com>, Peter Xu <peterx@redhat.com>,
	Kefeng Wang <wangkefeng.wang@huawei.com>,
	Tong Tiangen <tongtiangen@huawei.com>,
	Christoph Hellwig <hch@infradead.org>,
	"Liam R. Howlett" <Liam.Howlett@Oracle.com>,
	Yang Shi <shy828301@gmail.com>, Vlastimil Babka <vbabka@suse.cz>,
	Alex Sierra <alex.sierra@amd.com>,
	Vincent Whitchurch <vincent.whitchurch@axis.com>,
	Anshuman Khandual <anshuman.khandual@arm.com>,
	Li kunyu <kunyu@nfschina.com>, Liu Shixin <liushixin2@huawei.com>,
	Hugh Dickins <hughd@google.com>, Minchan Kim <minchan@kernel.org>,
	Joey Gouly <joey.gouly@arm.com>,
	Chih-En Lin <shiyn.lin@gmail.com>, Michal Hocko <mhocko@suse.com>,
	Suren Baghdasaryan <surenb@google.com>,
	"Zach O'Keefe" <zokeefe@google.com>,
	Gautam Menghani <gautammenghani201@gmail.com>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Mark Brown <broonie@kernel.org>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	Andrei Vagin <avagin@gmail.com>,
	Shakeel Butt <shakeelb@google.com>,
	Daniel Bristot de Oliveira <bristot@kernel.org>,
	"Jason A. Donenfeld" <Jason@zx2c4.com>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Alexey Gladkov <legion@kernel.org>,
	x86@kernel.org, linux-kernel@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, linux-mm@kvack.org,
	linux-trace-kernel@vger.kernel.org,
	linux-perf-users@vger.kernel.org,
	Dinglan Peng <peng301@purdue.edu>,
	Pedro Fonseca <pfonseca@purdue.edu>,
	Jim Huang <jserv@ccns.ncku.edu.tw>,
	Huichun Feng <foxhoundsk.tw@gmail.com>
Subject: [PATCH v5 17/17] mm: Check the unexpected modification of COW-ed PTE
Date: Fri, 14 Apr 2023 22:23:41 +0800	[thread overview]
Message-ID: <20230414142341.354556-18-shiyn.lin@gmail.com> (raw)
In-Reply-To: <20230414142341.354556-1-shiyn.lin@gmail.com>

In most cases, we don't expect any write access to a COW-ed PTE
table. To prevent this, add a new modification check to the page table
check.

However, there are still some valid reasons why we might want to modify
COW-ed PTE tables. Therefore, add enable/disable functions for the
check.

Signed-off-by: Chih-En Lin <shiyn.lin@gmail.com>
---
 arch/x86/include/asm/pgtable.h   |  1 +
 include/linux/page_table_check.h | 62 ++++++++++++++++++++++++++++++++
 mm/memory.c                      |  4 +++
 mm/page_table_check.c            | 58 ++++++++++++++++++++++++++++++
 4 files changed, 125 insertions(+)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 7425f32e5293..6b323c672e36 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1022,6 +1022,7 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
+	cowed_pte_table_check_modify(mm, addr, ptep, pte);
 	page_table_check_pte_set(mm, addr, ptep, pte);
 	set_pte(ptep, pte);
 }
diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h
index 01e16c7696ec..4a54dc454281 100644
--- a/include/linux/page_table_check.h
+++ b/include/linux/page_table_check.h
@@ -113,6 +113,54 @@ static inline void page_table_check_pte_clear_range(struct mm_struct *mm,
 	__page_table_check_pte_clear_range(mm, addr, pmd);
 }
 
+#ifdef CONFIG_COW_PTE
+void __check_cowed_pte_table_enable(pte_t *ptep);
+void __check_cowed_pte_table_disable(pte_t *ptep);
+void __cowed_pte_table_check_modify(struct mm_struct *mm, unsigned long addr,
+				    pte_t *ptep, pte_t pte);
+
+static inline void check_cowed_pte_table_enable(pte_t *ptep)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__check_cowed_pte_table_enable(ptep);
+}
+
+static inline void check_cowed_pte_table_disable(pte_t *ptep)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__check_cowed_pte_table_disable(ptep);
+}
+
+static inline void cowed_pte_table_check_modify(struct mm_struct *mm,
+						unsigned long addr,
+						pte_t *ptep, pte_t pte)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__cowed_pte_table_check_modify(mm, addr, ptep, pte);
+}
+#else
+static inline void check_cowed_pte_table_enable(pte_t *ptep)
+{
+}
+
+static inline void check_cowed_pte_table_disable(pte_t *ptep)
+{
+}
+
+static inline void cowed_pte_table_check_modify(struct mm_struct *mm,
+						unsigned long addr,
+						pte_t *ptep, pte_t pte)
+{
+}
+#endif /* CONFIG_COW_PTE */
+
+
 #else
 
 static inline void page_table_check_alloc(struct page *page, unsigned int order)
@@ -162,5 +210,19 @@ static inline void page_table_check_pte_clear_range(struct mm_struct *mm,
 {
 }
 
+static inline void check_cowed_pte_table_enable(pte_t *ptep)
+{
+}
+
+static inline void check_cowed_pte_table_disable(pte_t *ptep)
+{
+}
+
+static inline void cowed_pte_table_check_modify(struct mm_struct *mm,
+						unsigned long addr,
+						pte_t *ptep, pte_t pte)
+{
+}
+
 #endif /* CONFIG_PAGE_TABLE_CHECK */
 #endif /* __LINUX_PAGE_TABLE_CHECK_H */
diff --git a/mm/memory.c b/mm/memory.c
index 7908e20f802a..e62487413038 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1202,10 +1202,12 @@ copy_cow_pte_range(struct vm_area_struct *dst_vma,
 				 * Although, parent's PTE is COW-ed, we should
 				 * still need to handle all the swap stuffs.
 				 */
+				check_cowed_pte_table_disable(src_pte);
 				ret = copy_nonpresent_pte(dst_mm, src_mm,
 							  src_pte, src_pte,
 							  curr, curr,
 							  addr, rss);
+				check_cowed_pte_table_enable(src_pte);
 				if (ret == -EIO) {
 					entry = pte_to_swp_entry(*src_pte);
 					break;
@@ -1223,8 +1225,10 @@ copy_cow_pte_range(struct vm_area_struct *dst_vma,
 			 * copy_present_pte() will determine the mapped page
 			 * should be COW mapping or not.
 			 */
+			check_cowed_pte_table_disable(src_pte);
 			ret = copy_present_pte(curr, curr, src_pte, src_pte,
 					       addr, rss, NULL);
+			check_cowed_pte_table_enable(src_pte);
 			/*
 			 * If we need a pre-allocated page for this pte,
 			 * drop the lock, recover all the entries, fall
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 25d8610c0042..5175c7476508 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -14,6 +14,9 @@
 struct page_table_check {
 	atomic_t anon_map_count;
 	atomic_t file_map_count;
+#ifdef CONFIG_COW_PTE
+	atomic_t check_cowed_pte;
+#endif
 };
 
 static bool __page_table_check_enabled __initdata =
@@ -248,3 +251,58 @@ void __page_table_check_pte_clear_range(struct mm_struct *mm,
 		pte_unmap(ptep - PTRS_PER_PTE);
 	}
 }
+
+#ifdef CONFIG_COW_PTE
+void __check_cowed_pte_table_enable(pte_t *ptep)
+{
+	struct page *page = pte_page(*ptep);
+	struct page_ext *page_ext = page_ext_get(page);
+	struct page_table_check *ptc = get_page_table_check(page_ext);
+
+	atomic_set(&ptc->check_cowed_pte, 1);
+	page_ext_put(page_ext);
+}
+
+void __check_cowed_pte_table_disable(pte_t *ptep)
+{
+	struct page *page = pte_page(*ptep);
+	struct page_ext *page_ext = page_ext_get(page);
+	struct page_table_check *ptc = get_page_table_check(page_ext);
+
+	atomic_set(&ptc->check_cowed_pte, 0);
+	page_ext_put(page_ext);
+}
+
+static int check_cowed_pte_table(pte_t *ptep)
+{
+	struct page *page = pte_page(*ptep);
+	struct page_ext *page_ext = page_ext_get(page);
+	struct page_table_check *ptc = get_page_table_check(page_ext);
+	int check = 0;
+
+	check = atomic_read(&ptc->check_cowed_pte);
+	page_ext_put(page_ext);
+
+	return check;
+}
+
+void __cowed_pte_table_check_modify(struct mm_struct *mm, unsigned long addr,
+				    pte_t *ptep, pte_t pte)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	if (!test_bit(MMF_COW_PTE, &mm->flags) || !check_cowed_pte_table(ptep))
+		return;
+
+	pgd = pgd_offset(mm, addr);
+	p4d = p4d_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
+	pmd = pmd_offset(pud, addr);
+
+	if (!pmd_none(*pmd) && !pmd_write(*pmd) && cow_pte_count(pmd) > 1)
+		BUG_ON(!pte_same(*ptep, pte));
+}
+#endif
-- 
2.34.1



      parent reply	other threads:[~2023-04-14 14:27 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-14 14:23 [PATCH v5 00/17] Introduce Copy-On-Write to Page Table Chih-En Lin
2023-04-14 14:23 ` [PATCH v5 01/17] mm: Split out the present cases from zap_pte_range() Chih-En Lin
2023-04-14 14:23 ` [PATCH v5 02/17] mm: Allow user to control COW PTE via prctl Chih-En Lin
2023-04-14 14:23 ` [PATCH v5 03/17] mm: Add Copy-On-Write PTE to fork() Chih-En Lin
2023-04-14 14:23 ` [PATCH v5 04/17] mm: Add break COW PTE fault and helper functions Chih-En Lin
2023-04-14 14:23 ` [PATCH v5 05/17] mm: Handle COW-ed PTE during zapping Chih-En Lin
2023-04-14 14:23 ` [PATCH v5 06/17] mm/rmap: Break COW PTE in rmap walking Chih-En Lin
2023-04-14 14:23 ` [PATCH v5 07/17] mm/khugepaged: Break COW PTE before scanning pte Chih-En Lin
2023-04-14 14:23 ` [PATCH v5 08/17] mm/ksm: Break COW PTE before modify shared PTE Chih-En Lin
2023-04-14 14:23 ` [PATCH v5 09/17] mm/madvise: Handle COW-ed PTE with madvise() Chih-En Lin
2023-04-14 14:23 ` [PATCH v5 10/17] mm/gup: Trigger break COW PTE before calling follow_pfn_pte() Chih-En Lin
2023-04-14 14:23 ` [PATCH v5 11/17] mm/mprotect: Break COW PTE before changing protection Chih-En Lin
2023-04-14 14:23 ` [PATCH v5 12/17] mm/userfaultfd: Support COW PTE Chih-En Lin
2023-04-14 14:23 ` [PATCH v5 13/17] mm/migrate_device: " Chih-En Lin
2023-04-14 14:23 ` [PATCH v5 14/17] fs/proc: Support COW PTE with clear_refs_write Chih-En Lin
2023-04-14 14:23 ` [PATCH v5 15/17] events/uprobes: Break COW PTE before replacing page Chih-En Lin
2023-04-14 14:23 ` [PATCH v5 16/17] mm: fork: Enable COW PTE to fork system call Chih-En Lin
2023-04-14 14:23 ` Chih-En Lin [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230414142341.354556-18-shiyn.lin@gmail.com \
    --to=shiyn.lin@gmail.com \
    --cc=Jason@zx2c4.com \
    --cc=Liam.Howlett@Oracle.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=alex.sierra@amd.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=anshuman.khandual@arm.com \
    --cc=avagin@gmail.com \
    --cc=baohua@kernel.org \
    --cc=bp@alien8.de \
    --cc=bristot@kernel.org \
    --cc=broonie@kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=christophe.leroy@csgroup.eu \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@redhat.com \
    --cc=ebiederm@xmission.com \
    --cc=foxhoundsk.tw@gmail.com \
    --cc=gautammenghani201@gmail.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=hch@infradead.org \
    --cc=hpa@zytor.com \
    --cc=hughd@google.com \
    --cc=irogers@google.com \
    --cc=jgross@suse.com \
    --cc=jhubbard@nvidia.com \
    --cc=joey.gouly@arm.com \
    --cc=jolsa@kernel.org \
    --cc=jserv@ccns.ncku.edu.tw \
    --cc=kunyu@nfschina.com \
    --cc=legion@kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=linux-trace-kernel@vger.kernel.org \
    --cc=liushixin2@huawei.com \
    --cc=mark.rutland@arm.com \
    --cc=mhiramat@kernel.org \
    --cc=mhocko@suse.com \
    --cc=minchan@kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=namit@vmware.com \
    --cc=pasha.tatashin@soleen.com \
    --cc=peng301@purdue.edu \
    --cc=peterx@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pfonseca@purdue.edu \
    --cc=rostedt@goodmis.org \
    --cc=shakeelb@google.com \
    --cc=shy828301@gmail.com \
    --cc=steven@liquorix.net \
    --cc=surenb@google.com \
    --cc=tglx@linutronix.de \
    --cc=tongtiangen@huawei.com \
    --cc=vbabka@suse.cz \
    --cc=vincent.whitchurch@axis.com \
    --cc=wangkefeng.wang@huawei.com \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    --cc=yuzhao@google.com \
    --cc=zhengqi.arch@bytedance.com \
    --cc=zokeefe@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox