linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Song Liu <songliubraving@fb.com>
To: <linux-kernel@vger.kernel.org>, <linux-mm@kvack.org>
Cc: <namit@vmware.com>, <peterz@infradead.org>, <oleg@redhat.com>,
	<rostedt@goodmis.org>, <mhiramat@kernel.org>,
	<matthew.wilcox@oracle.com>, <kirill.shutemov@linux.intel.com>,
	<kernel-team@fb.com>, <william.kucharski@oracle.com>,
	<chad.mynhier@oracle.com>, <mike.kravetz@oracle.com>,
	Song Liu <songliubraving@fb.com>
Subject: [PATCH uprobe, thp 3/4] uprobe: support huge page by only splitting the pmd
Date: Wed, 29 May 2019 14:20:48 -0700	[thread overview]
Message-ID: <20190529212049.2413886-4-songliubraving@fb.com> (raw)
In-Reply-To: <20190529212049.2413886-1-songliubraving@fb.com>

Instead of splitting the compound page with FOLL_SPLIT, this patch allows
uprobe to split only the pmd that maps the huge page.

A helper function, mm_address_trans_huge(mm, address), is introduced to
test whether the address in mm is mapped by a transparent huge pmd (THP).

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 include/linux/huge_mm.h |  8 ++++++++
 kernel/events/uprobes.c | 38 ++++++++++++++++++++++++++++++++------
 mm/huge_memory.c        | 24 ++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 2d8a40fd06e4..4832d6580969 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -163,6 +163,8 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 		bool freeze, struct page *page, pgtable_t prealloc_pgtable);
 
+bool mm_address_trans_huge(struct mm_struct *mm, unsigned long address);
+
 void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long address);
 
@@ -302,6 +304,12 @@ static inline void split_huge_pmd_address(struct vm_area_struct *vma,
 		unsigned long address, bool freeze, struct page *page,
 		pgtable_t prealloc_pgtable) {}
 
+static inline bool mm_address_trans_huge(struct mm_struct *mm,
+					 unsigned long address)
+{
+	return false;	/* stub variant: unconditionally reports "not huge" */
+}
+
 #define split_huge_pud(__vma, __pmd, __address)	\
 	do { } while (0)
 
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ba49da99d2a2..56eeccc2f7a2 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -26,6 +26,7 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/task_work.h>
 #include <linux/shmem_fs.h>
+#include <asm/pgalloc.h>
 
 #include <linux/uprobes.h>
 
@@ -153,7 +154,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct page_vma_mapped_walk pvmw = {
-		.page = old_page,
+		.page = compound_head(old_page),
 		.vma = vma,
 		.address = addr,
 	};
@@ -165,8 +166,6 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
 				addr + PAGE_SIZE);
 
-	VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
-
 	if (!orig) {
 		err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
 					    &memcg, false);
@@ -188,7 +187,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	get_page(new_page);
 	if (orig) {
-		page_add_file_rmap(new_page, false);
+		page_add_file_rmap(compound_head(new_page),
+				   PageTransHuge(compound_head(new_page)));
 		inc_mm_counter(mm, mm_counter_file(new_page));
 		dec_mm_counter(mm, MM_ANONPAGES);
 	} else {
@@ -207,7 +207,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	set_pte_at_notify(mm, addr, pvmw.pte,
 			mk_pte(new_page, vma->vm_page_prot));
 
-	page_remove_rmap(old_page, false);
+	page_remove_rmap(compound_head(old_page),
+			 PageTransHuge(compound_head(old_page)));
 	if (!page_mapped(old_page))
 		try_to_free_swap(old_page);
 	page_vma_mapped_walk_done(&pvmw);
@@ -475,17 +476,42 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	struct vm_area_struct *vma;
 	int ret, is_register, ref_ctr_updated = 0;
 	pgoff_t index;
+	pgtable_t prealloc_pgtable = NULL;
+	unsigned long foll_flags = FOLL_FORCE;
 
 	is_register = is_swbp_insn(&opcode);
 	uprobe = container_of(auprobe, struct uprobe, arch);
 
+	/* do not FOLL_SPLIT yet */
+	ret = get_user_pages_remote(NULL, mm, vaddr, 1,
+			foll_flags, &old_page, &vma, NULL);
+
+	if (ret <= 0)
+		return ret;
+
+	if (mm_address_trans_huge(mm, vaddr)) {
+		prealloc_pgtable = pte_alloc_one(mm);
+		if (likely(prealloc_pgtable)) {
+			split_huge_pmd_address(vma, vaddr, false, NULL,
+					       prealloc_pgtable);
+			goto verify;
+		} else {
+			/* fallback to FOLL_SPLIT */
+			foll_flags |= FOLL_SPLIT;
+			put_page(old_page);
+		}
+	} else {
+		goto verify;
+	}
+
 retry:
 	/* Read the page with vaddr into memory */
 	ret = get_user_pages_remote(NULL, mm, vaddr, 1,
-			FOLL_FORCE | FOLL_SPLIT, &old_page, &vma, NULL);
+			foll_flags, &old_page, &vma, NULL);
 	if (ret <= 0)
 		return ret;
 
+verify:
 	ret = verify_opcode(old_page, vaddr, &opcode);
 	if (ret <= 0)
 		goto put_old;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index dcb0e30213af..4714871353c0 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2360,6 +2360,30 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 	____split_huge_pmd(vma, pmd, address, freeze, page, prealloc_pgtable);
 }
 
+bool mm_address_trans_huge(struct mm_struct *mm, unsigned long address)
+{	/* true only when the pmd entry covering address is pmd_trans_huge() */
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		return false;	/* upper level absent: no pmd to inspect */
+
+	p4d = p4d_offset(pgd, address);
+	if (!p4d_present(*p4d))
+		return false;
+
+	pud = pud_offset(p4d, address);
+	if (!pud_present(*pud))
+		return false;
+
+	pmd = pmd_offset(pud, address);
+	/* NOTE(review): no lock is taken in this function - confirm callers */
+	return pmd_trans_huge(*pmd);
+}
+
 void vma_adjust_trans_huge(struct vm_area_struct *vma,
 			     unsigned long start,
 			     unsigned long end,
-- 
2.17.1


  parent reply	other threads:[~2019-05-29 21:36 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-29 21:20 [PATCH uprobe, thp 0/4] THP aware uprobe Song Liu
2019-05-29 21:20 ` [PATCH uprobe, thp 1/4] mm, thp: allow preallocate pgtable for split_huge_pmd_address() Song Liu
2019-05-30 11:10   ` Kirill A. Shutemov
2019-05-30 11:14     ` Kirill A. Shutemov
2019-05-30 17:23       ` Song Liu
2019-05-29 21:20 ` [PATCH uprobe, thp 2/4] uprobe: use original page when all uprobes are removed Song Liu
2019-05-30 11:17   ` Kirill A. Shutemov
2019-05-30 17:18     ` Song Liu
2019-05-29 21:20 ` Song Liu [this message]
2019-05-30 11:08   ` [PATCH uprobe, thp 3/4] uprobe: support huge page by only splitting the pmd William Kucharski
2019-05-30 17:24     ` Song Liu
2019-05-30 12:14   ` Kirill A. Shutemov
2019-05-30 17:37     ` Song Liu
2019-05-29 21:20 ` [PATCH uprobe, thp 4/4] uprobe: collapse THP pmd after removing all uprobes Song Liu
2019-05-30 12:20   ` Kirill A. Shutemov
2019-05-30 17:26     ` Song Liu
2019-05-31  7:00       ` Kirill A. Shutemov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190529212049.2413886-4-songliubraving@fb.com \
    --to=songliubraving@fb.com \
    --cc=chad.mynhier@oracle.com \
    --cc=kernel-team@fb.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=matthew.wilcox@oracle.com \
    --cc=mhiramat@kernel.org \
    --cc=mike.kravetz@oracle.com \
    --cc=namit@vmware.com \
    --cc=oleg@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=william.kucharski@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox