From: Chen Haixiang <chenhaixiang3@huawei.com>
To: <linux-mm@kvack.org>, <akpm@linux-foundation.org>, <hughd@google.com>
Cc: <louhongxiang@huawei.com>, <wangbin224@huawei.com>,
<liuyuntao10@huawei.com>, <chenhaixiang3@huawei.com>
Subject: [PATCH] mm/shmem: add no_split option to avoid splitting tmpfs hugepage PMD on COW
Date: Wed, 10 Jan 2024 17:20:28 +0800
Message-ID: <20240110092028.1777-1-chenhaixiang3@huawei.com>
Transparent hugepages in tmpfs improve TLB efficiency by reducing
TLB misses. However, a write fault on a private (Copy-On-Write)
mapping of such a hugepage currently splits the PMD and performs
the copy at PTE granularity, losing the huge mapping. In some
scenarios it is desirable to keep the PMD mapping intact.

Introduce a shmem_huge_fault handler that performs the COW at PMD
granularity by copying the whole hugepage into a freshly allocated
one, together with a tmpfs mount parameter, no_split, to enable or
disable this behavior.
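For example (mount point and THP settings are illustrative):

  # mount -t tmpfs -o huge=always,no_split=1 tmpfs /mnt/tmpfs

With no_split=1, a write to a private mapping of a PMD-mapped tmpfs
hugepage is served by copying to a new hugepage rather than by a PMD
split, so the ShmemPmdMapped counter in /proc/meminfo should stay
constant across such writes instead of dropping.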
Signed-off-by: Chen Haixiang <chenhaixiang3@huawei.com>
---
include/linux/mm.h | 1 +
include/linux/shmem_fs.h | 1 +
mm/memory.c | 7 ++++
mm/shmem.c | 102 ++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 111 insertions(+)
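Not part of the patch: a minimal userspace sketch of the fault
sequence this patch intercepts. It assumes /mnt/tmpfs is mounted with
huge=always,no_split=1 and that shmem_get_unmapped_area returns a
PMD-aligned mapping:

  #include <fcntl.h>
  #include <sys/mman.h>
  #include <unistd.h>

  #define SZ (2UL << 20) /* one PMD-sized hugepage on x86_64 */

  int main(void)
  {
          int fd = open("/mnt/tmpfs/file", O_RDWR | O_CREAT, 0600);
          char *p;
          volatile char c;

          if (fd < 0 || ftruncate(fd, SZ))
                  return 1;
          p = mmap(NULL, SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
          if (p == MAP_FAILED)
                  return 1;
          c = p[0];     /* read fault: hugepage becomes PMD-mapped read-only */
          p[0] = c + 1; /* write fault: COW, taken over by shmem_huge_fault */
          munmap(p, SZ);
          close(fd);
          return 0;
  }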
diff --git a/include/linux/mm.h b/include/linux/mm.h
index da5219b48d52..eb44574965d6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -573,6 +573,7 @@ struct vm_operations_struct {
unsigned long end, unsigned long newflags);
vm_fault_t (*fault)(struct vm_fault *vmf);
vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
+ vm_fault_t (*shmem_huge_fault)(struct vm_fault *vmf, pmd_t orig_pmd);
vm_fault_t (*map_pages)(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
unsigned long (*pagesize)(struct vm_area_struct * area);
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 2caa6b86106a..4484f2f33afe 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -73,6 +73,7 @@ struct shmem_sb_info {
struct list_head shrinklist; /* List of shinkable inodes */
unsigned long shrinklist_len; /* Length of shrinklist */
struct shmem_quota_limits qlimits; /* Default quota limits */
+ unsigned int no_split; /* Do not split ShmemPmdMapped hugepages in tmpfs */
};
static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
diff --git a/mm/memory.c b/mm/memory.c
index 5c757fba8858..7d27a6b5e69f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4942,6 +4942,13 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
}
}
+ if (vmf->vma->vm_ops->shmem_huge_fault) {
+ vm_fault_t ret = vmf->vma->vm_ops->shmem_huge_fault(vmf, vmf->orig_pmd);
+
+ if (!(ret & VM_FAULT_FALLBACK))
+ return ret;
+ }
+
split:
/* COW or write-notify handled on pte level: split pmd. */
__split_huge_pmd(vma, vmf->pmd, vmf->address, false, NULL);
diff --git a/mm/shmem.c b/mm/shmem.c
index 0d1ce70bce38..8211211f7405 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -118,6 +118,7 @@ struct shmem_options {
umode_t mode;
bool full_inums;
int huge;
+ unsigned int no_split;
int seen;
bool noswap;
unsigned short quota_types;
@@ -128,6 +129,7 @@ struct shmem_options {
#define SHMEM_SEEN_INUMS 8
#define SHMEM_SEEN_NOSWAP 16
#define SHMEM_SEEN_QUOTA 32
+#define SHMEM_SEEN_NO_SPLIT 64
};
#ifdef CONFIG_TMPFS
@@ -2238,6 +2240,94 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
return ret;
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static vm_fault_t shmem_huge_fault(struct vm_fault *vmf, pmd_t orig_pmd)
+{
+        vm_fault_t ret = VM_FAULT_FALLBACK;
+        unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
+        struct folio *old_folio, *new_folio;
+        pmd_t entry;
+        gfp_t gfp = GFP_HIGHUSER_MOVABLE | __GFP_COMP;
+        struct vm_area_struct *vma = vmf->vma;
+        struct inode *inode = file_inode(vma->vm_file);
+        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+
+        if (!sbinfo->no_split)
+                return VM_FAULT_FALLBACK;
+
+        /* Only a write fault on a private mapping needs a PMD-level COW. */
+        if (!(vmf->flags & FAULT_FLAG_WRITE) || (vma->vm_flags & VM_SHARED))
+                return VM_FAULT_FALLBACK;
+
+        new_folio = vma_alloc_folio(gfp, HPAGE_PMD_ORDER, vma, haddr, true);
+        if (!new_folio) {
+                count_vm_event(THP_FAULT_FALLBACK);
+                return VM_FAULT_FALLBACK;
+        }
+        /* Charge the new hugepage before it becomes visible. */
+        if (mem_cgroup_charge(new_folio, vma->vm_mm, gfp)) {
+                folio_put(new_folio);
+                count_vm_event(THP_FAULT_FALLBACK);
+                return VM_FAULT_FALLBACK;
+        }
+        __folio_set_locked(new_folio);
+        __folio_set_swapbacked(new_folio);
+
+        vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+        if (pmd_none(*vmf->pmd)) {
+                ret = VM_FAULT_FALLBACK;
+                goto out;
+        }
+        if (!pmd_same(*vmf->pmd, orig_pmd)) {
+                /* Raced with another fault; it has been handled. */
+                ret = 0;
+                goto out;
+        }
+        old_folio = page_folio(pmd_page(*vmf->pmd));
+        folio_get(old_folio);
+        spin_unlock(vmf->ptl);
+
+        /*
+         * Copy with the page table lock dropped: copying a PMD-sized
+         * folio may reschedule between subpages.
+         */
+        copy_user_large_folio(new_folio, old_folio, haddr, vma);
+        __folio_mark_uptodate(new_folio);
+
+        vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+        if (!pmd_same(*vmf->pmd, orig_pmd)) {
+                folio_put(old_folio);
+                ret = 0;
+                goto out;
+        }
+        /* Still PMD-mapped; drop our temporary reference. */
+        folio_put(old_folio);
+
+        page_remove_rmap(&old_folio->page, vma, true);
+        pmdp_huge_clear_flush(vma, haddr, vmf->pmd);
+
+        flush_icache_pages(vma, &new_folio->page, HPAGE_PMD_NR);
+        entry = mk_huge_pmd(&new_folio->page, vma->vm_page_prot);
+        entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+
+        page_add_file_rmap(&new_folio->page, vma, true);
+        set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
+        update_mmu_cache_pmd(vma, haddr, vmf->pmd);
+        count_vm_event(THP_FILE_MAPPED);
+        spin_unlock(vmf->ptl);
+
+        folio_unlock(new_folio);
+        /* Drop the reference the old PMD mapping held. */
+        folio_put(old_folio);
+        return 0;
+
+out:
+        spin_unlock(vmf->ptl);
+        folio_unlock(new_folio);
+        folio_put(new_folio);
+        return ret;
+}
+#endif
+
unsigned long shmem_get_unmapped_area(struct file *file,
unsigned long uaddr, unsigned long len,
unsigned long pgoff, unsigned long flags)
@@ -3869,6 +3944,7 @@ enum shmem_param {
Opt_usrquota_inode_hardlimit,
Opt_grpquota_block_hardlimit,
Opt_grpquota_inode_hardlimit,
+ Opt_no_split,
};
static const struct constant_table shmem_param_enums_huge[] = {
@@ -3900,6 +3976,7 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
#endif
+ fsparam_u32 ("no_split", Opt_no_split),
{}
};
@@ -4065,6 +4142,10 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
"Group quota inode hardlimit too large.");
ctx->qlimits.grpquota_ihardlimit = size;
break;
+ case Opt_no_split:
+ ctx->no_split = result.uint_32;
+ ctx->seen |= SHMEM_SEEN_NO_SPLIT;
+ break;
}
return 0;
@@ -4261,6 +4342,8 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
/* Rightly or wrongly, show huge mount option unmasked by shmem_huge */
if (sbinfo->huge)
seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge));
+ if (sbinfo->no_split)
+ seq_printf(seq, ",no_split=%u", sbinfo->no_split);
#endif
mpol = shmem_get_sbmpol(sbinfo);
shmem_show_mpol(seq, mpol);
@@ -4315,6 +4398,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
if (!(ctx->seen & SHMEM_SEEN_INUMS))
ctx->full_inums = IS_ENABLED(CONFIG_TMPFS_INODE64);
sbinfo->noswap = ctx->noswap;
+ sbinfo->no_split = ctx->no_split;
} else {
sb->s_flags |= SB_NOUSER;
}
@@ -4568,6 +4652,9 @@ static const struct super_operations shmem_ops = {
static const struct vm_operations_struct shmem_vm_ops = {
.fault = shmem_fault,
.map_pages = filemap_map_pages,
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ .shmem_huge_fault = shmem_huge_fault,
+#endif
#ifdef CONFIG_NUMA
.set_policy = shmem_set_policy,
.get_policy = shmem_get_policy,
--
2.33.0
Thread overview: 5+ messages
2024-01-10 9:20 Chen Haixiang [this message]
2024-01-10 12:00 ` David Hildenbrand
2024-01-10 12:44 ` Matthew Wilcox
2024-01-11 0:03 ` kernel test robot
2024-01-11 2:10 ` kernel test robot