linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Usama Arif <usamaarif642@gmail.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	david@redhat.com, linux-mm@kvack.org
Cc: hannes@cmpxchg.org, shakeel.butt@linux.dev, riel@surriel.com,
	ziy@nvidia.com, laoar.shao@gmail.com,
	baolin.wang@linux.alibaba.com, lorenzo.stoakes@oracle.com,
	Liam.Howlett@oracle.com, npache@redhat.com, ryan.roberts@arm.com,
	linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
	kernel-team@meta.com, Usama Arif <usamaarif642@gmail.com>
Subject: [PATCH 2/6] prctl: introduce PR_THP_POLICY_DEFAULT_NOHUGE for the process
Date: Thu, 15 May 2025 14:33:31 +0100	[thread overview]
Message-ID: <20250515133519.2779639-3-usamaarif642@gmail.com> (raw)
In-Reply-To: <20250515133519.2779639-1-usamaarif642@gmail.com>

PR_THP_POLICY_DEFAULT_NOHUGE is set via the new PR_SET_THP_POLICY prctl.
Doing so sets the MMF2_THP_VMA_DEFAULT_NOHUGE process flag,
which changes the default of new VMAs to be VM_NOHUGEPAGE. The
call also modifies all existing VMAs that are not VM_HUGEPAGE
to be VM_NOHUGEPAGE. The policy is inherited during fork+exec.

This allows systems where the global policy is set to "always"
to effectively have THPs on madvise only for the process. In an
environment where different types of workloads are stacked on the
same machine, this will allow workloads that benefit from having
hugepages on an madvise basis only to do so, without regressing those
that benefit from having hugepages always.

Signed-off-by: Usama Arif <usamaarif642@gmail.com>
---
 include/linux/huge_mm.h                       |  1 +
 include/linux/mm_types.h                      |  5 +++-
 include/uapi/linux/prctl.h                    |  1 +
 kernel/sys.c                                  |  8 +++++++
 mm/huge_memory.c                              | 24 +++++++++++++++++++
 tools/include/uapi/linux/prctl.h              |  1 +
 .../trace/beauty/include/uapi/linux/prctl.h   |  1 +
 7 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index e652ad9ddbbd..d46bba282701 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -262,6 +262,7 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
 
 void vma_set_thp_policy(struct vm_area_struct *vma);
 void process_vmas_thp_default_huge(struct mm_struct *mm);
+void process_vmas_thp_default_nohuge(struct mm_struct *mm);
 
 unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
 					 unsigned long vm_flags,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 2fe93965e761..5e770411d8d1 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1747,8 +1747,11 @@ enum {
 
 #define MMF2_THP_VMA_DEFAULT_HUGE		0
 #define MMF2_THP_VMA_DEFAULT_HUGE_MASK		(1 << MMF2_THP_VMA_DEFAULT_HUGE)
+#define MMF2_THP_VMA_DEFAULT_NOHUGE		1
+#define MMF2_THP_VMA_DEFAULT_NOHUGE_MASK	(1 << MMF2_THP_VMA_DEFAULT_NOHUGE)
 
-#define MMF2_INIT_MASK		(MMF2_THP_VMA_DEFAULT_HUGE_MASK)
+#define MMF2_INIT_MASK		(MMF2_THP_VMA_DEFAULT_HUGE_MASK |\
+				 MMF2_THP_VMA_DEFAULT_NOHUGE_MASK)
 
 static inline unsigned long mmf_init_flags(unsigned long flags)
 {
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 325c72f40a93..d25458f4db9e 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -367,5 +367,6 @@ struct prctl_mm_map {
 #define PR_SET_THP_POLICY		78
 #define PR_GET_THP_POLICY		79
 #define PR_THP_POLICY_DEFAULT_HUGE	0
+#define PR_THP_POLICY_DEFAULT_NOHUGE	1
 
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 1115f258f253..d91203e6dd0d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2663,6 +2663,8 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			return -EINVAL;
 		if (!!test_bit(MMF2_THP_VMA_DEFAULT_HUGE, &me->mm->flags2))
 			error = PR_THP_POLICY_DEFAULT_HUGE;
+		else if (!!test_bit(MMF2_THP_VMA_DEFAULT_NOHUGE, &me->mm->flags2))
+			error = PR_THP_POLICY_DEFAULT_NOHUGE;
 		break;
 	case PR_SET_THP_POLICY:
 		if (arg3 || arg4 || arg5)
@@ -2672,8 +2674,14 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		switch (arg2) {
 		case PR_THP_POLICY_DEFAULT_HUGE:
 			set_bit(MMF2_THP_VMA_DEFAULT_HUGE, &me->mm->flags2);
+			clear_bit(MMF2_THP_VMA_DEFAULT_NOHUGE, &me->mm->flags2);
 			process_vmas_thp_default_huge(me->mm);
 			break;
+		case PR_THP_POLICY_DEFAULT_NOHUGE:
+			clear_bit(MMF2_THP_VMA_DEFAULT_HUGE, &me->mm->flags2);
+			set_bit(MMF2_THP_VMA_DEFAULT_NOHUGE, &me->mm->flags2);
+			process_vmas_thp_default_nohuge(me->mm);
+			break;
 		default:
 			return -EINVAL;
 		}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 64f66d5295e8..9d70a365ced3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -104,6 +104,8 @@ void vma_set_thp_policy(struct vm_area_struct *vma)
 
 	if (test_bit(MMF2_THP_VMA_DEFAULT_HUGE, &mm->flags2))
 		vm_flags_set(vma, VM_HUGEPAGE);
+	else if (test_bit(MMF2_THP_VMA_DEFAULT_NOHUGE, &mm->flags2))
+		vm_flags_set(vma, VM_NOHUGEPAGE);
 }
 
 static void vmas_thp_default_huge(struct mm_struct *mm)
@@ -129,6 +131,28 @@ void process_vmas_thp_default_huge(struct mm_struct *mm)
 	vmas_thp_default_huge(mm);
 }
 
+static void vmas_thp_default_nohuge(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	unsigned long vm_flags;
+
+	VMA_ITERATOR(vmi, mm, 0);
+	for_each_vma(vmi, vma) {
+		vm_flags = vma->vm_flags;
+		if (vm_flags & VM_HUGEPAGE)
+			continue;
+		vm_flags_set(vma, VM_NOHUGEPAGE);
+	}
+}
+
+void process_vmas_thp_default_nohuge(struct mm_struct *mm)
+{
+	if (test_bit(MMF2_THP_VMA_DEFAULT_NOHUGE, &mm->flags2))
+		return;
+
+	set_bit(MMF2_THP_VMA_DEFAULT_NOHUGE, &mm->flags2);
+	vmas_thp_default_nohuge(mm);
+}
 
 unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
 					 unsigned long vm_flags,
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index f5945ebfe3f2..e03d0ed890c5 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -331,5 +331,6 @@ struct prctl_mm_map {
 #define PR_SET_THP_POLICY		78
 #define PR_GET_THP_POLICY		79
 #define PR_THP_POLICY_DEFAULT_HUGE	0
+#define PR_THP_POLICY_DEFAULT_NOHUGE	1
 
 #endif /* _LINUX_PRCTL_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
index 325c72f40a93..d25458f4db9e 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
@@ -367,5 +367,6 @@ struct prctl_mm_map {
 #define PR_SET_THP_POLICY		78
 #define PR_GET_THP_POLICY		79
 #define PR_THP_POLICY_DEFAULT_HUGE	0
+#define PR_THP_POLICY_DEFAULT_NOHUGE	1
 
 #endif /* _LINUX_PRCTL_H */
-- 
2.47.1



  parent reply	other threads:[~2025-05-15 13:35 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-15 13:33 [PATCH 0/6] prctl: introduce PR_SET/GET_THP_POLICY Usama Arif
2025-05-15 13:33 ` [PATCH 1/6] prctl: introduce PR_THP_POLICY_DEFAULT_HUGE for the process Usama Arif
2025-05-15 14:40   ` Lorenzo Stoakes
2025-05-15 14:44     ` David Hildenbrand
2025-05-15 14:56       ` Usama Arif
2025-05-15 14:58         ` David Hildenbrand
2025-05-15 15:18           ` Lorenzo Stoakes
2025-05-15 15:45       ` Liam R. Howlett
2025-05-15 15:57         ` David Hildenbrand
2025-05-15 16:38           ` Lorenzo Stoakes
2025-05-15 17:29             ` David Hildenbrand
2025-05-15 18:09               ` Liam R. Howlett
2025-05-15 18:21                 ` Lorenzo Stoakes
2025-05-15 18:42                   ` Zi Yan
2025-05-15 21:04                     ` Lorenzo Stoakes
2025-05-15 18:46                   ` Usama Arif
2025-05-15 19:20                 ` David Hildenbrand
2025-05-15 15:28     ` Usama Arif
2025-05-15 16:06       ` Lorenzo Stoakes
2025-05-15 16:11         ` David Hildenbrand
2025-05-15 18:08           ` Lorenzo Stoakes
2025-05-15 19:12             ` David Hildenbrand
2025-05-15 20:35               ` Lorenzo Stoakes
2025-05-16  7:45                 ` David Hildenbrand
2025-05-16 10:57                   ` Lorenzo Stoakes
2025-05-16 11:24                     ` David Hildenbrand
2025-05-16 12:57                       ` Lorenzo Stoakes
2025-05-16 17:19                         ` Usama Arif
2025-05-16 17:51                           ` Lorenzo Stoakes
2025-05-16 19:34                             ` Usama Arif
2025-05-17 16:20                         ` Is number of process_madvise()-able ranges limited to 8? (was Re: [PATCH 1/6] prctl: introduce PR_THP_POLICY_DEFAULT_HUGE for the process) SeongJae Park
2025-05-17 18:50                           ` Lorenzo Stoakes
2025-05-17 20:25                             ` SeongJae Park
2025-05-17 19:01                         ` [PATCH 1/6] prctl: introduce PR_THP_POLICY_DEFAULT_HUGE for the process Lorenzo Stoakes
2025-05-15 16:47         ` Usama Arif
2025-05-15 18:36           ` Lorenzo Stoakes
2025-05-15 19:17             ` David Hildenbrand
2025-05-15 20:42               ` Lorenzo Stoakes
2025-05-16  6:12   ` kernel test robot
2025-05-15 13:33 ` Usama Arif [this message]
2025-05-16  8:19   ` [PATCH 2/6] prctl: introduce PR_THP_POLICY_DEFAULT_NOHUGE " kernel test robot
2025-05-15 13:33 ` [PATCH 3/6] prctl: introduce PR_THP_POLICY_SYSTEM " Usama Arif
2025-05-15 13:33 ` [PATCH 4/6] selftests: prctl: introduce tests for PR_THP_POLICY_DEFAULT_NOHUGE Usama Arif
2025-05-15 13:33 ` [PATCH 5/6] selftests: prctl: introduce tests for PR_THP_POLICY_DEFAULT_HUGE Usama Arif
2025-05-15 13:33 ` [PATCH 6/6] docs: transhuge: document process level THP controls Usama Arif
2025-05-15 13:55 ` [PATCH 0/6] prctl: introduce PR_SET/GET_THP_POLICY Lorenzo Stoakes
2025-05-15 14:50   ` Usama Arif
2025-05-15 15:15     ` Lorenzo Stoakes
2025-05-15 15:54       ` Usama Arif
2025-05-15 16:04         ` David Hildenbrand
2025-05-15 16:24         ` Lorenzo Stoakes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250515133519.2779639-3-usamaarif642@gmail.com \
    --to=usamaarif642@gmail.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=david@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=kernel-team@meta.com \
    --cc=laoar.shao@gmail.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=npache@redhat.com \
    --cc=riel@surriel.com \
    --cc=ryan.roberts@arm.com \
    --cc=shakeel.butt@linux.dev \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox