From: Gregory Price <gourry.memverge@gmail.com>
To: linux-mm@kvack.org, jgroves@micron.com, ravis.opensrc@micron.com,
sthanneeru@micron.com, emirakhur@micron.com, Hasan.Maruf@amd.com
Cc: linux-doc@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-api@vger.kernel.org, linux-arch@vger.kernel.org,
linux-kernel@vger.kernel.org, akpm@linux-foundation.org,
arnd@arndb.de, tglx@linutronix.de, luto@kernel.org,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
x86@kernel.org, hpa@zytor.com, mhocko@kernel.org, tj@kernel.org,
ying.huang@intel.com, gregory.price@memverge.com, corbet@lwn.net,
rakie.kim@sk.com, hyeongtak.ji@sk.com, honggyu.kim@sk.com,
vtavarespetr@micron.com, peterz@infradead.org
Subject: [RFC PATCH 04/11] mm/mempolicy: create struct mempolicy_args for creating new mempolicies
Date: Wed, 6 Dec 2023 19:27:52 -0500
Message-ID: <20231207002759.51418-5-gregory.price@memverge.com>
In-Reply-To: <20231207002759.51418-1-gregory.price@memverge.com>
This patch adds a new kernel structure, `struct mempolicy_args`,
intended to be used for an extensible get/set_mempolicy interface.
It implements the fields required to support the existing syscall
interfaces, but does not expose any user-facing argument
structure.
mpol_new() is refactored to take the argument structure so that
future mempolicy extensions can all be managed in the mempolicy
constructor.

The set_mempolicy and mbind syscalls are refactored to use the
new argument structure, as are all the callers of mpol_new() and
do_set_mempolicy().
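For example, the numa_policy_init() conversion in the diff below
replaces a direct do_set_mempolicy(mode, flags, nodes) call with a
zero-initialized argument structure; every converted caller follows
the same pattern, leaving any fields it does not use zeroed:

	struct mempolicy_args args;

	/* zero-fill so unused fields take benign defaults */
	memset(&args, 0, sizeof(args));
	args.mode = MPOL_INTERLEAVE;
	args.policy_nodes = &interleave_nodes;

	if (do_set_mempolicy(&args))
		pr_err("%s: interleaving failed\n", __func__);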
Signed-off-by: Gregory Price <gregory.price@memverge.com>
---
 include/linux/mempolicy.h | 14 ++++++++
 mm/mempolicy.c            | 69 +++++++++++++++++++++++++++++----------
 2 files changed, 65 insertions(+), 18 deletions(-)
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index ba09167e80f7..117c5395c6eb 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -61,6 +61,20 @@ struct mempolicy {
} wil;
};
+/*
+ * Describes settings of a mempolicy during set/get syscalls and
+ * kernel internal calls to do_set_mempolicy()
+ */
+struct mempolicy_args {
+ unsigned short mode; /* policy mode */
+ unsigned short mode_flags; /* policy mode flags */
+ nodemask_t *policy_nodes; /* get/set/mbind */
+ int policy_node; /* get: policy node information */
+ unsigned long addr; /* get: vma address */
+ int addr_node; /* get: node the address belongs to */
+ int home_node; /* mbind: use MPOL_MF_HOME_NODE */
+};
+
/*
* Support for managing mempolicy data objects (clone, copy, destroy)
* The default fast path of a NULL MPOL_DEFAULT policy is always inlined.
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index eec807d0c6a1..4c343218c033 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -268,10 +268,12 @@ static int mpol_set_nodemask(struct mempolicy *pol,
* This function just creates a new policy, does some check and simple
* initialization. You must invoke mpol_set_nodemask() to set nodes.
*/
-static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
- nodemask_t *nodes)
+static struct mempolicy *mpol_new(struct mempolicy_args *args)
{
struct mempolicy *policy;
+ unsigned short mode = args->mode;
+ unsigned short flags = args->mode_flags;
+ nodemask_t *nodes = args->policy_nodes;
if (mode == MPOL_DEFAULT) {
if (nodes && !nodes_empty(*nodes))
@@ -820,8 +822,7 @@ static int mbind_range(struct vma_iterator *vmi, struct vm_area_struct *vma,
}
/* Set the process memory policy */
-static long do_set_mempolicy(unsigned short mode, unsigned short flags,
- nodemask_t *nodes)
+static long do_set_mempolicy(struct mempolicy_args *args)
{
struct mempolicy *new, *old;
NODEMASK_SCRATCH(scratch);
@@ -830,14 +831,14 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
if (!scratch)
return -ENOMEM;
- new = mpol_new(mode, flags, nodes);
+ new = mpol_new(args);
if (IS_ERR(new)) {
ret = PTR_ERR(new);
goto out;
}
task_lock(current);
- ret = mpol_set_nodemask(new, nodes, scratch);
+ ret = mpol_set_nodemask(new, args->policy_nodes, scratch);
if (ret) {
task_unlock(current);
mpol_put(new);
@@ -1235,8 +1236,7 @@ static struct folio *alloc_migration_target_by_mpol(struct folio *src,
#endif
static long do_mbind(unsigned long start, unsigned long len,
- unsigned short mode, unsigned short mode_flags,
- nodemask_t *nmask, unsigned long flags)
+ struct mempolicy_args *margs, unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
@@ -1256,7 +1256,7 @@ static long do_mbind(unsigned long start, unsigned long len,
if (start & ~PAGE_MASK)
return -EINVAL;
- if (mode == MPOL_DEFAULT)
+ if (margs->mode == MPOL_DEFAULT)
flags &= ~MPOL_MF_STRICT;
len = PAGE_ALIGN(len);
@@ -1267,7 +1267,7 @@ static long do_mbind(unsigned long start, unsigned long len,
if (end == start)
return 0;
- new = mpol_new(mode, mode_flags, nmask);
+ new = mpol_new(margs);
if (IS_ERR(new))
return PTR_ERR(new);
@@ -1284,7 +1284,8 @@ static long do_mbind(unsigned long start, unsigned long len,
NODEMASK_SCRATCH(scratch);
if (scratch) {
mmap_write_lock(mm);
- err = mpol_set_nodemask(new, nmask, scratch);
+ err = mpol_set_nodemask(new, margs->policy_nodes,
+ scratch);
if (err)
mmap_write_unlock(mm);
} else
@@ -1298,7 +1299,7 @@ static long do_mbind(unsigned long start, unsigned long len,
* Lock the VMAs before scanning for pages to migrate,
* to ensure we don't miss a concurrently inserted page.
*/
- nr_failed = queue_pages_range(mm, start, end, nmask,
+ nr_failed = queue_pages_range(mm, start, end, margs->policy_nodes,
flags | MPOL_MF_INVERT | MPOL_MF_WRLOCK, &pagelist);
if (nr_failed < 0) {
@@ -1503,6 +1504,7 @@ static long kernel_mbind(unsigned long start, unsigned long len,
unsigned long mode, const unsigned long __user *nmask,
unsigned long maxnode, unsigned int flags)
{
+ struct mempolicy_args margs;
unsigned short mode_flags;
nodemask_t nodes;
int lmode = mode;
@@ -1517,7 +1519,12 @@ static long kernel_mbind(unsigned long start, unsigned long len,
if (err)
return err;
- return do_mbind(start, len, lmode, mode_flags, &nodes, flags);
+ memset(&margs, 0, sizeof(margs));
+ margs.mode = lmode;
+ margs.mode_flags = mode_flags;
+ margs.policy_nodes = &nodes;
+
+ return do_mbind(start, len, &margs, flags);
}
SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, len,
@@ -1598,6 +1605,7 @@ SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len,
static long kernel_set_mempolicy(int mode, const unsigned long __user *nmask,
unsigned long maxnode)
{
+ struct mempolicy_args args;
unsigned short mode_flags;
nodemask_t nodes;
int lmode = mode;
@@ -1611,7 +1619,12 @@ static long kernel_set_mempolicy(int mode, const unsigned long __user *nmask,
if (err)
return err;
- return do_set_mempolicy(lmode, mode_flags, &nodes);
+ memset(&args, 0, sizeof(args));
+ args.mode = lmode;
+ args.mode_flags = mode_flags;
+ args.policy_nodes = &nodes;
+
+ return do_set_mempolicy(&args);
}
SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask,
@@ -2877,6 +2890,7 @@ static int shared_policy_replace(struct shared_policy *sp, pgoff_t start,
void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
{
int ret;
+ struct mempolicy_args margs;
sp->root = RB_ROOT; /* empty tree == default mempolicy */
rwlock_init(&sp->lock);
@@ -2889,8 +2903,12 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
if (!scratch)
goto put_mpol;
+ memset(&margs, 0, sizeof(margs));
+ margs.mode = mpol->mode;
+ margs.mode_flags = mpol->flags;
+ margs.policy_nodes = &mpol->w.user_nodemask;
/* contextualize the tmpfs mount point mempolicy to this file */
- npol = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask);
+ npol = mpol_new(&margs);
if (IS_ERR(npol))
goto free_scratch; /* no valid nodemask intersection */
@@ -2998,6 +3016,7 @@ static inline void __init check_numabalancing_enable(void)
void __init numa_policy_init(void)
{
+ struct mempolicy_args args;
nodemask_t interleave_nodes;
unsigned long largest = 0;
int nid, prefer = 0;
@@ -3043,7 +3062,11 @@ void __init numa_policy_init(void)
if (unlikely(nodes_empty(interleave_nodes)))
node_set(prefer, interleave_nodes);
- if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
+ memset(&args, 0, sizeof(args));
+ args.mode = MPOL_INTERLEAVE;
+ args.policy_nodes = &interleave_nodes;
+
+ if (do_set_mempolicy(&args))
pr_err("%s: interleaving failed\n", __func__);
check_numabalancing_enable();
@@ -3052,7 +3075,12 @@ void __init numa_policy_init(void)
/* Reset policy of current process to default */
void numa_default_policy(void)
{
- do_set_mempolicy(MPOL_DEFAULT, 0, NULL);
+ struct mempolicy_args args;
+
+ memset(&args, 0, sizeof(args));
+ args.mode = MPOL_DEFAULT;
+
+ do_set_mempolicy(&args);
}
/*
@@ -3082,6 +3110,7 @@ static const char * const policy_modes[] =
*/
int mpol_parse_str(char *str, struct mempolicy **mpol)
{
+ struct mempolicy_args margs;
struct mempolicy *new = NULL;
unsigned short mode_flags;
nodemask_t nodes;
@@ -3168,7 +3197,11 @@ int mpol_parse_str(char *str, struct mempolicy **mpol)
goto out;
}
- new = mpol_new(mode, mode_flags, &nodes);
+ memset(&margs, 0, sizeof(margs));
+ margs.mode = mode;
+ margs.mode_flags = mode_flags;
+ margs.policy_nodes = &nodes;
+ new = mpol_new(&margs);
if (IS_ERR(new))
goto out;
--
2.39.1