* [PATCH 1/2] cpumask: Convert cpumask_of_cpu to static array
From: travis @ 2007-11-21 8:02 UTC
To: Andrew Morton, Andi Kleen, Christoph Lameter
Cc: mingo, apw, Jack Steiner, linux-mm, linux-kernel
[-- Attachment #1: cpumask-to-percpu --]
[-- Type: text/plain, Size: 2866 bytes --]
Here is a simple patch that uses a per-cpu cpumask instead of constructing
one on the stack. I have been running with this one for a while:
Do not use stack to allocate cpumask for cpumask_of_cpu
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Modified to be used only when NR_CPUS is greater than BITS_PER_LONG, and
to fix the !SMP cases for both NR_CPUS > BITS_PER_LONG and
NR_CPUS < BITS_PER_LONG.
Signed-off-by: Mike Travis <travis@sgi.com>
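
To illustrate the idea outside the kernel, here is a minimal standalone
sketch of the two cpumask_of_cpu() strategies. NR_CPUS, BITS_PER_LONG,
cpumask_t and the cpu_mask[] array below are simplified stand-ins for the
real kernel definitions (the kernel uses per_cpu(cpu_mask, cpu), not a
flat array):

/* Old scheme vs. new scheme for single-CPU masks (userspace sketch). */
#include <stdio.h>
#include <string.h>

#define NR_CPUS 4096		/* assume a large-SMP configuration */
#define BITS_PER_LONG 64	/* assume LP64 for this sketch */
#define MASK_LONGS ((NR_CPUS + BITS_PER_LONG - 1) / BITS_PER_LONG)

typedef struct { unsigned long bits[MASK_LONGS]; } cpumask_t;

/* Old scheme: every call builds an NR_CPUS/8-byte mask on the stack. */
static cpumask_t stack_mask_of_cpu(int cpu)
{
	cpumask_t m;

	memset(&m, 0, sizeof(m));
	m.bits[cpu / BITS_PER_LONG] |= 1UL << (cpu % BITS_PER_LONG);
	return m;		/* and copies it back out again */
}

/* New scheme: one mask per CPU, set up once at boot, never rebuilt. */
static cpumask_t cpu_mask[NR_CPUS];

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)	/* the sched_init() loop */
		cpu_mask[cpu].bits[cpu / BITS_PER_LONG] |=
			1UL << (cpu % BITS_PER_LONG);

	cpumask_t m = stack_mask_of_cpu(3);
	printf("masks agree: %d, stack bytes avoided per call: %zu\n",
	       memcmp(&m, &cpu_mask[3], sizeof(m)) == 0, sizeof(cpumask_t));
	return 0;
}

With NR_CPUS=4096 each avoided stack mask is 512 bytes, which is the
pressure the cover letter is talking about.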
---
 arch/x86/kernel/process_64.c |    1 +
 arch/x86/mm/numa_64.c        |    1 +
 include/linux/cpumask.h      |    4 ++++
 include/linux/sched.h        |    4 ++++
 kernel/sched.c               |    9 +++++++++
 5 files changed, 19 insertions(+)
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -222,6 +222,9 @@ int __next_cpu(int n, const cpumask_t *s
 #define next_cpu(n, src) 1
 #endif
 
+#if defined(CONFIG_SMP) && (NR_CPUS > BITS_PER_LONG)
+#define cpumask_of_cpu(cpu) per_cpu(cpu_mask, cpu)
+#else
 #define cpumask_of_cpu(cpu) \
 ({ \
 	typeof(_unused_cpumask_arg_) m; \
@@ -233,6 +236,7 @@ int __next_cpu(int n, const cpumask_t *s
 	} \
 	m; \
 })
+#endif
 
 #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6732,6 +6732,11 @@ static void init_cfs_rq(struct cfs_rq *c
 	cfs_rq->min_vruntime = (u64)(-(1LL << 20));
 }
 
+#if NR_CPUS > BITS_PER_LONG
+DEFINE_PER_CPU(cpumask_t, cpu_mask);
+EXPORT_PER_CPU_SYMBOL(cpu_mask);
+#endif
+
 void __init sched_init(void)
 {
 	int highest_cpu = 0;
@@ -6741,6 +6746,10 @@ void __init sched_init(void)
 		struct rt_prio_array *array;
 		struct rq *rq;
 
+#if NR_CPUS > BITS_PER_LONG
+		/* This makes cpumask_of_cpu work */
+		cpu_set(i, per_cpu(cpu_mask, i));
+#endif
 		rq = cpu_rq(i);
 		spin_lock_init(&rq->lock);
 		lockdep_set_class(&rq->lock, &rq->rq_lock_key);
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -36,6 +36,7 @@
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
 #include <linux/tick.h>
+#include <linux/percpu.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -11,6 +11,7 @@
 #include <linux/ctype.h>
 #include <linux/module.h>
 #include <linux/nodemask.h>
+#include <linux/sched.h>
 
 #include <asm/e820.h>
 #include <asm/proto.h>
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2024,6 +2024,10 @@ static inline void migration_init(void)
 #define TASK_SIZE_OF(tsk) TASK_SIZE
 #endif
 
+#if defined(CONFIG_SMP) && (NR_CPUS > BITS_PER_LONG)
+DECLARE_PER_CPU(cpumask_t, cpu_mask);
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif
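
(For reference, the net effect of the sched_init() hunk above is that the
per-CPU mask for CPU i ends up containing exactly bit i, so in the large-
NR_CPUS configuration cpumask_of_cpu(cpu) becomes a read of a ready-made
single-bit mask rather than the construction of one.)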
* [PATCH 2/2] cpumask: Convert set_cpus_allowed to use ptr for cpumask arg
From: travis @ 2007-11-21 8:02 UTC
To: Andrew Morton, Andi Kleen, Christoph Lameter
Cc: mingo, apw, Jack Steiner, linux-mm, linux-kernel
[-- Attachment #1: set_cpus_allowed-use-ptr-arg --]
[-- Type: text/plain, Size: 3131 bytes --]
Avoid pushing cpumask variables onto the stack when calling
set_cpus_allowed() when NR_CPUS > BITS_PER_LONG, by passing the cpumask_t
argument as a pointer.
Signed-off-by: Mike Travis <travis@sgi.com>
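
As a standalone illustration of the CPU_MASK_VAR() trick used below, here
is a minimal userspace sketch; NR_CPUS, BITS_PER_LONG, cpumask_t and
__set_cpus_allowed() are simplified stand-ins for the kernel definitions.
One function body compiles against either a by-value or a by-pointer mask
argument:

#include <stdio.h>

#define NR_CPUS 4096		/* assume a large-SMP configuration */
#define BITS_PER_LONG 64	/* assume LP64 for this sketch */

typedef struct {
	unsigned long bits[(NR_CPUS + BITS_PER_LONG - 1) / BITS_PER_LONG];
} cpumask_t;

#if NR_CPUS > BITS_PER_LONG
/* Large masks: pass a pointer and dereference at each use in the body. */
# define CPU_MASK_VAR(v)	(*(v))
# define set_cpus_allowed(mask)	__set_cpus_allowed(&(mask))
static int __set_cpus_allowed(cpumask_t *new_mask)
#else
/* Small masks: pass by value; CPU_MASK_VAR() is a no-op. */
# define CPU_MASK_VAR(v)	(v)
# define set_cpus_allowed(mask)	__set_cpus_allowed(mask)
static int __set_cpus_allowed(cpumask_t new_mask)
#endif
{
	/* One source body serves both signatures. */
	return (int)(CPU_MASK_VAR(new_mask).bits[0] & 1UL);
}

int main(void)
{
	cpumask_t m = { .bits = { 1UL } };	/* only CPU 0 allowed */

	printf("cpu0 allowed: %d; %zu-byte mask passed %s\n",
	       set_cpus_allowed(m), sizeof(cpumask_t),
	       NR_CPUS > BITS_PER_LONG ? "by pointer" : "by value");
	return 0;
}

Note the &(mask) in the by-pointer variant: the macro argument must be an
lvalue, which is how the real patch keeps the existing two-argument
set_cpus_allowed(p, new_mask) call sites source-compatible.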
---
 include/linux/sched.h |   11 +++++++++--
 kernel/sched.c        |   22 ++++++++++++++++------
 2 files changed, 25 insertions(+), 8 deletions(-)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1435,8 +1435,15 @@ static inline void put_task_struct(struc
 #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
 #define used_math() tsk_used_math(current)
 
-#ifdef CONFIG_SMP
-extern int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask);
+#if defined(CONFIG_SMP)
+# if NR_CPUS > BITS_PER_LONG
+  /* avoid pushing cpumask variable onto the stack */
+# define set_cpus_allowed(p, new_mask) __set_cpus_allowed((p), &(new_mask))
+  extern int __set_cpus_allowed(struct task_struct *p, cpumask_t *new_mask);
+# else
+# define set_cpus_allowed(p, new_mask) __set_cpus_allowed((p), (new_mask))
+  extern int __set_cpus_allowed(struct task_struct *p, cpumask_t new_mask);
+# endif
 #else
 static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 {
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5036,7 +5036,16 @@ static inline void sched_init_granularit
  * task must not exit() & deallocate itself prematurely. The
  * call is not atomic; no spinlocks may be held.
  */
-int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
+#if NR_CPUS > BITS_PER_LONG
+/*
+ * avoid pushing a large CPU count cpumask_t variable onto stack
+ * (relies on the fact that new_mask is a const arg to subfunctions)
+ */
+#define CPU_MASK_VAR(v) *v
+#else
+#define CPU_MASK_VAR(v) v
+#endif
+int __set_cpus_allowed(struct task_struct *p, cpumask_t CPU_MASK_VAR(new_mask))
 {
 	struct migration_req req;
 	unsigned long flags;
@@ -5044,17 +5053,17 @@ int set_cpus_allowed(struct task_struct
 	int ret = 0;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpus_intersects(new_mask, cpu_online_map)) {
+	if (!cpus_intersects(CPU_MASK_VAR(new_mask), cpu_online_map)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	p->cpus_allowed = new_mask;
+	p->cpus_allowed = CPU_MASK_VAR(new_mask);
 	/* Can the task run on the task's current CPU? If so, we're done */
-	if (cpu_isset(task_cpu(p), new_mask))
+	if (cpu_isset(task_cpu(p), CPU_MASK_VAR(new_mask)))
 		goto out;
 
-	if (migrate_task(p, any_online_cpu(new_mask), &req)) {
+	if (migrate_task(p, any_online_cpu(CPU_MASK_VAR(new_mask)), &req)) {
 		/* Need help from migration thread: drop lock and wait. */
 		task_rq_unlock(rq, &flags);
 		wake_up_process(rq->migration_thread);
@@ -5067,7 +5076,8 @@ out:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(set_cpus_allowed);
+/* keep CPU_MASK_VAR local to __set_cpus_allowed */
+EXPORT_SYMBOL_GPL(__set_cpus_allowed);
 
 /*
  * Move (not current) task off this cpu, onto dest cpu. We're doing
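
Worth noting how the two patches mesh: in the NR_CPUS > BITS_PER_LONG
case, set_cpus_allowed(p, new_mask) expands to take &(new_mask), so the
mask argument must be an lvalue. The old cpumask_of_cpu() was a ({ ... })
statement expression and would not qualify, but patch 1 turns it into the
per-CPU lvalue per_cpu(cpu_mask, cpu), so a call such as
set_cpus_allowed(current, cpumask_of_cpu(cpu)) compiles and never puts a
mask on the stack.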