From: Mike Travis <travis@sgi.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
Richard Henderson <rth@twiddle.net>,
David Howells <dhowells@redhat.com>,
Tony Luck <tony.luck@intel.com>,
Paul Mackerras <paulus@samba.org>,
Anton Blanchard <anton@samba.org>,
"David S. Miller" <davem@davemloft.net>,
"William L. Irwin" <wli@holomorphy.com>,
Thomas Gleixner <tglx@linutronix.de>,
"H. Peter Anvin" <hpa@zytor.com>
Subject: [PATCH 06/12] cpumask: create pointer to node_to_cpumask array element v2
Date: Tue, 25 Mar 2008 18:38:17 -0700 [thread overview]
Message-ID: <20080326013812.622459000@polaris-admin.engr.sgi.com> (raw)
In-Reply-To: <20080326013811.569646000@polaris-admin.engr.sgi.com>
[-- Attachment #1: node_to_cpumask_ptr --]
[-- Type: text/plain, Size: 13224 bytes --]
Create a simple macro that always declares a pointer to the
node_to_cpumask(node) value. In the generic case this relies on
compiler optimization to remove the extra indirection:
#define node_to_cpumask_ptr(v, node) \
cpumask_t _##v = node_to_cpumask(node), *v = &_##v
For systems with a large cpumask size, a true pointer
to the array element is used instead:
#define node_to_cpumask_ptr(v, node) \
cpumask_t *v = &(node_to_cpumask_map[node])
A node_to_cpumask_ptr_next() macro is provided to update the same
variable so that it refers to another node's node_to_cpumask value.
This removes 10256 bytes of stack usage.
Based on:
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git
# alpha
Cc: Richard Henderson <rth@twiddle.net>
# fujitsu
Cc: David Howells <dhowells@redhat.com>
# ia64
Cc: Tony Luck <tony.luck@intel.com>
# powerpc
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
# sparc
Cc: David S. Miller <davem@davemloft.net>
Cc: William L. Irwin <wli@holomorphy.com>
# x86
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Mike Travis <travis@sgi.com>
---
v2: rebased on linux-2.6.git + linux-2.6-x86.git
One checkpatch error remains that I don't think can be fixed (it was already in the source):
ERROR: Macros with complex values should be enclosed in parenthesis
#230: FILE: include/linux/topology.h:49:
#define for_each_node_with_cpus(node) \
for_each_online_node(node) \
if (nr_cpus_node(node))
total: 1 errors, 0 warnings, 315 lines checked
---
drivers/base/node.c | 4 ++--
drivers/pci/pci-driver.c | 4 ++--
include/asm-alpha/topology.h | 3 +--
include/asm-frv/topology.h | 4 +---
include/asm-generic/topology.h | 14 ++++++++++++++
include/asm-ia64/topology.h | 5 +++++
include/asm-powerpc/topology.h | 3 +--
include/asm-x86/topology.h | 15 +++++++++++++--
include/linux/topology.h | 13 ++++++-------
kernel/sched.c | 29 ++++++++++++++---------------
mm/page_alloc.c | 6 +++---
mm/slab.c | 5 ++---
mm/vmscan.c | 18 ++++++++----------
net/sunrpc/svc.c | 4 ++--
14 files changed, 74 insertions(+), 53 deletions(-)
--- linux.trees.git.orig/drivers/base/node.c
+++ linux.trees.git/drivers/base/node.c
@@ -22,13 +22,13 @@ static struct sysdev_class node_class =
static ssize_t node_read_cpumap(struct sys_device * dev, char * buf)
{
struct node *node_dev = to_node(dev);
- cpumask_t mask = node_to_cpumask(node_dev->sysdev.id);
+ node_to_cpumask_ptr(mask, node_dev->sysdev.id);
int len;
/* 2004/06/03: buf currently PAGE_SIZE, need > 1 char per 4 bits. */
BUILD_BUG_ON(MAX_NUMNODES/4 > PAGE_SIZE/2);
- len = cpumask_scnprintf(buf, PAGE_SIZE-1, mask);
+ len = cpumask_scnprintf(buf, PAGE_SIZE-1, *mask);
len += sprintf(buf + len, "\n");
return len;
}
--- linux.trees.git.orig/drivers/pci/pci-driver.c
+++ linux.trees.git/drivers/pci/pci-driver.c
@@ -184,8 +184,8 @@ static int pci_call_probe(struct pci_dri
int node = dev_to_node(&dev->dev);
if (node >= 0) {
- cpumask_t nodecpumask = node_to_cpumask(node);
- set_cpus_allowed(current, &nodecpumask);
+ node_to_cpumask_ptr(nodecpumask, node);
+ set_cpus_allowed(current, nodecpumask);
}
/* And set default memory allocation policy */
--- linux.trees.git.orig/include/asm-alpha/topology.h
+++ linux.trees.git/include/asm-alpha/topology.h
@@ -41,8 +41,7 @@ static inline cpumask_t node_to_cpumask(
#define pcibus_to_cpumask(bus) (cpu_online_map)
-#else /* CONFIG_NUMA */
-# include <asm-generic/topology.h>
#endif /* !CONFIG_NUMA */
+# include <asm-generic/topology.h>
#endif /* _ASM_ALPHA_TOPOLOGY_H */
--- linux.trees.git.orig/include/asm-frv/topology.h
+++ linux.trees.git/include/asm-frv/topology.h
@@ -5,10 +5,8 @@
#error NUMA not supported yet
-#else /* !CONFIG_NUMA */
+#endif /* CONFIG_NUMA */
#include <asm-generic/topology.h>
-#endif /* CONFIG_NUMA */
-
#endif /* _ASM_TOPOLOGY_H */
--- linux.trees.git.orig/include/asm-generic/topology.h
+++ linux.trees.git/include/asm-generic/topology.h
@@ -27,6 +27,8 @@
#ifndef _ASM_GENERIC_TOPOLOGY_H
#define _ASM_GENERIC_TOPOLOGY_H
+#ifndef CONFIG_NUMA
+
/* Other architectures wishing to use this simple topology API should fill
in the below functions as appropriate in their own <asm/topology.h> file. */
#ifndef cpu_to_node
@@ -52,4 +54,16 @@
)
#endif
+#endif /* !CONFIG_NUMA */
+
+/* returns pointer to cpumask for specified node */
+#ifndef node_to_cpumask_ptr
+
+#define node_to_cpumask_ptr(v, node) \
+ cpumask_t _##v = node_to_cpumask(node), *v = &_##v
+
+#define node_to_cpumask_ptr_next(v, node) \
+ _##v = node_to_cpumask(node)
+#endif
+
#endif /* _ASM_GENERIC_TOPOLOGY_H */
--- linux.trees.git.orig/include/asm-ia64/topology.h
+++ linux.trees.git/include/asm-ia64/topology.h
@@ -116,6 +116,11 @@ void build_cpu_to_node_map(void);
#define smt_capable() (smp_num_siblings > 1)
#endif
+#define pcibus_to_cpumask(bus) (pcibus_to_node(bus) == -1 ? \
+ CPU_MASK_ALL : \
+ node_to_cpumask(pcibus_to_node(bus)) \
+ )
+
#include <asm-generic/topology.h>
#endif /* _ASM_IA64_TOPOLOGY_H */
--- linux.trees.git.orig/include/asm-powerpc/topology.h
+++ linux.trees.git/include/asm-powerpc/topology.h
@@ -96,11 +96,10 @@ static inline void sysfs_remove_device_f
{
}
+#endif /* CONFIG_NUMA */
#include <asm-generic/topology.h>
-#endif /* CONFIG_NUMA */
-
#ifdef CONFIG_SMP
#include <asm/cputable.h>
#define smt_capable() (cpu_has_feature(CPU_FTR_SMT))
--- linux.trees.git.orig/include/asm-x86/topology.h
+++ linux.trees.git/include/asm-x86/topology.h
@@ -89,6 +89,17 @@ static inline int cpu_to_node(int cpu)
#endif
return per_cpu(x86_cpu_to_node_map, cpu);
}
+
+#ifdef CONFIG_NUMA
+
+/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
+#define node_to_cpumask_ptr(v, node) \
+ cpumask_t *v = &(node_to_cpumask_map[node])
+
+#define node_to_cpumask_ptr_next(v, node) \
+ v = &(node_to_cpumask_map[node])
+#endif
+
#endif /* CONFIG_X86_64 */
/*
@@ -186,10 +197,10 @@ static inline void set_mp_bus_to_node(in
{
}
-#include <asm-generic/topology.h>
-
#endif
+#include <asm-generic/topology.h>
+
extern cpumask_t cpu_coregroup_map(int cpu);
#ifdef ENABLE_TOPO_DEFINES
--- linux.trees.git.orig/include/linux/topology.h
+++ linux.trees.git/include/linux/topology.h
@@ -38,16 +38,15 @@
#endif
#ifndef nr_cpus_node
-#define nr_cpus_node(node) \
- ({ \
- cpumask_t __tmp__; \
- __tmp__ = node_to_cpumask(node); \
- cpus_weight(__tmp__); \
+#define nr_cpus_node(node) \
+ ({ \
+ node_to_cpumask_ptr(__tmp__, node); \
+ cpus_weight(*__tmp__); \
})
#endif
-#define for_each_node_with_cpus(node) \
- for_each_online_node(node) \
+#define for_each_node_with_cpus(node) \
+ for_each_online_node(node) \
if (nr_cpus_node(node))
void arch_update_cpu_topology(void);
--- linux.trees.git.orig/kernel/sched.c
+++ linux.trees.git/kernel/sched.c
@@ -6252,7 +6252,7 @@ init_sched_build_groups(cpumask_t span,
*
* Should use nodemask_t.
*/
-static int find_next_best_node(int node, unsigned long *used_nodes)
+static int find_next_best_node(int node, nodemask_t *used_nodes)
{
int i, n, val, min_val, best_node = 0;
@@ -6266,7 +6266,7 @@ static int find_next_best_node(int node,
continue;
/* Skip already used nodes */
- if (test_bit(n, used_nodes))
+ if (node_isset(n, *used_nodes))
continue;
/* Simple min distance search */
@@ -6278,14 +6278,13 @@ static int find_next_best_node(int node,
}
}
- set_bit(best_node, used_nodes);
+ node_set(best_node, *used_nodes);
return best_node;
}
/**
* sched_domain_node_span - get a cpumask for a node's sched_domain
* @node: node whose cpumask we're constructing
- * @size: number of nodes to include in this span
*
* Given a node, construct a good cpumask for its sched_domain to span. It
* should be one that prevents unnecessary balancing, but also spreads tasks
@@ -6293,22 +6292,22 @@ static int find_next_best_node(int node,
*/
static cpumask_t sched_domain_node_span(int node)
{
- DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
- cpumask_t span, nodemask;
+ nodemask_t used_nodes;
+ cpumask_t span;
+ node_to_cpumask_ptr(nodemask, node);
int i;
cpus_clear(span);
- bitmap_zero(used_nodes, MAX_NUMNODES);
+ nodes_clear(used_nodes);
- nodemask = node_to_cpumask(node);
- cpus_or(span, span, nodemask);
- set_bit(node, used_nodes);
+ cpus_or(span, span, *nodemask);
+ node_set(node, used_nodes);
for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
- int next_node = find_next_best_node(node, used_nodes);
+ int next_node = find_next_best_node(node, &used_nodes);
- nodemask = node_to_cpumask(next_node);
- cpus_or(span, span, nodemask);
+ node_to_cpumask_ptr_next(nodemask, next_node);
+ cpus_or(span, span, *nodemask);
}
return span;
@@ -6705,6 +6704,7 @@ static int build_sched_domains(const cpu
for (j = 0; j < MAX_NUMNODES; j++) {
cpumask_t tmp, notcovered;
int n = (i + j) % MAX_NUMNODES;
+ node_to_cpumask_ptr(nodemask, n);
cpus_complement(notcovered, covered);
cpus_and(tmp, notcovered, *cpu_map);
@@ -6712,8 +6712,7 @@ static int build_sched_domains(const cpu
if (cpus_empty(tmp))
break;
- nodemask = node_to_cpumask(n);
- cpus_and(tmp, tmp, nodemask);
+ cpus_and(tmp, tmp, *nodemask);
if (cpus_empty(tmp))
continue;
--- linux.trees.git.orig/mm/page_alloc.c
+++ linux.trees.git/mm/page_alloc.c
@@ -2029,6 +2029,7 @@ static int find_next_best_node(int node,
int n, val;
int min_val = INT_MAX;
int best_node = -1;
+ node_to_cpumask_ptr(tmp, 0);
/* Use the local node if we haven't already */
if (!node_isset(node, *used_node_mask)) {
@@ -2037,7 +2038,6 @@ static int find_next_best_node(int node,
}
for_each_node_state(n, N_HIGH_MEMORY) {
- cpumask_t tmp;
/* Don't want a node to appear more than once */
if (node_isset(n, *used_node_mask))
@@ -2050,8 +2050,8 @@ static int find_next_best_node(int node,
val += (n < node);
/* Give preference to headless and unused nodes */
- tmp = node_to_cpumask(n);
- if (!cpus_empty(tmp))
+ node_to_cpumask_ptr_next(tmp, n);
+ if (!cpus_empty(*tmp))
val += PENALTY_FOR_NODE_WITH_CPUS;
/* Slight preference for less loaded node */
--- linux.trees.git.orig/mm/slab.c
+++ linux.trees.git/mm/slab.c
@@ -1160,14 +1160,13 @@ static void __cpuinit cpuup_canceled(lon
struct kmem_cache *cachep;
struct kmem_list3 *l3 = NULL;
int node = cpu_to_node(cpu);
+ node_to_cpumask_ptr(mask, node);
list_for_each_entry(cachep, &cache_chain, next) {
struct array_cache *nc;
struct array_cache *shared;
struct array_cache **alien;
- cpumask_t mask;
- mask = node_to_cpumask(node);
/* cpu is dead; no one can alloc from it. */
nc = cachep->array[cpu];
cachep->array[cpu] = NULL;
@@ -1183,7 +1182,7 @@ static void __cpuinit cpuup_canceled(lon
if (nc)
free_block(cachep, nc->entry, nc->avail, node);
- if (!cpus_empty(mask)) {
+ if (!cpus_empty(*mask)) {
spin_unlock_irq(&l3->list_lock);
goto free_array_cache;
}
--- linux.trees.git.orig/mm/vmscan.c
+++ linux.trees.git/mm/vmscan.c
@@ -1664,11 +1664,10 @@ static int kswapd(void *p)
struct reclaim_state reclaim_state = {
.reclaimed_slab = 0,
};
- cpumask_t cpumask;
+ node_to_cpumask_ptr(cpumask, pgdat->node_id);
- cpumask = node_to_cpumask(pgdat->node_id);
- if (!cpus_empty(cpumask))
- set_cpus_allowed(tsk, &cpumask);
+ if (!cpus_empty(*cpumask))
+ set_cpus_allowed(tsk, cpumask);
current->reclaim_state = &reclaim_state;
/*
@@ -1897,17 +1896,16 @@ out:
static int __devinit cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
- pg_data_t *pgdat;
- cpumask_t mask;
int nid;
if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
for_each_node_state(nid, N_HIGH_MEMORY) {
- pgdat = NODE_DATA(nid);
- mask = node_to_cpumask(pgdat->node_id);
- if (any_online_cpu(mask) < nr_cpu_ids)
+ pg_data_t *pgdat = NODE_DATA(nid);
+ node_to_cpumask_ptr(mask, pgdat->node_id);
+
+ if (any_online_cpu(*mask) < nr_cpu_ids)
/* One of our CPUs online: restore mask */
- set_cpus_allowed(pgdat->kswapd, &mask);
+ set_cpus_allowed(pgdat->kswapd, mask);
}
}
return NOTIFY_OK;
--- linux.trees.git.orig/net/sunrpc/svc.c
+++ linux.trees.git/net/sunrpc/svc.c
@@ -323,10 +323,10 @@ svc_pool_map_set_cpumask(unsigned int pi
case SVC_POOL_PERNODE:
{
unsigned int node = m->pool_to[pidx];
- cpumask_t nodecpumask = node_to_cpumask(node);
+ node_to_cpumask_ptr(nodecpumask, node);
*oldmask = current->cpus_allowed;
- set_cpus_allowed(current, &nodecpumask);
+ set_cpus_allowed(current, nodecpumask);
return 1;
}
}
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2008-03-26 1:38 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-03-26 1:38 [PATCH 00/12] cpumask: reduce stack pressure from local/passed cpumask variables v2 Mike Travis
2008-03-26 1:38 ` [PATCH 01/12] cpumask: Convert cpumask_of_cpu to allocated array v2 Mike Travis
2008-03-26 1:38 ` [PATCH 02/12] cpumask: pass pointer to cpumask for set_cpus_allowed() v2 Mike Travis
2008-03-26 1:38 ` [PATCH 03/12] cpumask: reduce stack pressure in sched_affinity Mike Travis
2008-03-26 1:38 ` [PATCH 04/12] cpumask: pass cpumask by reference to acpi-cpufreq Mike Travis
2008-03-26 2:15 ` Dave Jones
2008-03-26 1:38 ` [PATCH 05/12] init: move large array from stack to _initdata section Mike Travis
2008-03-26 1:38 ` Mike Travis [this message]
2008-03-26 1:38 ` [PATCH 07/12] cpumask: reduce stack usage in SD_x_INIT initializers Mike Travis
2008-03-26 1:38 ` [PATCH 08/12] cpumask: pass temp cpumask variables in init_sched_build_groups Mike Travis
2008-03-26 1:38 ` [PATCH 09/12] sched: fix memory leak in build_sched_domains Mike Travis
2008-03-26 1:38 ` [PATCH 10/12] cpumask: reduce stack usage " Mike Travis
2008-03-26 1:38 ` [PATCH 11/12] cpumask: reduce stack pressure in cpu_coregroup_map v2 Mike Travis
2008-03-26 1:38 ` [PATCH 12/12] cpu/node mask: reduce stack usage using MASK_NONE, MASK_ALL Mike Travis
2008-03-26 6:18 ` [PATCH 00/12] cpumask: reduce stack pressure from local/passed cpumask variables v2 Ingo Molnar
2008-03-26 15:53 ` Mike Travis
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080326013812.622459000@polaris-admin.engr.sgi.com \
--to=travis@sgi.com \
--cc=akpm@linux-foundation.org \
--cc=anton@samba.org \
--cc=davem@davemloft.net \
--cc=dhowells@redhat.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mingo@elte.hu \
--cc=paulus@samba.org \
--cc=rth@twiddle.net \
--cc=tglx@linutronix.de \
--cc=tony.luck@intel.com \
--cc=wli@holomorphy.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox