* [PATCH 01/10] x86_64: Cleanup non-smp usage of cpu maps v2
2008-03-25 22:06 [PATCH 00/10] NR_CPUS: third reduction of NR_CPUS memory usage x86-version v2 Mike Travis
@ 2008-03-25 22:06 ` Mike Travis
2008-03-26 6:40 ` Ingo Molnar
2008-03-25 22:06 ` [PATCH 02/10] init: move setup of nr_cpu_ids to as early as possible v2 Mike Travis
` (9 subsequent siblings)
10 siblings, 1 reply; 33+ messages in thread
From: Mike Travis @ 2008-03-25 22:06 UTC (permalink / raw)
To: Andrew Morton
Cc: Ingo Molnar, linux-mm, linux-kernel, Andi Kleen, Thomas Gleixner,
Christoph Lameter
[-- Attachment #1: cleanup --]
[-- Type: text/plain, Size: 6609 bytes --]
Cleanup references to the early cpu maps for the non-SMP configuration
and remove some functions called for SMP configurations only.
Based on:
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git
Cc: Andi Kleen <ak@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
---
This patch was moved from the zero-based percpu variables patchset to here.
v2: rebased on linux-2.6.git + linux-2.6-x86.git
---
arch/x86/kernel/genapic_64.c | 2 ++
arch/x86/kernel/mpparse_64.c | 2 ++
arch/x86/kernel/setup.c | 28 +++++++++++-----------------
arch/x86/mm/numa_64.c | 4 +++-
include/asm-x86/smp.h | 5 +++++
include/asm-x86/topology.h | 15 +++++++++++----
6 files changed, 34 insertions(+), 22 deletions(-)
--- linux.trees.git.orig/arch/x86/kernel/genapic_64.c
+++ linux.trees.git/arch/x86/kernel/genapic_64.c
@@ -25,9 +25,11 @@
#endif
/* which logical CPU number maps to which CPU (physical APIC ID) */
+#ifdef CONFIG_SMP
u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata
= { [0 ... NR_CPUS-1] = BAD_APICID };
void *x86_cpu_to_apicid_early_ptr;
+#endif
DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
--- linux.trees.git.orig/arch/x86/kernel/mpparse_64.c
+++ linux.trees.git/arch/x86/kernel/mpparse_64.c
@@ -69,9 +69,11 @@ unsigned disabled_cpus __cpuinitdata;
/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
+#ifdef CONFIG_SMP
u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
= {[0 ... NR_CPUS - 1] = BAD_APICID };
void *x86_bios_cpu_apicid_early_ptr;
+#endif
DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
--- linux.trees.git.orig/arch/x86/kernel/setup.c
+++ linux.trees.git/arch/x86/kernel/setup.c
@@ -10,7 +10,7 @@
#include <asm/setup.h>
#include <asm/topology.h>
-#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
+#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_SMP)
/*
* Copy data used in early init routines from the initial arrays to the
* per cpu data areas. These arrays then become expendable and the
@@ -21,22 +21,13 @@ static void __init setup_per_cpu_maps(vo
int cpu;
for_each_possible_cpu(cpu) {
-#ifdef CONFIG_SMP
- if (per_cpu_offset(cpu)) {
-#endif
- per_cpu(x86_cpu_to_apicid, cpu) =
- x86_cpu_to_apicid_init[cpu];
- per_cpu(x86_bios_cpu_apicid, cpu) =
+ per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu];
+ per_cpu(x86_bios_cpu_apicid, cpu) =
x86_bios_cpu_apicid_init[cpu];
#ifdef CONFIG_NUMA
- per_cpu(x86_cpu_to_node_map, cpu) =
+ per_cpu(x86_cpu_to_node_map, cpu) =
x86_cpu_to_node_map_init[cpu];
#endif
-#ifdef CONFIG_SMP
- } else
- printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
- cpu);
-#endif
}
/* indicate the early static arrays will soon be gone */
@@ -72,17 +63,20 @@ void __init setup_per_cpu_areas(void)
/* Copy section for each CPU (we discard the original) */
size = PERCPU_ENOUGH_ROOM;
-
printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
size);
- for_each_cpu_mask(i, cpu_possible_map) {
+
+ for_each_possible_cpu(i) {
char *ptr;
#ifndef CONFIG_NEED_MULTIPLE_NODES
ptr = alloc_bootmem_pages(size);
#else
int node = early_cpu_to_node(i);
- if (!node_online(node) || !NODE_DATA(node))
+ if (!node_online(node) || !NODE_DATA(node)) {
ptr = alloc_bootmem_pages(size);
+ printk(KERN_INFO
+ "cpu %d has no node or node-local memory\n", i);
+ }
else
ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
#endif
@@ -96,7 +90,7 @@ void __init setup_per_cpu_areas(void)
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
}
- /* setup percpu data maps early */
+ /* Setup percpu data maps */
setup_per_cpu_maps();
}
--- linux.trees.git.orig/arch/x86/mm/numa_64.c
+++ linux.trees.git/arch/x86/mm/numa_64.c
@@ -31,13 +31,15 @@ bootmem_data_t plat_node_bdata[MAX_NUMNO
struct memnode memnode;
+#ifdef CONFIG_SMP
int x86_cpu_to_node_map_init[NR_CPUS] = {
[0 ... NR_CPUS-1] = NUMA_NO_NODE
};
void *x86_cpu_to_node_map_early_ptr;
+EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
+#endif
DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
--- linux.trees.git.orig/include/asm-x86/smp.h
+++ linux.trees.git/include/asm-x86/smp.h
@@ -11,10 +11,15 @@ extern int smp_num_siblings;
extern unsigned int num_processors;
extern cpumask_t cpu_initialized;
+#ifdef CONFIG_SMP
extern u16 x86_cpu_to_apicid_init[];
extern u16 x86_bios_cpu_apicid_init[];
extern void *x86_cpu_to_apicid_early_ptr;
extern void *x86_bios_cpu_apicid_early_ptr;
+#else
+#define x86_cpu_to_apicid_early_ptr NULL
+#define x86_bios_cpu_apicid_early_ptr NULL
+#endif
DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_t, cpu_core_map);
--- linux.trees.git.orig/include/asm-x86/topology.h
+++ linux.trees.git/include/asm-x86/topology.h
@@ -39,8 +39,13 @@ extern int cpu_to_node_map[];
#endif
DECLARE_PER_CPU(int, x86_cpu_to_node_map);
+
+#ifdef CONFIG_SMP
extern int x86_cpu_to_node_map_init[];
extern void *x86_cpu_to_node_map_early_ptr;
+#else
+#define x86_cpu_to_node_map_early_ptr NULL
+#endif
extern cpumask_t node_to_cpumask_map[];
@@ -55,6 +60,8 @@ static inline int cpu_to_node(int cpu)
}
#else /* CONFIG_X86_64 */
+
+#ifdef CONFIG_SMP
static inline int early_cpu_to_node(int cpu)
{
int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
@@ -66,6 +73,9 @@ static inline int early_cpu_to_node(int
else
return NUMA_NO_NODE;
}
+#else
+#define early_cpu_to_node(cpu) cpu_to_node(cpu)
+#endif
static inline int cpu_to_node(int cpu)
{
@@ -77,10 +87,7 @@ static inline int cpu_to_node(int cpu)
return ((int *)x86_cpu_to_node_map_early_ptr)[cpu];
}
#endif
- if (per_cpu_offset(cpu))
- return per_cpu(x86_cpu_to_node_map, cpu);
- else
- return NUMA_NO_NODE;
+ return per_cpu(x86_cpu_to_node_map, cpu);
}
#endif /* CONFIG_X86_64 */
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 01/10] x86_64: Cleanup non-smp usage of cpu maps v2
2008-03-25 22:06 ` [PATCH 01/10] x86_64: Cleanup non-smp usage of cpu maps v2 Mike Travis
@ 2008-03-26 6:40 ` Ingo Molnar
2008-03-26 16:11 ` Mike Travis
2008-04-07 20:36 ` Mike Travis
0 siblings, 2 replies; 33+ messages in thread
From: Ingo Molnar @ 2008-03-26 6:40 UTC (permalink / raw)
To: Mike Travis
Cc: Andrew Morton, linux-mm, linux-kernel, Andi Kleen,
Thomas Gleixner, Christoph Lameter
* Mike Travis <travis@sgi.com> wrote:
> Cleanup references to the early cpu maps for the non-SMP configuration
> and remove some functions called for SMP configurations only.
thanks, applied.
one observation:
> +#ifdef CONFIG_SMP
> extern int x86_cpu_to_node_map_init[];
> extern void *x86_cpu_to_node_map_early_ptr;
> +#else
> +#define x86_cpu_to_node_map_early_ptr NULL
> +#endif
Right now all these early_ptrs are in essence open-coded "early
per-cpu", right? But shouldn't we solve that in a much cleaner way: by
explicitly adding early-per-cpu types and accessors, and avoiding all
that #ifdeffery?
Ingo
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 01/10] x86_64: Cleanup non-smp usage of cpu maps v2
2008-03-26 6:40 ` Ingo Molnar
@ 2008-03-26 16:11 ` Mike Travis
2008-04-07 20:36 ` Mike Travis
1 sibling, 0 replies; 33+ messages in thread
From: Mike Travis @ 2008-03-26 16:11 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andrew Morton, linux-mm, linux-kernel, Andi Kleen,
Thomas Gleixner, Christoph Lameter
Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
>
>> Cleanup references to the early cpu maps for the non-SMP configuration
>> and remove some functions called for SMP configurations only.
>
> thanks, applied.
>
> one observation:
>
>> +#ifdef CONFIG_SMP
>> extern int x86_cpu_to_node_map_init[];
>> extern void *x86_cpu_to_node_map_early_ptr;
>> +#else
>> +#define x86_cpu_to_node_map_early_ptr NULL
>> +#endif
>
> Right now all these early_ptrs are in essence open-coded "early
> per-cpu", right? But shouldnt we solve that in a much cleaner way: by
> explicitly adding an early-per-cpu types and accessors, and avoid all
> that #ifdeffery?
>
> Ingo
I was thinking of something similar but had to put it on the back
burner until we got to the point of being able to boot a kernel
with NR_CPUS set to 4096. It should pop back up on the priority
queue very soon... ;-)
Thanks!
Mike
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 01/10] x86_64: Cleanup non-smp usage of cpu maps v2
2008-03-26 6:40 ` Ingo Molnar
2008-03-26 16:11 ` Mike Travis
@ 2008-04-07 20:36 ` Mike Travis
2008-04-07 21:32 ` Ingo Molnar
1 sibling, 1 reply; 33+ messages in thread
From: Mike Travis @ 2008-04-07 20:36 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andrew Morton, linux-mm, linux-kernel, Thomas Gleixner,
Christoph Lameter
Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
>
>> Cleanup references to the early cpu maps for the non-SMP configuration
>> and remove some functions called for SMP configurations only.
>
> thanks, applied.
>
> one observation:
>
>> +#ifdef CONFIG_SMP
>> extern int x86_cpu_to_node_map_init[];
>> extern void *x86_cpu_to_node_map_early_ptr;
>> +#else
>> +#define x86_cpu_to_node_map_early_ptr NULL
>> +#endif
>
> Right now all these early_ptrs are in essence open-coded "early
> per-cpu", right? But shouldnt we solve that in a much cleaner way: by
> explicitly adding an early-per-cpu types and accessors, and avoid all
> that #ifdeffery?
>
> Ingo
How about something like the below? (I haven't tried compiling it yet.)
[I also thought about not restricting it to only NR_CPUS type variables
to allow for example, node-local node maps/variables.]
Thanks,
Mike
------------------------------------------------------------
include/linux/percpu.h:
/*
 * "Early" per-cpu variables.
 *
 * On SMP each early per-cpu variable consists of three objects: the
 * regular per-cpu variable, an NR_CPUS-sized __initdata array
 * (<name>_early_map) and a pointer to that array (<name>_early_ptr).
 * early_per_cpu() reads through the pointer while it is non-NULL and
 * falls back to the regular per-cpu variable otherwise.
 *
 * On !SMP only the regular per-cpu variable exists.
 */
#ifdef CONFIG_SMP

/*
 * Define the per-cpu variable, its early map (every slot set to
 * initvalue) and the early pointer, which initially points at the map.
 */
#define DEFINE_EARLY_PER_CPU(type, name, initvalue)			\
	DEFINE_PER_CPU(type, name) = initvalue;				\
	type name##_early_map[NR_CPUS] __initdata =			\
		{ [0 ... NR_CPUS-1] = initvalue };			\
	type *name##_early_ptr = name##_early_map

/* Declare all three objects created by DEFINE_EARLY_PER_CPU(). */
#define DECLARE_EARLY_PER_CPU(type, name)				\
	DECLARE_PER_CPU(type, name);					\
	extern type *name##_early_ptr;					\
	extern type name##_early_map[]

/*
 * Export the per-cpu variable.
 * NOTE(review): <name>_early_ptr is not exported here; confirm whether
 * any modular user of early_per_cpu() needs it.
 */
#define EXPORT_EARLY_PER_CPU(name)					\
	EXPORT_PER_CPU_SYMBOL(name)

/* The early pointer itself (an lvalue, so it can be set to NULL). */
#define early_per_cpu_ptr(name)	(name##_early_ptr)

/* Slot idx of the static early map. */
#define early_per_cpu_map(name, idx)	(name##_early_map[(idx)])

/* rvalue only: early map entry while the pointer is set, else per-cpu. */
#define early_per_cpu(name, cpu)					\
	(early_per_cpu_ptr(name) ?					\
		early_per_cpu_ptr(name)[(cpu)] :			\
		per_cpu(name, cpu))

#else	/* !CONFIG_SMP */

#define DEFINE_EARLY_PER_CPU(type, name, initvalue)			\
	DEFINE_PER_CPU(type, name) = initvalue

/* Same (type, name) signature as the SMP variant. */
#define DECLARE_EARLY_PER_CPU(type, name)				\
	DECLARE_PER_CPU(type, name)

#define EXPORT_EARLY_PER_CPU(name)					\
	EXPORT_PER_CPU_SYMBOL(name)

#define early_per_cpu(name, cpu)	per_cpu(name, cpu)
#define early_per_cpu_ptr(name)		NULL
/* no early_per_cpu_map() */

#endif	/* !CONFIG_SMP */
------------------------------------------------------------
include/asm-x86/smp.h:
/*
 * Early per-cpu maps from logical CPU number to APIC id; the matching
 * DEFINE_EARLY_PER_CPU() lines are in arch/x86/kernel/setup.c.
 */
DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
------------------------------------------------------------
arch/x86/kernel/setup.c:
/* which logical CPU number maps to which CPU (physical APIC ID) */
DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU(x86_bios_cpu_apicid);
#ifdef CONFIG_NUMA
/*
 * Logical CPU number to node id. The variable must be named
 * x86_cpu_to_node_map: that is the name used by setup_per_cpu_maps(),
 * numa_set_node() and the existing per-cpu variable.
 */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU(x86_cpu_to_node_map);
#endif
...
#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_SMP)
/*
* Copy data used in early init routines from the initial arrays to the
* per cpu data areas. These arrays then become expendable and the
* *_early_ptr's are zeroed indicating that the static arrays are gone.
*/
static void __init setup_per_cpu_maps(void)
{
int cpu;
for_each_possible_cpu(cpu) {
per_cpu(x86_cpu_to_apicid, cpu) =
early_per_cpu_map(x86_cpu_to_apicid, cpu);
per_cpu(x86_bios_cpu_apicid, cpu) =
early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#ifdef CONFIG_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =
early_per_cpu_map(x86_cpu_to_node_map, cpu);
#endif
}
/* indicate the early static arrays will soon be gone */
early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#ifdef CONFIG_NUMA
early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
...
------------------------------------------------------------
arch/x86/mm/numa_64.c:
void __cpuinit numa_set_node(int cpu, int node)
{
int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
if(cpu_to_node_map)
cpu_to_node_map[cpu] = node;
else if(per_cpu_offset(cpu))
per_cpu(x86_cpu_to_node_map, cpu) = node;
...
void __init init_cpu_to_node(void)
{
int i;
for (i = 0; i < NR_CPUS; i++) {
int node;
u16 apicid = early_per_cpu(x86_cpu_to_apicid, i);
if (apicid == BAD_APICID)
continue;
...
------------------------------------------------------------
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 02/10] init: move setup of nr_cpu_ids to as early as possible v2
2008-03-25 22:06 [PATCH 00/10] NR_CPUS: third reduction of NR_CPUS memory usage x86-version v2 Mike Travis
2008-03-25 22:06 ` [PATCH 01/10] x86_64: Cleanup non-smp usage of cpu maps v2 Mike Travis
@ 2008-03-25 22:06 ` Mike Travis
2008-03-26 6:27 ` Ingo Molnar
2008-03-25 22:06 ` [PATCH 03/10] cpufreq: change cpu freq arrays to per_cpu variables Mike Travis
` (8 subsequent siblings)
10 siblings, 1 reply; 33+ messages in thread
From: Mike Travis @ 2008-03-25 22:06 UTC (permalink / raw)
To: Andrew Morton
Cc: Ingo Molnar, linux-mm, linux-kernel, Tony Luck, Paul Mackerras,
Anton Blanchard, David S. Miller, William L. Irwin,
Thomas Gleixner, H. Peter Anvin
[-- Attachment #1: setup-nr_cpu_ids --]
[-- Type: text/plain, Size: 6359 bytes --]
Move the setting of nr_cpu_ids from sched_init() to setup_per_cpu_areas(),
so that it's available as early as possible.
Based on:
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git
# ia64
Cc: Tony Luck <tony.luck@intel.com>
# powerpc
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
# sparc
Cc: David S. Miller <davem@davemloft.net>
Cc: William L. Irwin <wli@holomorphy.com>
# x86
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Mike Travis <travis@sgi.com>
---
Moved from the zero-based percpu variables patchset and redone to be
integrated with setup_per_cpu_areas instead of being called before
that function. This had to be done because some arch's call
prefill_possible_map() from setup_per_cpu_areas() which may increase
the number of possible cpus.
v2: rebased on linux-2.6.git + linux-2.6-x86.git
---
arch/ia64/kernel/acpi.c | 4 ++++
arch/ia64/kernel/setup.c | 7 +++++++
arch/powerpc/kernel/setup_64.c | 5 ++++-
arch/sparc64/mm/init.c | 10 +++++++++-
arch/x86/kernel/setup.c | 10 +++++++---
init/main.c | 15 ++++++++++++---
kernel/sched.c | 7 -------
7 files changed, 43 insertions(+), 15 deletions(-)
--- linux.trees.git.orig/arch/ia64/kernel/acpi.c
+++ linux.trees.git/arch/ia64/kernel/acpi.c
@@ -831,6 +831,10 @@ __init void prefill_possible_map(void)
for (i = 0; i < possible; i++)
cpu_set(i, cpu_possible_map);
+
+#ifdef CONFIG_SMP
+ nr_cpu_ids = possible;
+#endif
}
int acpi_map_lsapic(acpi_handle handle, int *pcpu)
--- linux.trees.git.orig/arch/ia64/kernel/setup.c
+++ linux.trees.git/arch/ia64/kernel/setup.c
@@ -765,6 +765,13 @@ setup_per_cpu_areas (void)
/* start_kernel() requires this... */
#ifdef CONFIG_ACPI_HOTPLUG_CPU
prefill_possible_map();
+#elif defined(CONFIG_SMP)
+ int cpu, highest_cpu = 0;
+
+ for_each_possible_cpu(cpu)
+ highest_cpu = cpu;
+
+ nr_cpu_ids = highest_cpu + 1;
#endif
}
--- linux.trees.git.orig/arch/powerpc/kernel/setup_64.c
+++ linux.trees.git/arch/powerpc/kernel/setup_64.c
@@ -576,7 +576,7 @@ void cpu_die(void)
#ifdef CONFIG_SMP
void __init setup_per_cpu_areas(void)
{
- int i;
+ int i, highest_cpu = 0;
unsigned long size;
char *ptr;
@@ -594,7 +594,10 @@ void __init setup_per_cpu_areas(void)
paca[i].data_offset = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+ if (i > highest_cpu)
+ highest_cpu = i;
}
+ nr_cpu_ids = highest_cpu + 1;
/* Now that per_cpu is setup, initialize cpu_sibling_map */
smp_setup_cpu_sibling_map();
--- linux.trees.git.orig/arch/sparc64/mm/init.c
+++ linux.trees.git/arch/sparc64/mm/init.c
@@ -1292,10 +1292,18 @@ pgd_t swapper_pg_dir[2048];
static void sun4u_pgprot_init(void);
static void sun4v_pgprot_init(void);
-/* Dummy function */
+#ifdef CONFIG_SMP
+/* set nr_cpu_ids */
void __init setup_per_cpu_areas(void)
{
+ int cpu, highest_cpu = 0;
+
+ for_each_possible_cpu(cpu)
+ highest_cpu = cpu;
+
+ nr_cpu_ids = highest_cpu + 1;
}
+#endif
void __init paging_init(void)
{
--- linux.trees.git.orig/arch/x86/kernel/setup.c
+++ linux.trees.git/arch/x86/kernel/setup.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(__per_cpu_offset);
*/
void __init setup_per_cpu_areas(void)
{
- int i;
+ int i, highest_cpu = 0;
unsigned long size;
#ifdef CONFIG_HOTPLUG_CPU
@@ -80,15 +80,19 @@ void __init setup_per_cpu_areas(void)
else
ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
#endif
- if (!ptr)
- panic("Cannot allocate cpu data for CPU %d\n", i);
+
#ifdef CONFIG_X86_64
cpu_pda(i)->data_offset = ptr - __per_cpu_start;
#else
__per_cpu_offset[i] = ptr - __per_cpu_start;
#endif
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+
+ if (i > highest_cpu)
+ highest_cpu = i;
}
+ nr_cpu_ids = highest_cpu + 1;
+ printk(KERN_DEBUG "NR_CPUS:%d (nr_cpu_ids:%d)\n", NR_CPUS, nr_cpu_ids);
/* Setup percpu data maps */
setup_per_cpu_maps();
--- linux.trees.git.orig/init/main.c
+++ linux.trees.git/init/main.c
@@ -364,16 +364,20 @@ static inline void smp_prepare_cpus(unsi
#else
+int nr_cpu_ids __read_mostly = NR_CPUS;
+EXPORT_SYMBOL(nr_cpu_ids);
+
#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-
EXPORT_SYMBOL(__per_cpu_offset);
+/* nr_cpu_ids is set as a side effect */
static void __init setup_per_cpu_areas(void)
{
- unsigned long size, i;
- char *ptr;
+ unsigned long size;
+ int i, highest_cpu = 0;
unsigned long nr_possible_cpus = num_possible_cpus();
+ char *ptr;
/* Copy section for each CPU (we discard the original) */
size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
@@ -383,7 +387,12 @@ static void __init setup_per_cpu_areas(v
__per_cpu_offset[i] = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
ptr += size;
+ if (i > highest_cpu)
+ highest_cpu = i;
}
+
+ nr_cpu_ids = highest_cpu + 1;
+ printk(KERN_DEBUG "NR_CPUS:%d (nr_cpu_ids:%d)\n", NR_CPUS, nr_cpu_ids);
}
#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
--- linux.trees.git.orig/kernel/sched.c
+++ linux.trees.git/kernel/sched.c
@@ -5923,10 +5923,6 @@ void __init migration_init(void)
#ifdef CONFIG_SMP
-/* Number of possible processor ids */
-int nr_cpu_ids __read_mostly = NR_CPUS;
-EXPORT_SYMBOL(nr_cpu_ids);
-
#ifdef CONFIG_SCHED_DEBUG
static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level)
@@ -7152,7 +7148,6 @@ static void init_tg_rt_entry(struct rq *
void __init sched_init(void)
{
- int highest_cpu = 0;
int i, j;
#ifdef CONFIG_SMP
@@ -7207,7 +7202,6 @@ void __init sched_init(void)
#endif
init_rq_hrtick(rq);
atomic_set(&rq->nr_iowait, 0);
- highest_cpu = i;
}
set_load_weight(&init_task);
@@ -7217,7 +7211,6 @@ void __init sched_init(void)
#endif
#ifdef CONFIG_SMP
- nr_cpu_ids = highest_cpu + 1;
open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
#endif
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 02/10] init: move setup of nr_cpu_ids to as early as possible v2
2008-03-25 22:06 ` [PATCH 02/10] init: move setup of nr_cpu_ids to as early as possible v2 Mike Travis
@ 2008-03-26 6:27 ` Ingo Molnar
2008-03-26 15:43 ` Mike Travis
0 siblings, 1 reply; 33+ messages in thread
From: Ingo Molnar @ 2008-03-26 6:27 UTC (permalink / raw)
To: Mike Travis
Cc: Andrew Morton, linux-mm, linux-kernel, Tony Luck, Paul Mackerras,
Anton Blanchard, David S. Miller, William L. Irwin,
Thomas Gleixner, H. Peter Anvin
* Mike Travis <travis@sgi.com> wrote:
> Move the setting of nr_cpu_ids from sched_init() to
> setup_per_cpu_areas(), so that it's available as early as possible.
hm, why not a separate call before setup_per_cpu_areas(), so that we can
avoid spreading this from generic kernel into a bunch of architectures
that happen to have their own version of setup_per_cpu_areas():
> 7 files changed, 43 insertions(+), 15 deletions(-)
Ingo
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 02/10] init: move setup of nr_cpu_ids to as early as possible v2
2008-03-26 6:27 ` Ingo Molnar
@ 2008-03-26 15:43 ` Mike Travis
2008-03-26 17:09 ` Ingo Molnar
0 siblings, 1 reply; 33+ messages in thread
From: Mike Travis @ 2008-03-26 15:43 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andrew Morton, linux-mm, linux-kernel, Tony Luck, Paul Mackerras,
Anton Blanchard, David S. Miller, William L. Irwin,
Thomas Gleixner, H. Peter Anvin
Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
>
>> Move the setting of nr_cpu_ids from sched_init() to
>> setup_per_cpu_areas(), so that it's available as early as possible.
>
> hm, why not a separate call before setup_per_cpu_areas(), so that we can
> avoid spreading this from generic kernel into a bunch of architectures
> that happen to have their own version of setup_per_cpu_areas():
>
>> 7 files changed, 43 insertions(+), 15 deletions(-)
>
> Ingo
I had this before but I then discovered that an arch would increase
(and possibly decrease) its number of possible cpus in setup_per_cpu_areas().
So I figured that setting nr_cpu_ids (and the cpumask_of_cpu map) should
be a side effect of setup_per_cpu_areas().
Thanks,
Mike
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 02/10] init: move setup of nr_cpu_ids to as early as possible v2
2008-03-26 15:43 ` Mike Travis
@ 2008-03-26 17:09 ` Ingo Molnar
2008-03-26 18:22 ` Mike Travis
0 siblings, 1 reply; 33+ messages in thread
From: Ingo Molnar @ 2008-03-26 17:09 UTC (permalink / raw)
To: Mike Travis
Cc: Andrew Morton, linux-mm, linux-kernel, Tony Luck, Paul Mackerras,
Anton Blanchard, David S. Miller, William L. Irwin,
Thomas Gleixner, H. Peter Anvin
* Mike Travis <travis@sgi.com> wrote:
> Ingo Molnar wrote:
> > * Mike Travis <travis@sgi.com> wrote:
> >
> >> Move the setting of nr_cpu_ids from sched_init() to
> >> setup_per_cpu_areas(), so that it's available as early as possible.
> >
> > hm, why not a separate call before setup_per_cpu_areas(), so that we can
> > avoid spreading this from generic kernel into a bunch of architectures
> > that happen to have their own version of setup_per_cpu_areas():
> >
> >> 7 files changed, 43 insertions(+), 15 deletions(-)
> >
> > Ingo
>
> I had this before but I then discovered that an arch would increase
> (and possible decrease) it's number of possible cpus in
> setup_per_cpu_areas(). So I figured that setting nr_cpu_ids (and the
> cpumask_of_cpu map) should be a side effect of setup_per_cpu_areas().
well, then why not do it shortly after setup_per_cpu_areas()? That still
moves it earlier than sched_init() but doesn't export all this code and
complexity to every setup_per_cpu_areas() implementation. (which clearly
didn't need this complexity before)
Ingo
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 02/10] init: move setup of nr_cpu_ids to as early as possible v2
2008-03-26 17:09 ` Ingo Molnar
@ 2008-03-26 18:22 ` Mike Travis
0 siblings, 0 replies; 33+ messages in thread
From: Mike Travis @ 2008-03-26 18:22 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andrew Morton, linux-mm, linux-kernel, Tony Luck, Paul Mackerras,
Anton Blanchard, David S. Miller, William L. Irwin,
Thomas Gleixner, H. Peter Anvin
Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
>
>> Ingo Molnar wrote:
>>> * Mike Travis <travis@sgi.com> wrote:
>>>
>>>> Move the setting of nr_cpu_ids from sched_init() to
>>>> setup_per_cpu_areas(), so that it's available as early as possible.
>>> hm, why not a separate call before setup_per_cpu_areas(), so that we can
>>> avoid spreading this from generic kernel into a bunch of architectures
>>> that happen to have their own version of setup_per_cpu_areas():
>>>
>>>> 7 files changed, 43 insertions(+), 15 deletions(-)
>>> Ingo
>> I had this before but I then discovered that an arch would increase
>> (and possible decrease) it's number of possible cpus in
>> setup_per_cpu_areas(). So I figured that setting nr_cpu_ids (and the
>> cpumask_of_cpu map) should be a side effect of setup_per_cpu_areas().
>
> well, then why not do it shortly after setup_per_cpu_areas()? That still
> moves it earlier than sched_init() but doesnt export all this code and
> complexity toevery setup_per_cpu_areas() implementation. (which clearly
> didnt need this complexity before)
>
> Ingo
Ok, will do.
Thanks,
Mike
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 03/10] cpufreq: change cpu freq arrays to per_cpu variables
2008-03-25 22:06 [PATCH 00/10] NR_CPUS: third reduction of NR_CPUS memory usage x86-version v2 Mike Travis
2008-03-25 22:06 ` [PATCH 01/10] x86_64: Cleanup non-smp usage of cpu maps v2 Mike Travis
2008-03-25 22:06 ` [PATCH 02/10] init: move setup of nr_cpu_ids to as early as possible v2 Mike Travis
@ 2008-03-25 22:06 ` Mike Travis
2008-03-25 22:06 ` [PATCH 04/10] acpi: change processors from array to per_cpu variable Mike Travis
` (7 subsequent siblings)
10 siblings, 0 replies; 33+ messages in thread
From: Mike Travis @ 2008-03-25 22:06 UTC (permalink / raw)
To: Andrew Morton; +Cc: Ingo Molnar, linux-mm, linux-kernel, Dave Jones
[-- Attachment #1: nr_cpus-in-cpufreq-cpu_alloc --]
[-- Type: text/plain, Size: 11179 bytes --]
Change cpufreq_policy and cpufreq_governor pointer tables
from arrays to per_cpu variables in the cpufreq subsystem.
Also some minor complaints from checkpatch.pl fixed.
Based on:
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git
Cc: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Mike Travis <travis@sgi.com>
---
drivers/cpufreq/cpufreq.c | 45 +++++++++++++++++++++-------------------
drivers/cpufreq/cpufreq_stats.c | 24 ++++++++++-----------
drivers/cpufreq/freq_table.c | 12 +++++-----
3 files changed, 42 insertions(+), 39 deletions(-)
--- linux.trees.git.orig/drivers/cpufreq/cpufreq.c
+++ linux.trees.git/drivers/cpufreq/cpufreq.c
@@ -38,10 +38,10 @@
* also protects the cpufreq_cpu_data array.
*/
static struct cpufreq_driver *cpufreq_driver;
-static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS];
+static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
#ifdef CONFIG_HOTPLUG_CPU
/* This one keeps track of the previously set governor of a removed CPU */
-static struct cpufreq_governor *cpufreq_cpu_governor[NR_CPUS];
+static DEFINE_PER_CPU(struct cpufreq_governor *, cpufreq_cpu_governor);
#endif
static DEFINE_SPINLOCK(cpufreq_driver_lock);
@@ -133,7 +133,7 @@ struct cpufreq_policy *cpufreq_cpu_get(u
struct cpufreq_policy *data;
unsigned long flags;
- if (cpu >= NR_CPUS)
+ if (cpu >= nr_cpu_ids)
goto err_out;
/* get the cpufreq driver */
@@ -147,7 +147,7 @@ struct cpufreq_policy *cpufreq_cpu_get(u
/* get the CPU */
- data = cpufreq_cpu_data[cpu];
+ data = per_cpu(cpufreq_cpu_data, cpu);
if (!data)
goto err_out_put_module;
@@ -325,7 +325,7 @@ void cpufreq_notify_transition(struct cp
dprintk("notification %u of frequency transition to %u kHz\n",
state, freqs->new);
- policy = cpufreq_cpu_data[freqs->cpu];
+ policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
switch (state) {
case CPUFREQ_PRECHANGE:
@@ -809,8 +809,8 @@ static int cpufreq_add_dev (struct sys_d
#ifdef CONFIG_SMP
#ifdef CONFIG_HOTPLUG_CPU
- if (cpufreq_cpu_governor[cpu]){
- policy->governor = cpufreq_cpu_governor[cpu];
+ if (per_cpu(cpufreq_cpu_governor, cpu)) {
+ policy->governor = per_cpu(cpufreq_cpu_governor, cpu);
dprintk("Restoring governor %s for cpu %d\n",
policy->governor->name, cpu);
}
@@ -835,7 +835,7 @@ static int cpufreq_add_dev (struct sys_d
spin_lock_irqsave(&cpufreq_driver_lock, flags);
managed_policy->cpus = policy->cpus;
- cpufreq_cpu_data[cpu] = managed_policy;
+ per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
dprintk("CPU already managed, adding link\n");
@@ -890,7 +890,7 @@ static int cpufreq_add_dev (struct sys_d
spin_lock_irqsave(&cpufreq_driver_lock, flags);
for_each_cpu_mask(j, policy->cpus) {
- cpufreq_cpu_data[j] = policy;
+ per_cpu(cpufreq_cpu_data, j) = policy;
per_cpu(policy_cpu, j) = policy->cpu;
}
spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
@@ -939,7 +939,7 @@ static int cpufreq_add_dev (struct sys_d
err_out_unregister:
spin_lock_irqsave(&cpufreq_driver_lock, flags);
for_each_cpu_mask(j, policy->cpus)
- cpufreq_cpu_data[j] = NULL;
+ per_cpu(cpufreq_cpu_data, j) = NULL;
spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
kobject_put(&policy->kobj);
@@ -981,7 +981,7 @@ static int __cpufreq_remove_dev (struct
dprintk("unregistering CPU %u\n", cpu);
spin_lock_irqsave(&cpufreq_driver_lock, flags);
- data = cpufreq_cpu_data[cpu];
+ data = per_cpu(cpufreq_cpu_data, cpu);
if (!data) {
spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
@@ -989,7 +989,7 @@ static int __cpufreq_remove_dev (struct
unlock_policy_rwsem_write(cpu);
return -EINVAL;
}
- cpufreq_cpu_data[cpu] = NULL;
+ per_cpu(cpufreq_cpu_data, cpu) = NULL;
#ifdef CONFIG_SMP
@@ -1011,19 +1011,19 @@ static int __cpufreq_remove_dev (struct
#ifdef CONFIG_SMP
#ifdef CONFIG_HOTPLUG_CPU
- cpufreq_cpu_governor[cpu] = data->governor;
+ per_cpu(cpufreq_cpu_governor, cpu) = data->governor;
#endif
/* if we have other CPUs still registered, we need to unlink them,
* or else wait_for_completion below will lock up. Clean the
- * cpufreq_cpu_data[] while holding the lock, and remove the sysfs
- * links afterwards.
+ * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
+ * the sysfs links afterwards.
*/
if (unlikely(cpus_weight(data->cpus) > 1)) {
for_each_cpu_mask(j, data->cpus) {
if (j == cpu)
continue;
- cpufreq_cpu_data[j] = NULL;
+ per_cpu(cpufreq_cpu_data, j) = NULL;
}
}
@@ -1035,7 +1035,7 @@ static int __cpufreq_remove_dev (struct
continue;
dprintk("removing link for cpu %u\n", j);
#ifdef CONFIG_HOTPLUG_CPU
- cpufreq_cpu_governor[j] = data->governor;
+ per_cpu(cpufreq_cpu_governor, j) = data->governor;
#endif
cpu_sys_dev = get_cpu_sysdev(j);
sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq");
@@ -1145,7 +1145,7 @@ EXPORT_SYMBOL(cpufreq_quick_get);
static unsigned int __cpufreq_get(unsigned int cpu)
{
- struct cpufreq_policy *policy = cpufreq_cpu_data[cpu];
+ struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
unsigned int ret_freq = 0;
if (!cpufreq_driver->get)
@@ -1818,16 +1818,19 @@ int cpufreq_register_driver(struct cpufr
cpufreq_driver = driver_data;
spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
- ret = sysdev_driver_register(&cpu_sysdev_class,&cpufreq_sysdev_driver);
+ ret = sysdev_driver_register(&cpu_sysdev_class,
+ &cpufreq_sysdev_driver);
if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) {
int i;
ret = -ENODEV;
/* check for at least one working CPU */
- for (i=0; i<NR_CPUS; i++)
- if (cpufreq_cpu_data[i])
+ for (i = 0; i < nr_cpu_ids; i++)
+ if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
ret = 0;
+ break;
+ }
/* if all ->init() calls failed, unregister */
if (ret) {
--- linux.trees.git.orig/drivers/cpufreq/cpufreq_stats.c
+++ linux.trees.git/drivers/cpufreq/cpufreq_stats.c
@@ -43,7 +43,7 @@ struct cpufreq_stats {
#endif
};
-static struct cpufreq_stats *cpufreq_stats_table[NR_CPUS];
+static DEFINE_PER_CPU(struct cpufreq_stats *, cpufreq_stats_table);
struct cpufreq_stats_attribute {
struct attribute attr;
@@ -58,7 +58,7 @@ cpufreq_stats_update (unsigned int cpu)
cur_time = get_jiffies_64();
spin_lock(&cpufreq_stats_lock);
- stat = cpufreq_stats_table[cpu];
+ stat = per_cpu(cpufreq_stats_table, cpu);
if (stat->time_in_state)
stat->time_in_state[stat->last_index] =
cputime64_add(stat->time_in_state[stat->last_index],
@@ -71,11 +71,11 @@ cpufreq_stats_update (unsigned int cpu)
static ssize_t
show_total_trans(struct cpufreq_policy *policy, char *buf)
{
- struct cpufreq_stats *stat = cpufreq_stats_table[policy->cpu];
+ struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu);
if (!stat)
return 0;
return sprintf(buf, "%d\n",
- cpufreq_stats_table[stat->cpu]->total_trans);
+ per_cpu(cpufreq_stats_table, stat->cpu)->total_trans);
}
static ssize_t
@@ -83,7 +83,7 @@ show_time_in_state(struct cpufreq_policy
{
ssize_t len = 0;
int i;
- struct cpufreq_stats *stat = cpufreq_stats_table[policy->cpu];
+ struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu);
if (!stat)
return 0;
cpufreq_stats_update(stat->cpu);
@@ -101,7 +101,7 @@ show_trans_table(struct cpufreq_policy *
ssize_t len = 0;
int i, j;
- struct cpufreq_stats *stat = cpufreq_stats_table[policy->cpu];
+ struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu);
if (!stat)
return 0;
cpufreq_stats_update(stat->cpu);
@@ -166,7 +166,7 @@ freq_table_get_index(struct cpufreq_stat
static void cpufreq_stats_free_table(unsigned int cpu)
{
- struct cpufreq_stats *stat = cpufreq_stats_table[cpu];
+ struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, cpu);
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
if (policy && policy->cpu == cpu)
sysfs_remove_group(&policy->kobj, &stats_attr_group);
@@ -174,7 +174,7 @@ static void cpufreq_stats_free_table(uns
kfree(stat->time_in_state);
kfree(stat);
}
- cpufreq_stats_table[cpu] = NULL;
+ per_cpu(cpufreq_stats_table, cpu) = NULL;
if (policy)
cpufreq_cpu_put(policy);
}
@@ -188,7 +188,7 @@ cpufreq_stats_create_table (struct cpufr
struct cpufreq_policy *data;
unsigned int alloc_size;
unsigned int cpu = policy->cpu;
- if (cpufreq_stats_table[cpu])
+ if (per_cpu(cpufreq_stats_table, cpu))
return -EBUSY;
if ((stat = kzalloc(sizeof(struct cpufreq_stats), GFP_KERNEL)) == NULL)
return -ENOMEM;
@@ -203,7 +203,7 @@ cpufreq_stats_create_table (struct cpufr
goto error_out;
stat->cpu = cpu;
- cpufreq_stats_table[cpu] = stat;
+ per_cpu(cpufreq_stats_table, cpu) = stat;
for (i=0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
unsigned int freq = table[i].frequency;
@@ -247,7 +247,7 @@ error_out:
cpufreq_cpu_put(data);
error_get_fail:
kfree(stat);
- cpufreq_stats_table[cpu] = NULL;
+ per_cpu(cpufreq_stats_table, cpu) = NULL;
return ret;
}
@@ -280,7 +280,7 @@ cpufreq_stat_notifier_trans (struct noti
if (val != CPUFREQ_POSTCHANGE)
return 0;
- stat = cpufreq_stats_table[freq->cpu];
+ stat = per_cpu(cpufreq_stats_table, freq->cpu);
if (!stat)
return 0;
--- linux.trees.git.orig/drivers/cpufreq/freq_table.c
+++ linux.trees.git/drivers/cpufreq/freq_table.c
@@ -169,7 +169,7 @@ int cpufreq_frequency_table_target(struc
}
EXPORT_SYMBOL_GPL(cpufreq_frequency_table_target);
-static struct cpufreq_frequency_table *show_table[NR_CPUS];
+static DEFINE_PER_CPU(struct cpufreq_frequency_table *, show_table);
/**
* show_available_freqs - show available frequencies for the specified CPU
*/
@@ -180,10 +180,10 @@ static ssize_t show_available_freqs (str
ssize_t count = 0;
struct cpufreq_frequency_table *table;
- if (!show_table[cpu])
+ if (!per_cpu(show_table, cpu))
return -ENODEV;
- table = show_table[cpu];
+ table = per_cpu(show_table, cpu);
for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
if (table[i].frequency == CPUFREQ_ENTRY_INVALID)
@@ -212,20 +212,20 @@ void cpufreq_frequency_table_get_attr(st
unsigned int cpu)
{
dprintk("setting show_table for cpu %u to %p\n", cpu, table);
- show_table[cpu] = table;
+ per_cpu(show_table, cpu) = table;
}
EXPORT_SYMBOL_GPL(cpufreq_frequency_table_get_attr);
void cpufreq_frequency_table_put_attr(unsigned int cpu)
{
dprintk("clearing show_table for cpu %u\n", cpu);
- show_table[cpu] = NULL;
+ per_cpu(show_table, cpu) = NULL;
}
EXPORT_SYMBOL_GPL(cpufreq_frequency_table_put_attr);
struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu)
{
- return show_table[cpu];
+ return per_cpu(show_table, cpu);
}
EXPORT_SYMBOL_GPL(cpufreq_frequency_get_table);
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread* [PATCH 04/10] acpi: change processors from array to per_cpu variable
2008-03-25 22:06 [PATCH 00/10] NR_CPUS: third reduction of NR_CPUS memory usage x86-version v2 Mike Travis
` (2 preceding siblings ...)
2008-03-25 22:06 ` [PATCH 03/10] cpufreq: change cpu freq arrays to per_cpu variables Mike Travis
@ 2008-03-25 22:06 ` Mike Travis
2008-03-25 22:06 ` [PATCH 05/10] cpumask: Add cpumask_scnprintf_len function Mike Travis
` (6 subsequent siblings)
10 siblings, 0 replies; 33+ messages in thread
From: Mike Travis @ 2008-03-25 22:06 UTC (permalink / raw)
To: Andrew Morton; +Cc: Ingo Molnar, linux-mm, linux-kernel, Len Brown
[-- Attachment #1: nr_cpus-in-acpi-driver-cpu_alloc --]
[-- Type: text/plain, Size: 8204 bytes --]
Change processors from an array sized by NR_CPUS to a per_cpu variable.
Based on:
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git
Cc: Len Brown <len.brown@intel.com>
Signed-off-by: Mike Travis <travis@sgi.com>
---
drivers/acpi/processor_core.c | 18 ++++++++----------
drivers/acpi/processor_idle.c | 8 ++++----
drivers/acpi/processor_perflib.c | 18 +++++++++---------
drivers/acpi/processor_throttling.c | 14 +++++++-------
include/acpi/processor.h | 2 +-
5 files changed, 29 insertions(+), 31 deletions(-)
--- linux.trees.git.orig/drivers/acpi/processor_core.c
+++ linux.trees.git/drivers/acpi/processor_core.c
@@ -118,7 +118,7 @@ static const struct file_operations acpi
.release = single_release,
};
-struct acpi_processor *processors[NR_CPUS];
+DEFINE_PER_CPU(struct acpi_processor *, processors);
struct acpi_processor_errata errata __read_mostly;
/* --------------------------------------------------------------------------
@@ -615,7 +615,7 @@ static int acpi_processor_get_info(struc
return 0;
}
-static void *processor_device_array[NR_CPUS];
+static DEFINE_PER_CPU(void *, processor_device_array);
static int __cpuinit acpi_processor_start(struct acpi_device *device)
{
@@ -639,15 +639,15 @@ static int __cpuinit acpi_processor_star
* ACPI id of processors can be reported wrongly by the BIOS.
* Don't trust it blindly
*/
- if (processor_device_array[pr->id] != NULL &&
- processor_device_array[pr->id] != device) {
+ if (per_cpu(processor_device_array, pr->id) != NULL &&
+ per_cpu(processor_device_array, pr->id) != device) {
printk(KERN_WARNING "BIOS reported wrong ACPI id "
"for the processor\n");
return -ENODEV;
}
- processor_device_array[pr->id] = device;
+ per_cpu(processor_device_array, pr->id) = device;
- processors[pr->id] = pr;
+ per_cpu(processors, pr->id) = pr;
result = acpi_processor_add_fs(device);
if (result)
@@ -751,7 +751,7 @@ static int acpi_cpu_soft_notify(struct n
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
- struct acpi_processor *pr = processors[cpu];
+ struct acpi_processor *pr = per_cpu(processors, cpu);
if (action == CPU_ONLINE && pr) {
acpi_processor_ppc_has_changed(pr);
@@ -821,7 +821,7 @@ static int acpi_processor_remove(struct
pr->cdev = NULL;
}
- processors[pr->id] = NULL;
+ per_cpu(processors, pr->id) = NULL;
kfree(pr);
@@ -1070,8 +1070,6 @@ static int __init acpi_processor_init(vo
{
int result = 0;
-
- memset(&processors, 0, sizeof(processors));
memset(&errata, 0, sizeof(errata));
#ifdef CONFIG_SMP
--- linux.trees.git.orig/drivers/acpi/processor_idle.c
+++ linux.trees.git/drivers/acpi/processor_idle.c
@@ -401,7 +401,7 @@ static void acpi_processor_idle(void)
*/
local_irq_disable();
- pr = processors[smp_processor_id()];
+ pr = __get_cpu_var(processors);
if (!pr) {
local_irq_enable();
return;
@@ -1425,7 +1425,7 @@ static int acpi_idle_enter_c1(struct cpu
struct acpi_processor *pr;
struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
- pr = processors[smp_processor_id()];
+ pr = __get_cpu_var(processors);
if (unlikely(!pr))
return 0;
@@ -1465,7 +1465,7 @@ static int acpi_idle_enter_simple(struct
u32 t1, t2;
int sleep_ticks = 0;
- pr = processors[smp_processor_id()];
+ pr = __get_cpu_var(processors);
if (unlikely(!pr))
return 0;
@@ -1544,7 +1544,7 @@ static int acpi_idle_enter_bm(struct cpu
u32 t1, t2;
int sleep_ticks = 0;
- pr = processors[smp_processor_id()];
+ pr = __get_cpu_var(processors);
if (unlikely(!pr))
return 0;
--- linux.trees.git.orig/drivers/acpi/processor_perflib.c
+++ linux.trees.git/drivers/acpi/processor_perflib.c
@@ -89,7 +89,7 @@ static int acpi_processor_ppc_notifier(s
if (event != CPUFREQ_INCOMPATIBLE)
goto out;
- pr = processors[policy->cpu];
+ pr = per_cpu(processors, policy->cpu);
if (!pr || !pr->performance)
goto out;
@@ -577,7 +577,7 @@ int acpi_processor_preregister_performan
/* Call _PSD for all CPUs */
for_each_possible_cpu(i) {
- pr = processors[i];
+ pr = per_cpu(processors, i);
if (!pr) {
/* Look only at processors in ACPI namespace */
continue;
@@ -608,7 +608,7 @@ int acpi_processor_preregister_performan
* domain info.
*/
for_each_possible_cpu(i) {
- pr = processors[i];
+ pr = per_cpu(processors, i);
if (!pr)
continue;
@@ -629,7 +629,7 @@ int acpi_processor_preregister_performan
cpus_clear(covered_cpus);
for_each_possible_cpu(i) {
- pr = processors[i];
+ pr = per_cpu(processors, i);
if (!pr)
continue;
@@ -656,7 +656,7 @@ int acpi_processor_preregister_performan
if (i == j)
continue;
- match_pr = processors[j];
+ match_pr = per_cpu(processors, j);
if (!match_pr)
continue;
@@ -685,7 +685,7 @@ int acpi_processor_preregister_performan
if (i == j)
continue;
- match_pr = processors[j];
+ match_pr = per_cpu(processors, j);
if (!match_pr)
continue;
@@ -702,7 +702,7 @@ int acpi_processor_preregister_performan
err_ret:
for_each_possible_cpu(i) {
- pr = processors[i];
+ pr = per_cpu(processors, i);
if (!pr || !pr->performance)
continue;
@@ -733,7 +733,7 @@ acpi_processor_register_performance(stru
mutex_lock(&performance_mutex);
- pr = processors[cpu];
+ pr = per_cpu(processors, cpu);
if (!pr) {
mutex_unlock(&performance_mutex);
return -ENODEV;
@@ -771,7 +771,7 @@ acpi_processor_unregister_performance(st
mutex_lock(&performance_mutex);
- pr = processors[cpu];
+ pr = per_cpu(processors, cpu);
if (!pr) {
mutex_unlock(&performance_mutex);
return;
--- linux.trees.git.orig/drivers/acpi/processor_throttling.c
+++ linux.trees.git/drivers/acpi/processor_throttling.c
@@ -71,7 +71,7 @@ static int acpi_processor_update_tsd_coo
* coordination between all CPUs.
*/
for_each_possible_cpu(i) {
- pr = processors[i];
+ pr = per_cpu(processors, i);
if (!pr)
continue;
@@ -93,7 +93,7 @@ static int acpi_processor_update_tsd_coo
cpus_clear(covered_cpus);
for_each_possible_cpu(i) {
- pr = processors[i];
+ pr = per_cpu(processors, i);
if (!pr)
continue;
@@ -119,7 +119,7 @@ static int acpi_processor_update_tsd_coo
if (i == j)
continue;
- match_pr = processors[j];
+ match_pr = per_cpu(processors, j);
if (!match_pr)
continue;
@@ -152,7 +152,7 @@ static int acpi_processor_update_tsd_coo
if (i == j)
continue;
- match_pr = processors[j];
+ match_pr = per_cpu(processors, j);
if (!match_pr)
continue;
@@ -172,7 +172,7 @@ static int acpi_processor_update_tsd_coo
err_ret:
for_each_possible_cpu(i) {
- pr = processors[i];
+ pr = per_cpu(processors, i);
if (!pr)
continue;
@@ -214,7 +214,7 @@ static int acpi_processor_throttling_not
struct acpi_processor_throttling *p_throttling;
cpu = p_tstate->cpu;
- pr = processors[cpu];
+ pr = per_cpu(processors, cpu);
if (!pr) {
ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Invalid pr pointer\n"));
return 0;
@@ -1035,7 +1035,7 @@ int acpi_processor_set_throttling(struct
* cpus.
*/
for_each_cpu_mask(i, online_throttling_cpus) {
- match_pr = processors[i];
+ match_pr = per_cpu(processors, i);
/*
* If the pointer is invalid, we will report the
* error message and continue.
--- linux.trees.git.orig/include/acpi/processor.h
+++ linux.trees.git/include/acpi/processor.h
@@ -255,7 +255,7 @@ extern void acpi_processor_unregister_pe
int acpi_processor_notify_smm(struct module *calling_module);
/* for communication between multiple parts of the processor kernel module */
-extern struct acpi_processor *processors[NR_CPUS];
+DECLARE_PER_CPU(struct acpi_processor *, processors);
extern struct acpi_processor_errata errata;
void arch_acpi_processor_init_pdc(struct acpi_processor *pr);
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread* [PATCH 05/10] cpumask: Add cpumask_scnprintf_len function
2008-03-25 22:06 [PATCH 00/10] NR_CPUS: third reduction of NR_CPUS memory usage x86-version v2 Mike Travis
` (3 preceding siblings ...)
2008-03-25 22:06 ` [PATCH 04/10] acpi: change processors from array to per_cpu variable Mike Travis
@ 2008-03-25 22:06 ` Mike Travis
2008-03-25 22:06 ` [PATCH 06/10] x86: reduce memory and stack usage in intel_cacheinfo Mike Travis
` (5 subsequent siblings)
10 siblings, 0 replies; 33+ messages in thread
From: Mike Travis @ 2008-03-25 22:06 UTC (permalink / raw)
To: Andrew Morton; +Cc: Ingo Molnar, linux-mm, linux-kernel, Paul Jackson
[-- Attachment #1: add-cpumask_scnprintf_len --]
[-- Type: text/plain, Size: 2896 bytes --]
Add a new function cpumask_scnprintf_len() to return the number of
characters needed to display "len" cpumask bits. The current method
of allocating NR_CPUS bytes is incorrect as what's really needed is
9 characters per 32-bit word of cpumask bits (8 hex digits plus the
separator [','] or the terminating NUL). This function provides the
caller the means to allocate the correct string length.
Based on:
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
---
include/linux/bitmap.h | 1 +
include/linux/cpumask.h | 7 +++++++
lib/bitmap.c | 16 ++++++++++++++++
3 files changed, 24 insertions(+)
--- linux.trees.git.orig/include/linux/bitmap.h
+++ linux.trees.git/include/linux/bitmap.h
@@ -108,6 +108,7 @@ extern int __bitmap_weight(const unsigne
extern int bitmap_scnprintf(char *buf, unsigned int len,
const unsigned long *src, int nbits);
+extern int bitmap_scnprintf_len(unsigned int len);
extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user,
unsigned long *dst, int nbits);
extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen,
--- linux.trees.git.orig/include/linux/cpumask.h
+++ linux.trees.git/include/linux/cpumask.h
@@ -273,6 +273,13 @@ static inline int __cpumask_scnprintf(ch
return bitmap_scnprintf(buf, len, srcp->bits, nbits);
}
+#define cpumask_scnprintf_len(len) \
+ __cpumask_scnprintf_len((len))
+static inline int __cpumask_scnprintf_len(int len)
+{
+ return bitmap_scnprintf_len(len);
+}
+
#define cpumask_parse_user(ubuf, ulen, dst) \
__cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS)
static inline int __cpumask_parse_user(const char __user *buf, int len,
--- linux.trees.git.orig/lib/bitmap.c
+++ linux.trees.git/lib/bitmap.c
@@ -316,6 +316,22 @@ int bitmap_scnprintf(char *buf, unsigned
EXPORT_SYMBOL(bitmap_scnprintf);
/**
+ * bitmap_scnprintf_len - return buffer length needed to convert
+ * bitmap to an ASCII hex string.
+ * @len: number of bits to be converted
+ */
+int bitmap_scnprintf_len(unsigned int len)
+{
+ /* we need 9 chars per word for 32 bit words (8 hexdigits + sep/null) */
+ int bitslen = ALIGN(len, CHUNKSZ);
+ int wordlen = CHUNKSZ / 4;
+ int buflen = (bitslen / wordlen) * (wordlen + 1) * sizeof(char);
+
+ return buflen;
+}
+EXPORT_SYMBOL(bitmap_scnprintf_len);
+
+/**
* __bitmap_parse - convert an ASCII hex string into a bitmap.
* @buf: pointer to buffer containing string.
* @buflen: buffer size in bytes. If string is smaller than this
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread* [PATCH 06/10] x86: reduce memory and stack usage in intel_cacheinfo
2008-03-25 22:06 [PATCH 00/10] NR_CPUS: third reduction of NR_CPUS memory usage x86-version v2 Mike Travis
` (4 preceding siblings ...)
2008-03-25 22:06 ` [PATCH 05/10] cpumask: Add cpumask_scnprintf_len function Mike Travis
@ 2008-03-25 22:06 ` Mike Travis
2008-03-26 6:50 ` Ingo Molnar
2008-03-25 22:06 ` [PATCH 07/10] cpu: change cpu_sys_devices from array to per_cpu variable Mike Travis
` (4 subsequent siblings)
10 siblings, 1 reply; 33+ messages in thread
From: Mike Travis @ 2008-03-25 22:06 UTC (permalink / raw)
To: Andrew Morton
Cc: Ingo Molnar, linux-mm, linux-kernel, Thomas Gleixner,
Ingo Molnar, H. Peter Anvin, Andi Kleen
[-- Attachment #1: nr_cpus-in-intel_cacheinfo --]
[-- Type: text/plain, Size: 7368 bytes --]
* Change the following static arrays sized by NR_CPUS to
per_cpu data variables:
_cpuid4_info *cpuid4_info[NR_CPUS];
_index_kobject *index_kobject[NR_CPUS];
kobject * cache_kobject[NR_CPUS];
* Replace the local NR_CPUS array with a kmalloc'd region in
show_shared_cpu_map().
Also fixed some minor complaints from checkpatch.pl.
Based on:
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Mike Travis <travis@sgi.com>
---
arch/x86/kernel/cpu/intel_cacheinfo.c | 70 +++++++++++++++++++---------------
1 file changed, 40 insertions(+), 30 deletions(-)
--- linux.trees.git.orig/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ linux.trees.git/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -129,7 +129,7 @@ struct _cpuid4_info {
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned long size;
- cpumask_t shared_cpu_map;
+ cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
};
unsigned short num_cache_leaves;
@@ -451,8 +451,8 @@ unsigned int __cpuinit init_intel_cachei
}
/* pointer to _cpuid4_info array (for each cache leaf) */
-static struct _cpuid4_info *cpuid4_info[NR_CPUS];
-#define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y]))
+static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
+#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
#ifdef CONFIG_SMP
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
@@ -474,7 +474,7 @@ static void __cpuinit cache_shared_cpu_m
if (cpu_data(i).apicid >> index_msb ==
c->apicid >> index_msb) {
cpu_set(i, this_leaf->shared_cpu_map);
- if (i != cpu && cpuid4_info[i]) {
+ if (i != cpu && per_cpu(cpuid4_info, i)) {
sibling_leaf = CPUID4_INFO_IDX(i, index);
cpu_set(cpu, sibling_leaf->shared_cpu_map);
}
@@ -505,8 +505,8 @@ static void __cpuinit free_cache_attribu
for (i = 0; i < num_cache_leaves; i++)
cache_remove_shared_cpu_map(cpu, i);
- kfree(cpuid4_info[cpu]);
- cpuid4_info[cpu] = NULL;
+ kfree(per_cpu(cpuid4_info, cpu));
+ per_cpu(cpuid4_info, cpu) = NULL;
}
static int __cpuinit detect_cache_attributes(unsigned int cpu)
@@ -519,9 +519,9 @@ static int __cpuinit detect_cache_attrib
if (num_cache_leaves == 0)
return -ENOENT;
- cpuid4_info[cpu] = kzalloc(
+ per_cpu(cpuid4_info, cpu) = kzalloc(
sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
- if (cpuid4_info[cpu] == NULL)
+ if (per_cpu(cpuid4_info, cpu) == NULL)
return -ENOMEM;
oldmask = current->cpus_allowed;
@@ -546,8 +546,8 @@ static int __cpuinit detect_cache_attrib
out:
if (retval) {
- kfree(cpuid4_info[cpu]);
- cpuid4_info[cpu] = NULL;
+ kfree(per_cpu(cpuid4_info, cpu));
+ per_cpu(cpuid4_info, cpu) = NULL;
}
return retval;
@@ -561,7 +561,7 @@ out:
extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
/* pointer to kobject for cpuX/cache */
-static struct kobject * cache_kobject[NR_CPUS];
+static DEFINE_PER_CPU(struct kobject *, cache_kobject);
struct _index_kobject {
struct kobject kobj;
@@ -570,8 +570,8 @@ struct _index_kobject {
};
/* pointer to array of kobjects for cpuX/cache/indexY */
-static struct _index_kobject *index_kobject[NR_CPUS];
-#define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y]))
+static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
+#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))
#define show_one_plus(file_name, object, val) \
static ssize_t show_##file_name \
@@ -593,9 +593,16 @@ static ssize_t show_size(struct _cpuid4_
static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
{
- char mask_str[NR_CPUS];
- cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
- return sprintf(buf, "%s\n", mask_str);
+ int n = 0;
+ int len = cpumask_scnprintf_len(nr_cpu_ids);
+ char *mask_str = kmalloc(len, GFP_KERNEL);
+
+ if (mask_str) {
+ cpumask_scnprintf(mask_str, len, this_leaf->shared_cpu_map);
+ n = sprintf(buf, "%s\n", mask_str);
+ kfree(mask_str);
+ }
+ return n;
}
static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
@@ -684,10 +691,10 @@ static struct kobj_type ktype_percpu_ent
static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
{
- kfree(cache_kobject[cpu]);
- kfree(index_kobject[cpu]);
- cache_kobject[cpu] = NULL;
- index_kobject[cpu] = NULL;
+ kfree(per_cpu(cache_kobject, cpu));
+ kfree(per_cpu(index_kobject, cpu));
+ per_cpu(cache_kobject, cpu) = NULL;
+ per_cpu(index_kobject, cpu) = NULL;
free_cache_attributes(cpu);
}
@@ -703,13 +710,14 @@ static int __cpuinit cpuid4_cache_sysfs_
return err;
/* Allocate all required memory */
- cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL);
- if (unlikely(cache_kobject[cpu] == NULL))
+ per_cpu(cache_kobject, cpu) =
+ kzalloc(sizeof(struct kobject), GFP_KERNEL);
+ if (unlikely(per_cpu(cache_kobject, cpu) == NULL))
goto err_out;
- index_kobject[cpu] = kzalloc(
+ per_cpu(index_kobject, cpu) = kzalloc(
sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
- if (unlikely(index_kobject[cpu] == NULL))
+ if (unlikely(per_cpu(index_kobject, cpu) == NULL))
goto err_out;
return 0;
@@ -733,7 +741,8 @@ static int __cpuinit cache_add_dev(struc
if (unlikely(retval < 0))
return retval;
- retval = kobject_init_and_add(cache_kobject[cpu], &ktype_percpu_entry,
+ retval = kobject_init_and_add(per_cpu(cache_kobject, cpu),
+ &ktype_percpu_entry,
&sys_dev->kobj, "%s", "cache");
if (retval < 0) {
cpuid4_cache_sysfs_exit(cpu);
@@ -745,13 +754,14 @@ static int __cpuinit cache_add_dev(struc
this_object->cpu = cpu;
this_object->index = i;
retval = kobject_init_and_add(&(this_object->kobj),
- &ktype_cache, cache_kobject[cpu],
+ &ktype_cache,
+ per_cpu(cache_kobject, cpu),
"index%1lu", i);
if (unlikely(retval)) {
for (j = 0; j < i; j++) {
kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj));
}
- kobject_put(cache_kobject[cpu]);
+ kobject_put(per_cpu(cache_kobject, cpu));
cpuid4_cache_sysfs_exit(cpu);
break;
}
@@ -760,7 +770,7 @@ static int __cpuinit cache_add_dev(struc
if (!retval)
cpu_set(cpu, cache_dev_map);
- kobject_uevent(cache_kobject[cpu], KOBJ_ADD);
+ kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
return retval;
}
@@ -769,7 +779,7 @@ static void __cpuinit cache_remove_dev(s
unsigned int cpu = sys_dev->id;
unsigned long i;
- if (cpuid4_info[cpu] == NULL)
+ if (per_cpu(cpuid4_info, cpu) == NULL)
return;
if (!cpu_isset(cpu, cache_dev_map))
return;
@@ -777,7 +787,7 @@ static void __cpuinit cache_remove_dev(s
for (i = 0; i < num_cache_leaves; i++)
kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
- kobject_put(cache_kobject[cpu]);
+ kobject_put(per_cpu(cache_kobject, cpu));
cpuid4_cache_sysfs_exit(cpu);
}
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH 06/10] x86: reduce memory and stack usage in intel_cacheinfo
2008-03-25 22:06 ` [PATCH 06/10] x86: reduce memory and stack usage in intel_cacheinfo Mike Travis
@ 2008-03-26 6:50 ` Ingo Molnar
2008-03-26 15:41 ` Mike Travis
0 siblings, 1 reply; 33+ messages in thread
From: Ingo Molnar @ 2008-03-26 6:50 UTC (permalink / raw)
To: Mike Travis
Cc: Andrew Morton, linux-mm, linux-kernel, Thomas Gleixner,
Ingo Molnar, H. Peter Anvin, Andi Kleen
* Mike Travis <travis@sgi.com> wrote:
> * Change the following static arrays sized by NR_CPUS to
> per_cpu data variables:
>
> _cpuid4_info *cpuid4_info[NR_CPUS];
> _index_kobject *index_kobject[NR_CPUS];
> kobject * cache_kobject[NR_CPUS];
>
> * Remove the local NR_CPUS array with a kmalloc'd region in
> show_shared_cpu_map().
thanks Travis, i've applied this to x86.git.
one observation:
> static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
> {
> - char mask_str[NR_CPUS];
> - cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
> - return sprintf(buf, "%s\n", mask_str);
> + int n = 0;
> + int len = cpumask_scnprintf_len(nr_cpu_ids);
> + char *mask_str = kmalloc(len, GFP_KERNEL);
> +
> + if (mask_str) {
> + cpumask_scnprintf(mask_str, len, this_leaf->shared_cpu_map);
> + n = sprintf(buf, "%s\n", mask_str);
> + kfree(mask_str);
> + }
> + return n;
the other changes look good, but this one looks a bit ugly and complex.
We basically want to sprintf shared_cpu_map into 'buf', but we do that
by first allocating a temporary buffer, print a string into it, then
print that string into another buffer ...
this very much smells like an API bug in cpumask_scnprintf() - why dont
you create a cpumask_scnprintf_ptr() API that takes a pointer to a
cpumask? Then this change would become a trivial and much more readable:
- char mask_str[NR_CPUS];
- cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
- return sprintf(buf, "%s\n", mask_str);
+ return cpumask_scnprintf_ptr(buf, NR_CPUS, &this_leaf->shared_cpu_map);
Ingo
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH 06/10] x86: reduce memory and stack usage in intel_cacheinfo
2008-03-26 6:50 ` Ingo Molnar
@ 2008-03-26 15:41 ` Mike Travis
2008-03-26 16:13 ` Jeremy Fitzhardinge
2008-03-26 17:12 ` Ingo Molnar
0 siblings, 2 replies; 33+ messages in thread
From: Mike Travis @ 2008-03-26 15:41 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andrew Morton, linux-mm, linux-kernel, Thomas Gleixner,
Ingo Molnar, H. Peter Anvin, Andi Kleen
Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
>
>> * Change the following static arrays sized by NR_CPUS to
>> per_cpu data variables:
>>
>> _cpuid4_info *cpuid4_info[NR_CPUS];
>> _index_kobject *index_kobject[NR_CPUS];
>> kobject * cache_kobject[NR_CPUS];
>>
>> * Remove the local NR_CPUS array with a kmalloc'd region in
>> show_shared_cpu_map().
>
> thanks Travis, i've applied this to x86.git.
>
> one observation:
>
>> static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
>> {
>> - char mask_str[NR_CPUS];
>> - cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
>> - return sprintf(buf, "%s\n", mask_str);
>> + int n = 0;
>> + int len = cpumask_scnprintf_len(nr_cpu_ids);
>> + char *mask_str = kmalloc(len, GFP_KERNEL);
>> +
>> + if (mask_str) {
>> + cpumask_scnprintf(mask_str, len, this_leaf->shared_cpu_map);
>> + n = sprintf(buf, "%s\n", mask_str);
>> + kfree(mask_str);
>> + }
>> + return n;
>
> the other changes look good, but this one looks a bit ugly and complex.
> We basically want to sprintf shared_cpu_map into 'buf', but we do that
> by first allocating a temporary buffer, print a string into it, then
> print that string into another buffer ...
>
> this very much smells like an API bug in cpumask_scnprintf() - why dont
> you create a cpumask_scnprintf_ptr() API that takes a pointer to a
> cpumask? Then this change would become a trivial and much more readable:
>
> - char mask_str[NR_CPUS];
> - cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
> - return sprintf(buf, "%s\n", mask_str);
> + return cpumask_scnprintf_ptr(buf, NR_CPUS, &this_leaf->shared_cpu_map);
>
> Ingo
The main goal was to avoid allocating 4096 bytes when only 32 would do
(characters needed to represent nr_cpu_ids cpus instead of NR_CPUS cpus.)
But I'll look at cleaning it up a bit more. It wouldn't have to be
a function if CHUNKSZ in cpumask_scnprintf() were visible (or a non-changeable
constant.)
Thanks,
Mike
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH 06/10] x86: reduce memory and stack usage in intel_cacheinfo
2008-03-26 15:41 ` Mike Travis
@ 2008-03-26 16:13 ` Jeremy Fitzhardinge
2008-03-26 16:27 ` Mike Travis
2008-03-26 17:12 ` Ingo Molnar
1 sibling, 1 reply; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-03-26 16:13 UTC (permalink / raw)
To: Mike Travis
Cc: Ingo Molnar, Andrew Morton, linux-mm, linux-kernel,
Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Andi Kleen
Mike Travis wrote:
> Ingo Molnar wrote:
>
>> * Mike Travis <travis@sgi.com> wrote:
>>
>>
>>> * Change the following static arrays sized by NR_CPUS to
>>> per_cpu data variables:
>>>
>>> _cpuid4_info *cpuid4_info[NR_CPUS];
>>> _index_kobject *index_kobject[NR_CPUS];
>>> kobject * cache_kobject[NR_CPUS];
>>>
>>> * Remove the local NR_CPUS array with a kmalloc'd region in
>>> show_shared_cpu_map().
>>>
>> thanks Travis, i've applied this to x86.git.
>>
>> one observation:
>>
>>
>>> static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
>>> {
>>> - char mask_str[NR_CPUS];
>>> - cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
>>> - return sprintf(buf, "%s\n", mask_str);
>>> + int n = 0;
>>> + int len = cpumask_scnprintf_len(nr_cpu_ids);
>>> + char *mask_str = kmalloc(len, GFP_KERNEL);
>>> +
>>> + if (mask_str) {
>>> + cpumask_scnprintf(mask_str, len, this_leaf->shared_cpu_map);
>>> + n = sprintf(buf, "%s\n", mask_str);
>>> + kfree(mask_str);
>>> + }
>>> + return n;
>>>
>> the other changes look good, but this one looks a bit ugly and complex.
>> We basically want to sprintf shared_cpu_map into 'buf', but we do that
>> by first allocating a temporary buffer, print a string into it, then
>> print that string into another buffer ...
>>
>> this very much smells like an API bug in cpumask_scnprintf() - why dont
>> you create a cpumask_scnprintf_ptr() API that takes a pointer to a
>> cpumask? Then this change would become a trivial and much more readable:
>>
>> - char mask_str[NR_CPUS];
>> - cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
>> - return sprintf(buf, "%s\n", mask_str);
>> + return cpumask_scnprintf_ptr(buf, NR_CPUS, &this_leaf->shared_cpu_map);
>>
>> Ingo
>>
>
> The main goal was to avoid allocating 4096 bytes when only 32 would do
> (characters needed to represent nr_cpu_ids cpus instead of NR_CPUS cpus.)
> But I'll look at cleaning it up a bit more. It wouldn't have to be
> a function if CHUNKSZ in cpumask_scnprintf() were visible (or a non-changeable
> constant.)
>
It's a pity you can't take advantage of kasprintf to handle all this.
Hm, I would say that bitmap_scnprintf is a candidate for implementation
as a printk format specifier so you could get away from needing a
special function to print bitmaps...
Eh? What's the difference between snprintf and scnprintf?
J
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 06/10] x86: reduce memory and stack usage in intel_cacheinfo
2008-03-26 16:13 ` Jeremy Fitzhardinge
@ 2008-03-26 16:27 ` Mike Travis
2008-03-26 16:59 ` Jeremy Fitzhardinge
0 siblings, 1 reply; 33+ messages in thread
From: Mike Travis @ 2008-03-26 16:27 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Ingo Molnar, Andrew Morton, linux-mm, linux-kernel,
Thomas Gleixner, Ingo Molnar, H. Peter Anvin
Jeremy Fitzhardinge wrote:
> Mike Travis wrote:
>> Ingo Molnar wrote:
>>
>>> * Mike Travis <travis@sgi.com> wrote:
>>>
>>>
>>>> * Change the following static arrays sized by NR_CPUS to
>>>> per_cpu data variables:
>>>>
>>>> _cpuid4_info *cpuid4_info[NR_CPUS];
>>>> _index_kobject *index_kobject[NR_CPUS];
>>>> kobject * cache_kobject[NR_CPUS];
>>>>
>>>> * Remove the local NR_CPUS array with a kmalloc'd region in
>>>> show_shared_cpu_map().
>>>>
>>> thanks Travis, i've applied this to x86.git.
>>>
>>> one observation:
>>>
>>>
>>>> static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf,
>>>> char *buf)
>>>> {
>>>> - char mask_str[NR_CPUS];
>>>> - cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
>>>> - return sprintf(buf, "%s\n", mask_str);
>>>> + int n = 0;
>>>> + int len = cpumask_scnprintf_len(nr_cpu_ids);
>>>> + char *mask_str = kmalloc(len, GFP_KERNEL);
>>>> +
>>>> + if (mask_str) {
>>>> + cpumask_scnprintf(mask_str, len, this_leaf->shared_cpu_map);
>>>> + n = sprintf(buf, "%s\n", mask_str);
>>>> + kfree(mask_str);
>>>> + }
>>>> + return n;
>>>>
>>> the other changes look good, but this one looks a bit ugly and
>>> complex. We basically want to sprintf shared_cpu_map into 'buf', but
>>> we do that by first allocating a temporary buffer, print a string
>>> into it, then print that string into another buffer ...
>>>
>>> this very much smells like an API bug in cpumask_scnprintf() - why
>>> dont you create a cpumask_scnprintf_ptr() API that takes a pointer to
>>> a cpumask? Then this change would become a trivial and much more
>>> readable:
>>>
>>> - char mask_str[NR_CPUS];
>>> - cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
>>> - return sprintf(buf, "%s\n", mask_str);
>>> + return cpumask_scnprintf_ptr(buf, NR_CPUS,
>>> &this_leaf->shared_cpu_map);
>>>
>>> Ingo
>>>
>>
>> The main goal was to avoid allocating 4096 bytes when only 32 would do
>> (characters needed to represent nr_cpu_ids cpus instead of NR_CPUS cpus.)
>> But I'll look at cleaning it up a bit more. It wouldn't have to be
>> a function if CHUNKSZ in cpumask_scnprintf() were visible (or a
>> non-changeable
>> constant.)
>>
>
> It's a pity you can't take advantage of kasprintf to handle all this.
>
> Hm, I would say that bitmap_scnprintf is a candidate for implementation
> as a printk format specifier so you could get away from needing a
> special function to print bitmaps...
Hmm, I hadn't thought of that. There is commonly a format spec called
%b for diags, etc. to print bit strings. Maybe something like:
"... %*b ...", nr_cpu_ids, ptr_to_bitmap
where the length arg is rounded up to 32 or 64 bits...?
>
> Eh? What's the difference between snprintf and scnprintf?
Good question... I'll have to ask the cpumask person. ;-)
>
> J
Thanks!
Mike
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 06/10] x86: reduce memory and stack usage in intel_cacheinfo
2008-03-26 16:27 ` Mike Travis
@ 2008-03-26 16:59 ` Jeremy Fitzhardinge
2008-03-26 18:15 ` Mike Travis
0 siblings, 1 reply; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-03-26 16:59 UTC (permalink / raw)
To: Mike Travis
Cc: Ingo Molnar, Andrew Morton, linux-mm, linux-kernel,
Thomas Gleixner, Ingo Molnar, H. Peter Anvin
Mike Travis wrote:
> Hmm, I hadn't thought of that. There is commonly a format spec called
> %b for diags, etc. to print bit strings. Maybe something like:
>
> "... %*b ...", nr_cpu_ids, ptr_to_bitmap
>
> where the length arg is rounded up to 32 or 64 bits...?
>
I think that would need to be %.*b, but I always need to try it both
ways anyway...
But yes, that seems like the right way to go.
>> Eh? What's the difference between snprintf and scnprintf?
>>
>
> Good question... I'll have to ask the cpumask person. ;-)
>
It's in generic lib/vsprintf.c. The two functions are pretty much
identical... Oh, I see; snprintf returns the total output size,
regardless of whether it fits into the provided buffer, but scnprintf
returns the actual output size, clipped by the buffer length.
J
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 06/10] x86: reduce memory and stack usage in intel_cacheinfo
2008-03-26 16:59 ` Jeremy Fitzhardinge
@ 2008-03-26 18:15 ` Mike Travis
0 siblings, 0 replies; 33+ messages in thread
From: Mike Travis @ 2008-03-26 18:15 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Ingo Molnar, Andrew Morton, linux-mm, linux-kernel,
Thomas Gleixner, Ingo Molnar, H. Peter Anvin
Jeremy Fitzhardinge wrote:
> Mike Travis wrote:
>> Hmm, I hadn't thought of that. There is commonly a format spec called
>> %b for diags, etc. to print bit strings. Maybe something like:
>>
>> "... %*b ...", nr_cpu_ids, ptr_to_bitmap
>>
>> where the length arg is rounded up to 32 or 64 bits...?
>
> I think that would need to be %.*b, but I always need to try it both
> ways anyway...
>
> But yes, that seems like the right way to go.
I had the same thought after hitting return.
But for this case, I was overthinking the problem. Turns out that the
number of cpus in a leaf will be fairly small, even with new cpus around
the corner (maybe 64 or 128 cpu threads per leaf?)
So I dropped the cpumask_scnprintf_len() patch and have a new intel_cacheinfo
patch which I'll send in a separate message.
Thanks,
Mike
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 06/10] x86: reduce memory and stack usage in intel_cacheinfo
2008-03-26 15:41 ` Mike Travis
2008-03-26 16:13 ` Jeremy Fitzhardinge
@ 2008-03-26 17:12 ` Ingo Molnar
2008-03-26 17:28 ` H. Peter Anvin
1 sibling, 1 reply; 33+ messages in thread
From: Ingo Molnar @ 2008-03-26 17:12 UTC (permalink / raw)
To: Mike Travis
Cc: Andrew Morton, linux-mm, linux-kernel, Thomas Gleixner,
Ingo Molnar, H. Peter Anvin, Andi Kleen
* Mike Travis <travis@sgi.com> wrote:
> >> + int n = 0;
> >> + int len = cpumask_scnprintf_len(nr_cpu_ids);
> >> + char *mask_str = kmalloc(len, GFP_KERNEL);
> >> +
> >> + if (mask_str) {
> >> + cpumask_scnprintf(mask_str, len, this_leaf->shared_cpu_map);
> >> + n = sprintf(buf, "%s\n", mask_str);
> >> + kfree(mask_str);
> >> + }
> >> + return n;
> >
> > the other changes look good, but this one looks a bit ugly and complex.
> > We basically want to sprintf shared_cpu_map into 'buf', but we do that
> > by first allocating a temporary buffer, print a string into it, then
> > print that string into another buffer ...
> >
> > this very much smells like an API bug in cpumask_scnprintf() - why dont
> > you create a cpumask_scnprintf_ptr() API that takes a pointer to a
> > cpumask? Then this change would become a trivial and much more readable:
> >
> > - char mask_str[NR_CPUS];
> > - cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
> > - return sprintf(buf, "%s\n", mask_str);
> > + return cpumask_scnprintf_ptr(buf, NR_CPUS, &this_leaf->shared_cpu_map);
> >
> > Ingo
>
> The main goal was to avoid allocating 4096 bytes when only 32 would do
> (characters needed to represent nr_cpu_ids cpus instead of NR_CPUS
> cpus.) But I'll look at cleaning it up a bit more. It wouldn't have
> to be a function if CHUNKSZ in cpumask_scnprintf() were visible (or a
> non-changeable constant.)
well, do we care about allocating 4096 bytes, as long as we also free
it? It's not like we need to clear all the bytes or something. Am i
missing something here?
Ingo
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 06/10] x86: reduce memory and stack usage in intel_cacheinfo
2008-03-26 17:12 ` Ingo Molnar
@ 2008-03-26 17:28 ` H. Peter Anvin
2008-03-26 17:35 ` Ingo Molnar
0 siblings, 1 reply; 33+ messages in thread
From: H. Peter Anvin @ 2008-03-26 17:28 UTC (permalink / raw)
To: Ingo Molnar
Cc: Mike Travis, Andrew Morton, linux-mm, linux-kernel,
Thomas Gleixner, Ingo Molnar, Andi Kleen
Ingo Molnar wrote:
>> The main goal was to avoid allocating 4096 bytes when only 32 would do
>> (characters needed to represent nr_cpu_ids cpus instead of NR_CPUS
>> cpus.) But I'll look at cleaning it up a bit more. It wouldn't have
>> to be a function if CHUNKSZ in cpumask_scnprintf() were visible (or a
>> non-changeable constant.)
>
> well, do we care about allocating 4096 bytes, as long as we also free
> it? It's not like we need to clear all the bytes or something. Am i
> missing something here?
Well, 32 bytes fits on the stack, whereas 4096 bytes requires allocating
a page -- which means either taking the risk of failing or blocking. Of
course, we're doing this for output, which has the same issue.
-hpa
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 06/10] x86: reduce memory and stack usage in intel_cacheinfo
2008-03-26 17:28 ` H. Peter Anvin
@ 2008-03-26 17:35 ` Ingo Molnar
2008-03-26 18:20 ` Mike Travis
0 siblings, 1 reply; 33+ messages in thread
From: Ingo Molnar @ 2008-03-26 17:35 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Mike Travis, Andrew Morton, linux-mm, linux-kernel,
Thomas Gleixner, Ingo Molnar, Andi Kleen
* H. Peter Anvin <hpa@zytor.com> wrote:
> Ingo Molnar wrote:
>>> The main goal was to avoid allocating 4096 bytes when only 32 would do
>>> (characters needed to represent nr_cpu_ids cpus instead of NR_CPUS cpus.)
>>> But I'll look at cleaning it up a bit more. It wouldn't have to be a
>>> function if CHUNKSZ in cpumask_scnprintf() were visible (or a
>>> non-changeable constant.)
>>
>> well, do we care about allocating 4096 bytes, as long as we also free it?
>> It's not like we need to clear all the bytes or something. Am i missing
>> something here?
>
> Well, 32 bytes fits on the stack, whereas 4096 bytes requires
> allocating a page -- which means either taking the risk of failing or
> blocking. Of course, we're doing this for output, which has the same
> issue.
hm, i thought this was all implemented via dynamic allocation already,
within the cpumask_scnprintf function. But i see it doesnt do it - i
guess a new call could be introduced, cpumask_scnprintf_ptr() which
passes in a cpumask pointer and does dynamic allocation itself?
Ingo
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 06/10] x86: reduce memory and stack usage in intel_cacheinfo
2008-03-26 17:35 ` Ingo Molnar
@ 2008-03-26 18:20 ` Mike Travis
0 siblings, 0 replies; 33+ messages in thread
From: Mike Travis @ 2008-03-26 18:20 UTC (permalink / raw)
To: Ingo Molnar
Cc: H. Peter Anvin, Andrew Morton, linux-mm, linux-kernel,
Thomas Gleixner, Ingo Molnar, Andi Kleen
Ingo Molnar wrote:
> * H. Peter Anvin <hpa@zytor.com> wrote:
>
>> Ingo Molnar wrote:
>>>> The main goal was to avoid allocating 4096 bytes when only 32 would do
>>>> (characters needed to represent nr_cpu_ids cpus instead of NR_CPUS cpus.)
>>>> But I'll look at cleaning it up a bit more. It wouldn't have to be a
>>>> function if CHUNKSZ in cpumask_scnprintf() were visible (or a
>>>> non-changeable constant.)
>>> well, do we care about allocating 4096 bytes, as long as we also free it?
>>> It's not like we need to clear all the bytes or something. Am i missing
>>> something here?
>> Well, 32 bytes fits on the stack, whereas 4096 bytes requires
>> allocating a page -- which means either taking the risk of failing or
>> blocking. Of course, we're doing this for output, which has the same
>> issue.
>
> hm, i thought this was all implemented via dynamic allocation already,
> within the cpumask_scnprintf function. But i see it doesnt do it - i
> guess a new call could be introduced, cpumask_scnprintf_ptr() which
> passes in a cpumask pointer and does dynamic allocation itself?
>
> Ingo
Here's a snippet of the new patch. This works fine (I think) for
cpus on a leaf. The sched_debug_one problem should work the same way,
hopefully ;-)
[sorry, cut and pasted so no tabs]
static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
{
- char mask_str[NR_CPUS];
- cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
- return sprintf(buf, "%s\n", mask_str);
+ /*
+ * cpulist_scnprintf() has the advantage of compressing
+ * consecutive cpu numbers into a single range which seems
+ * appropriate for cpus on a leaf. This will change what is
+ * output so scripts that process the output will have to change.
+ * The good news is that the output format is compatible
+ * with cpulist_parse() [bitmap_parselist()].
+ *
+ * Have to guess at output buffer size... 128 seems reasonable
+ * to represent all cpus on a leaf in the worst case, like
+ * if all cpus are non-consecutive and large numbers.
+ */
+ return cpulist_scnprintf(buf, 128, this_leaf->shared_cpu_map);
}
Thanks,
Mike
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 07/10] cpu: change cpu_sys_devices from array to per_cpu variable
2008-03-25 22:06 [PATCH 00/10] NR_CPUS: third reduction of NR_CPUS memory usage x86-version v2 Mike Travis
` (5 preceding siblings ...)
2008-03-25 22:06 ` [PATCH 06/10] x86: reduce memory and stack usage in intel_cacheinfo Mike Travis
@ 2008-03-25 22:06 ` Mike Travis
2008-03-25 22:06 ` [PATCH 08/10] net: remove NR_CPUS arrays in net/core/dev.c v2 Mike Travis
` (3 subsequent siblings)
10 siblings, 0 replies; 33+ messages in thread
From: Mike Travis @ 2008-03-25 22:06 UTC (permalink / raw)
To: Andrew Morton; +Cc: Ingo Molnar, linux-mm, linux-kernel
[-- Attachment #1: nr_cpus-in-cpu_c --]
[-- Type: text/plain, Size: 1860 bytes --]
Change cpu_sys_devices from array to per_cpu variable in
drivers/base/cpu.c.
Based on:
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git
(MAINTAINER unknown)
Signed-off-by: Mike Travis <travis@sgi.com>
---
drivers/base/cpu.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
--- linux.trees.git.orig/drivers/base/cpu.c
+++ linux.trees.git/drivers/base/cpu.c
@@ -18,7 +18,7 @@ struct sysdev_class cpu_sysdev_class = {
};
EXPORT_SYMBOL(cpu_sysdev_class);
-static struct sys_device *cpu_sys_devices[NR_CPUS];
+static DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);
#ifdef CONFIG_HOTPLUG_CPU
static ssize_t show_online(struct sys_device *dev, char *buf)
@@ -68,7 +68,7 @@ void unregister_cpu(struct cpu *cpu)
sysdev_remove_file(&cpu->sysdev, &attr_online);
sysdev_unregister(&cpu->sysdev);
- cpu_sys_devices[logical_cpu] = NULL;
+ per_cpu(cpu_sys_devices, logical_cpu) = NULL;
return;
}
#else /* ... !CONFIG_HOTPLUG_CPU */
@@ -122,7 +122,7 @@ int __cpuinit register_cpu(struct cpu *c
if (!error && cpu->hotpluggable)
register_cpu_control(cpu);
if (!error)
- cpu_sys_devices[num] = &cpu->sysdev;
+ per_cpu(cpu_sys_devices, num) = &cpu->sysdev;
if (!error)
register_cpu_under_node(num, cpu_to_node(num));
@@ -135,8 +135,8 @@ int __cpuinit register_cpu(struct cpu *c
struct sys_device *get_cpu_sysdev(unsigned cpu)
{
- if (cpu < NR_CPUS)
- return cpu_sys_devices[cpu];
+ if (cpu < nr_cpu_ids && cpu_possible(cpu))
+ return per_cpu(cpu_sys_devices, cpu);
else
return NULL;
}
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 08/10] net: remove NR_CPUS arrays in net/core/dev.c v2
2008-03-25 22:06 [PATCH 00/10] NR_CPUS: third reduction of NR_CPUS memory usage x86-version v2 Mike Travis
` (6 preceding siblings ...)
2008-03-25 22:06 ` [PATCH 07/10] cpu: change cpu_sys_devices from array to per_cpu variable Mike Travis
@ 2008-03-25 22:06 ` Mike Travis
2008-03-25 22:06 ` [PATCH 09/10] x86: oprofile: remove NR_CPUS arrays in arch/x86/oprofile/nmi_int.c Mike Travis
` (2 subsequent siblings)
10 siblings, 0 replies; 33+ messages in thread
From: Mike Travis @ 2008-03-25 22:06 UTC (permalink / raw)
To: Andrew Morton
Cc: Ingo Molnar, linux-mm, linux-kernel, David S. Miller,
Alexey Kuznetsov, James Morris, Patrick McHardy
[-- Attachment #1: nr_cpus-in-net_core_dev --]
[-- Type: text/plain, Size: 2371 bytes --]
Remove the fixed size channels[NR_CPUS] array in
net/core/dev.c and dynamically allocate array based on
nr_cpu_ids.
Based on:
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git
Cc: David S. Miller <davem@davemloft.net>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: James Morris <jmorris@namei.org>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: Mike Travis <travis@sgi.com>
---
v2: fixed logic error in netdev_dma_register().
---
net/core/dev.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
--- linux.trees.git.orig/net/core/dev.c
+++ linux.trees.git/net/core/dev.c
@@ -162,7 +162,7 @@ struct net_dma {
struct dma_client client;
spinlock_t lock;
cpumask_t channel_mask;
- struct dma_chan *channels[NR_CPUS];
+ struct dma_chan **channels;
};
static enum dma_state_client
@@ -2444,7 +2444,7 @@ static struct netif_rx_stats *softnet_ge
{
struct netif_rx_stats *rc = NULL;
- while (*pos < NR_CPUS)
+ while (*pos < nr_cpu_ids)
if (cpu_online(*pos)) {
rc = &per_cpu(netdev_rx_stat, *pos);
break;
@@ -4316,7 +4316,7 @@ netdev_dma_event(struct dma_client *clie
spin_lock(&net_dma->lock);
switch (state) {
case DMA_RESOURCE_AVAILABLE:
- for (i = 0; i < NR_CPUS; i++)
+ for (i = 0; i < nr_cpu_ids; i++)
if (net_dma->channels[i] == chan) {
found = 1;
break;
@@ -4331,7 +4331,7 @@ netdev_dma_event(struct dma_client *clie
}
break;
case DMA_RESOURCE_REMOVED:
- for (i = 0; i < NR_CPUS; i++)
+ for (i = 0; i < nr_cpu_ids; i++)
if (net_dma->channels[i] == chan) {
found = 1;
pos = i;
@@ -4358,6 +4358,13 @@ netdev_dma_event(struct dma_client *clie
*/
static int __init netdev_dma_register(void)
{
+ net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma),
+ GFP_KERNEL);
+ if (unlikely(!net_dma.channels)) {
+ printk(KERN_NOTICE
+ "netdev_dma: no memory for net_dma.channels\n");
+ return -ENOMEM;
+ }
spin_lock_init(&net_dma.lock);
dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
dma_async_client_register(&net_dma.client);
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 09/10] x86: oprofile: remove NR_CPUS arrays in arch/x86/oprofile/nmi_int.c
2008-03-25 22:06 [PATCH 00/10] NR_CPUS: third reduction of NR_CPUS memory usage x86-version v2 Mike Travis
` (7 preceding siblings ...)
2008-03-25 22:06 ` [PATCH 08/10] net: remove NR_CPUS arrays in net/core/dev.c v2 Mike Travis
@ 2008-03-25 22:06 ` Mike Travis
2008-03-26 6:53 ` Ingo Molnar
2008-03-25 22:07 ` [PATCH 10/10] sched: Remove fixed NR_CPUS sized arrays in kernel_sched.c Mike Travis
2008-03-26 6:34 ` [PATCH 00/10] NR_CPUS: third reduction of NR_CPUS memory usage x86-version v2 Ingo Molnar
10 siblings, 1 reply; 33+ messages in thread
From: Mike Travis @ 2008-03-25 22:06 UTC (permalink / raw)
To: Andrew Morton; +Cc: Ingo Molnar, linux-mm, linux-kernel, Philippe Elie
[-- Attachment #1: nr_cpus-in-nmi_int_c --]
[-- Type: text/plain, Size: 5867 bytes --]
Change the following arrays sized by NR_CPUS to be PERCPU variables:
static struct op_msrs cpu_msrs[NR_CPUS];
static unsigned long saved_lvtpc[NR_CPUS];
Also some minor complaints from checkpatch.pl fixed.
Based on:
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git
Cc: Philippe Elie <phil.el@wanadoo.fr>
Signed-off-by: Mike Travis <travis@sgi.com>
---
All changes were transparent except for:
static void nmi_shutdown(void)
{
+ struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
nmi_enabled = 0;
on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
unregister_die_notifier(&profile_exceptions_nb);
- model->shutdown(cpu_msrs);
+ model->shutdown(msrs);
free_msrs();
}
The existing code passed a reference to cpu 0's instance of struct op_msrs
to model->shutdown, whilst the other functions are passed a reference to
<this cpu's> instance of a struct op_msrs. This seemed to be a bug to me
even though as long as cpu 0 and <this cpu> are of the same type it would
have the same effect...?
---
arch/x86/oprofile/nmi_int.c | 49 ++++++++++++++++++++++++--------------------
1 file changed, 27 insertions(+), 22 deletions(-)
--- linux.trees.git.orig/arch/x86/oprofile/nmi_int.c
+++ linux.trees.git/arch/x86/oprofile/nmi_int.c
@@ -23,8 +23,8 @@
#include "op_x86_model.h"
static struct op_x86_model_spec const *model;
-static struct op_msrs cpu_msrs[NR_CPUS];
-static unsigned long saved_lvtpc[NR_CPUS];
+static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
+static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
static int nmi_start(void);
static void nmi_stop(void);
@@ -89,7 +89,7 @@ static int profile_exceptions_notify(str
switch (val) {
case DIE_NMI:
- if (model->check_ctrs(args->regs, &cpu_msrs[cpu]))
+ if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
ret = NOTIFY_STOP;
break;
default:
@@ -126,7 +126,7 @@ static void nmi_cpu_save_registers(struc
static void nmi_save_registers(void *dummy)
{
int cpu = smp_processor_id();
- struct op_msrs *msrs = &cpu_msrs[cpu];
+ struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
nmi_cpu_save_registers(msrs);
}
@@ -134,10 +134,10 @@ static void free_msrs(void)
{
int i;
for_each_possible_cpu(i) {
- kfree(cpu_msrs[i].counters);
- cpu_msrs[i].counters = NULL;
- kfree(cpu_msrs[i].controls);
- cpu_msrs[i].controls = NULL;
+ kfree(per_cpu(cpu_msrs, i).counters);
+ per_cpu(cpu_msrs, i).counters = NULL;
+ kfree(per_cpu(cpu_msrs, i).controls);
+ per_cpu(cpu_msrs, i).controls = NULL;
}
}
@@ -149,13 +149,15 @@ static int allocate_msrs(void)
int i;
for_each_possible_cpu(i) {
- cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL);
- if (!cpu_msrs[i].counters) {
+ per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
+ GFP_KERNEL);
+ if (!per_cpu(cpu_msrs, i).counters) {
success = 0;
break;
}
- cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL);
- if (!cpu_msrs[i].controls) {
+ per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
+ GFP_KERNEL);
+ if (!per_cpu(cpu_msrs, i).controls) {
success = 0;
break;
}
@@ -170,11 +172,11 @@ static int allocate_msrs(void)
static void nmi_cpu_setup(void *dummy)
{
int cpu = smp_processor_id();
- struct op_msrs *msrs = &cpu_msrs[cpu];
+ struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
spin_lock(&oprofilefs_lock);
model->setup_ctrs(msrs);
spin_unlock(&oprofilefs_lock);
- saved_lvtpc[cpu] = apic_read(APIC_LVTPC);
+ per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
@@ -203,13 +205,15 @@ static int nmi_setup(void)
*/
/* Assume saved/restored counters are the same on all CPUs */
- model->fill_in_addresses(&cpu_msrs[0]);
+ model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
for_each_possible_cpu(cpu) {
if (cpu != 0) {
- memcpy(cpu_msrs[cpu].counters, cpu_msrs[0].counters,
+ memcpy(per_cpu(cpu_msrs, cpu).counters,
+ per_cpu(cpu_msrs, 0).counters,
sizeof(struct op_msr) * model->num_counters);
- memcpy(cpu_msrs[cpu].controls, cpu_msrs[0].controls,
+ memcpy(per_cpu(cpu_msrs, cpu).controls,
+ per_cpu(cpu_msrs, 0).controls,
sizeof(struct op_msr) * model->num_controls);
}
@@ -249,7 +253,7 @@ static void nmi_cpu_shutdown(void *dummy
{
unsigned int v;
int cpu = smp_processor_id();
- struct op_msrs *msrs = &cpu_msrs[cpu];
+ struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
/* restoring APIC_LVTPC can trigger an apic error because the delivery
* mode and vector nr combination can be illegal. That's by design: on
@@ -258,23 +262,24 @@ static void nmi_cpu_shutdown(void *dummy
*/
v = apic_read(APIC_LVTERR);
apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
- apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
+ apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
apic_write(APIC_LVTERR, v);
nmi_restore_registers(msrs);
}
static void nmi_shutdown(void)
{
+ struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
nmi_enabled = 0;
on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
unregister_die_notifier(&profile_exceptions_nb);
- model->shutdown(cpu_msrs);
+ model->shutdown(msrs);
free_msrs();
}
static void nmi_cpu_start(void *dummy)
{
- struct op_msrs const *msrs = &cpu_msrs[smp_processor_id()];
+ struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
model->start(msrs);
}
@@ -286,7 +291,7 @@ static int nmi_start(void)
static void nmi_cpu_stop(void *dummy)
{
- struct op_msrs const *msrs = &cpu_msrs[smp_processor_id()];
+ struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
model->stop(msrs);
}
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 09/10] x86: oprofile: remove NR_CPUS arrays in arch/x86/oprofile/nmi_int.c
2008-03-25 22:06 ` [PATCH 09/10] x86: oprofile: remove NR_CPUS arrays in arch/x86/oprofile/nmi_int.c Mike Travis
@ 2008-03-26 6:53 ` Ingo Molnar
0 siblings, 0 replies; 33+ messages in thread
From: Ingo Molnar @ 2008-03-26 6:53 UTC (permalink / raw)
To: Mike Travis; +Cc: Andrew Morton, linux-mm, linux-kernel, Philippe Elie
* Mike Travis <travis@sgi.com> wrote:
> Change the following arrays sized by NR_CPUS to be PERCPU variables:
>
> static struct op_msrs cpu_msrs[NR_CPUS];
> static unsigned long saved_lvtpc[NR_CPUS];
>
> Also some minor complaints from checkpatch.pl fixed.
thanks, applied.
> All changes were transparent except for:
>
> static void nmi_shutdown(void)
> {
> + struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
> nmi_enabled = 0;
> on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
> unregister_die_notifier(&profile_exceptions_nb);
> - model->shutdown(cpu_msrs);
> + model->shutdown(msrs);
> free_msrs();
> }
>
> The existing code passed a reference to cpu 0's instance of struct
> op_msrs to model->shutdown, whilst the other functions are passed a
> reference to <this cpu's> instance of a struct op_msrs. This seemed
> to be a bug to me even though as long as cpu 0 and <this cpu> are of
> the same type it would have the same effect...?
i dont think this has any real effect in practice (the model pointers
are not expected to change across cpus on the same system) - but in any
case i've promoted your observation to the main portion of the changelog
so that we'll have notice of this.
(someone might want to play with simulating a weaker CPU on a secondary
core, but we've got tons of other assumptions on CPU type symmetry.)
Ingo
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 10/10] sched: Remove fixed NR_CPUS sized arrays in kernel_sched.c
2008-03-25 22:06 [PATCH 00/10] NR_CPUS: third reduction of NR_CPUS memory usage x86-version v2 Mike Travis
` (8 preceding siblings ...)
2008-03-25 22:06 ` [PATCH 09/10] x86: oprofile: remove NR_CPUS arrays in arch/x86/oprofile/nmi_int.c Mike Travis
@ 2008-03-25 22:07 ` Mike Travis
2008-03-26 6:34 ` [PATCH 00/10] NR_CPUS: third reduction of NR_CPUS memory usage x86-version v2 Ingo Molnar
10 siblings, 0 replies; 33+ messages in thread
From: Mike Travis @ 2008-03-25 22:07 UTC (permalink / raw)
To: Andrew Morton; +Cc: Ingo Molnar, linux-mm, linux-kernel
[-- Attachment #1: nr_cpus-in-kernel_sched --]
[-- Type: text/plain, Size: 7993 bytes --]
Change fixed size arrays to per_cpu variables or dynamically allocated
arrays in sched_init() and sched_init_smp().
(1) static struct sched_entity *init_sched_entity_p[NR_CPUS];
(1) static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
(1) static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS];
(1) static struct rt_rq *init_rt_rq_p[NR_CPUS];
static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];
char str[NR_CPUS];
int ints[NR_CPUS], i;
(1) - these arrays are allocated via alloc_bootmem_low()
Also in sched_create_group() we allocate new arrays based on nr_cpu_ids.
Based on:
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Mike Travis <travis@sgi.com>
---
kernel/sched.c | 92 +++++++++++++++++++++++++++++++++++++++------------------
1 file changed, 63 insertions(+), 29 deletions(-)
--- linux.trees.git.orig/kernel/sched.c
+++ linux.trees.git/kernel/sched.c
@@ -66,6 +66,7 @@
#include <linux/unistd.h>
#include <linux/pagemap.h>
#include <linux/hrtimer.h>
+#include <linux/bootmem.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
@@ -193,17 +194,11 @@ struct task_group {
static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
/* Default task group's cfs_rq on each cpu */
static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
-
-static struct sched_entity *init_sched_entity_p[NR_CPUS];
-static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
#endif
#ifdef CONFIG_RT_GROUP_SCHED
static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
-
-static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS];
-static struct rt_rq *init_rt_rq_p[NR_CPUS];
#endif
/* task_group_lock serializes add/remove of task groups and also changes to
@@ -227,17 +222,7 @@ static int init_task_group_load = INIT_T
/* Default task group.
* Every task in system belong to this group at bootup.
*/
-struct task_group init_task_group = {
-#ifdef CONFIG_FAIR_GROUP_SCHED
- .se = init_sched_entity_p,
- .cfs_rq = init_cfs_rq_p,
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
- .rt_se = init_sched_rt_entity_p,
- .rt_rq = init_rt_rq_p,
-#endif
-};
+struct task_group init_task_group;
/* return group to which a task belongs */
static inline struct task_group *task_group(struct task_struct *p)
@@ -3518,7 +3503,7 @@ static inline void trigger_load_balance(
*/
int ilb = first_cpu(nohz.cpu_mask);
- if (ilb != NR_CPUS)
+ if (ilb < nr_cpu_ids)
resched_cpu(ilb);
}
}
@@ -5470,11 +5455,11 @@ static void move_task_off_dead_cpu(int d
dest_cpu = any_online_cpu(mask);
/* On any allowed CPU? */
- if (dest_cpu == NR_CPUS)
+ if (dest_cpu >= nr_cpu_ids)
dest_cpu = any_online_cpu(p->cpus_allowed);
/* No more Mr. Nice Guy. */
- if (dest_cpu == NR_CPUS) {
+ if (dest_cpu >= nr_cpu_ids) {
cpumask_t cpus_allowed = cpuset_cpus_allowed_locked(p);
/*
* Try to stay on the same cpuset, where the
@@ -5929,9 +5914,16 @@ static int sched_domain_debug_one(struct
{
struct sched_group *group = sd->groups;
cpumask_t groupmask;
- char str[NR_CPUS];
+ int len = cpumask_scnprintf_len(nr_cpu_ids);
+ char *str = kmalloc(len, GFP_KERNEL);
+ int ret = 0;
+
+ if (!str) {
+ printk(KERN_DEBUG "Cannot load-balance (no memory)\n");
+ return -1;
+ }
- cpumask_scnprintf(str, NR_CPUS, sd->span);
+ cpumask_scnprintf(str, len, sd->span);
cpus_clear(groupmask);
printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
@@ -5941,6 +5933,7 @@ static int sched_domain_debug_one(struct
if (sd->parent)
printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain"
" has parent");
+ kfree(str);
return -1;
}
@@ -5984,7 +5977,7 @@ static int sched_domain_debug_one(struct
cpus_or(groupmask, groupmask, group->cpumask);
- cpumask_scnprintf(str, NR_CPUS, group->cpumask);
+ cpumask_scnprintf(str, len, group->cpumask);
printk(KERN_CONT " %s", str);
group = group->next;
@@ -5997,6 +5990,8 @@ static int sched_domain_debug_one(struct
if (sd->parent && !cpus_subset(groupmask, sd->parent->span))
printk(KERN_ERR "ERROR: parent span is not a superset "
"of domain->span\n");
+
+ kfree(str);
return 0;
}
@@ -6198,7 +6193,7 @@ __setup("isolcpus=", isolated_cpu_setup)
/*
* init_sched_build_groups takes the cpumask we wish to span, and a pointer
* to a function which identifies what group(along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS
+ * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
* (due to the fact that we keep track of groups covered with a cpumask_t).
*
* init_sched_build_groups will build a circular linked list of the groups
@@ -6396,7 +6391,7 @@ cpu_to_phys_group(int cpu, const cpumask
* gets dynamically allocated.
*/
static DEFINE_PER_CPU(struct sched_domain, node_domains);
-static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];
+static struct sched_group ***sched_group_nodes_bycpu;
static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes);
@@ -7039,6 +7034,11 @@ void __init sched_init_smp(void)
{
cpumask_t non_isolated_cpus;
+#if defined(CONFIG_NUMA)
+ sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **),
+ GFP_KERNEL);
+ BUG_ON(sched_group_nodes_bycpu == NULL);
+#endif
get_online_cpus();
arch_init_sched_domains(&cpu_online_map);
cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map);
@@ -7056,6 +7056,11 @@ void __init sched_init_smp(void)
#else
void __init sched_init_smp(void)
{
+#if defined(CONFIG_NUMA)
+ sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **),
+ GFP_KERNEL);
+ BUG_ON(sched_group_nodes_bycpu == NULL);
+#endif
sched_init_granularity();
}
#endif /* CONFIG_SMP */
@@ -7149,6 +7154,35 @@ static void init_tg_rt_entry(struct rq *
void __init sched_init(void)
{
int i, j;
+ unsigned long alloc_size = 0, ptr;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ alloc_size += 2 * nr_cpu_ids * sizeof(void **);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+ alloc_size += 2 * nr_cpu_ids * sizeof(void **);
+#endif
+ /*
+ * As sched_init() is called before page_alloc is setup,
+ * we use alloc_bootmem().
+ */
+ if (alloc_size) {
+ ptr = (unsigned long)alloc_bootmem_low(alloc_size);
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ init_task_group.se = (struct sched_entity **)ptr;
+ ptr += nr_cpu_ids * sizeof(void **);
+
+ init_task_group.cfs_rq = (struct cfs_rq **)ptr;
+ ptr += nr_cpu_ids * sizeof(void **);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+ init_task_group.rt_se = (struct sched_rt_entity **)ptr;
+ ptr += nr_cpu_ids * sizeof(void **);
+
+ init_task_group.rt_rq = (struct rt_rq **)ptr;
+#endif
+ }
#ifdef CONFIG_SMP
init_defrootdomain();
@@ -7394,10 +7428,10 @@ static int alloc_fair_sched_group(struct
struct rq *rq;
int i;
- tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
+ tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
if (!tg->cfs_rq)
goto err;
- tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
+ tg->se = kzalloc(sizeof(se) * nr_cpu_ids, GFP_KERNEL);
if (!tg->se)
goto err;
@@ -7477,10 +7511,10 @@ static int alloc_rt_sched_group(struct t
struct rq *rq;
int i;
- tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
+ tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
if (!tg->rt_rq)
goto err;
- tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
+ tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL);
if (!tg->rt_se)
goto err;
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 00/10] NR_CPUS: third reduction of NR_CPUS memory usage x86-version v2
2008-03-25 22:06 [PATCH 00/10] NR_CPUS: third reduction of NR_CPUS memory usage x86-version v2 Mike Travis
` (9 preceding siblings ...)
2008-03-25 22:07 ` [PATCH 10/10] sched: Remove fixed NR_CPUS sized arrays in kernel_sched.c Mike Travis
@ 2008-03-26 6:34 ` Ingo Molnar
2008-03-26 15:48 ` Mike Travis
10 siblings, 1 reply; 33+ messages in thread
From: Ingo Molnar @ 2008-03-26 6:34 UTC (permalink / raw)
To: Mike Travis; +Cc: Andrew Morton, linux-mm, linux-kernel
* Mike Travis <travis@sgi.com> wrote:
> Wii, isn't this fun...! This is a resubmission of yesterday's patches
> based on the x86.git/latest tree. Yes, it _is_ a maze of twisty little
> passages. ;-)
just to make patch dependencies clear: most of the patches here can be
applied to their base trees as-is, without depending on any other patch,
correct?
the only undeclared dependency i found was the cpumask_scnprintf_len()
patch - please prominently list dependencies in the changelog like this:
[ this patch depends on "cpumask: Add cpumask_scnprintf_len function" ]
Ingo
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 00/10] NR_CPUS: third reduction of NR_CPUS memory usage x86-version v2
2008-03-26 6:34 ` [PATCH 00/10] NR_CPUS: third reduction of NR_CPUS memory usage x86-version v2 Ingo Molnar
@ 2008-03-26 15:48 ` Mike Travis
0 siblings, 0 replies; 33+ messages in thread
From: Mike Travis @ 2008-03-26 15:48 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Andrew Morton, linux-mm, linux-kernel
Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
>
>> Wii, isn't this fun...! This is a resubmission of yesterday's patches
>> based on the x86.git/latest tree. Yes, it _is_ a maze of twisty little
>> passages. ;-)
>
> just to make patch dependencies clear: most of the patches here can be
> applied to their base trees as-is, without depending on any other patch,
> correct?
>
> the only undeclared dependency i found was the cpumask_scnprintf_len()
> patch - please prominently list dependencies in the changelog like this:
>
> [ this patch depends on "cpumask: Add cpumask_scnprintf_len function" ]
>
> Ingo
Ahh, ok. I was under the assumption that an entire patchset would be
applied en masse and only divided up by bisect debugging...?
The second patchset (cpumask) is highly incremental and I did it like
this to show memory gains (or losses). I tossed a few patches that
didn't have any overall goodness (and have a few more in the queue to
help with the memory footprint or performance.)
Thanks,
Mike
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 33+ messages in thread