From: William Lee Irwin III <wli@holomorphy.com>
To: linux-mm@kvack.org
Subject: __alloc_pages_core speedup
Date: Tue, 24 Jul 2001 16:30:10 -0700 [thread overview]
Message-ID: <20010724163010.A3593@holomorphy.com> (raw)
On IBM NUMA hardware, there was a peculiar delay during boot just
prior to "ACPI: Installing SCI 9 handler pass".
It was determined that this delay (of approximately 80 seconds)
consisted of calls to __alloc_bootmem_core. It appears that it
is only called when needed, and that an individual call was
taking a great deal of time. I'm not sure what the right way to
fix this is, but I've tried a few approaches. I am interested
in hearing of other ways to cope with this situation that would
be more effective or more palatable.
First, I tried to alter __alloc_bootmem_core so that it read the
bitmap in blocks of some compiler-supported size (e.g. u64) and
performed its checks on 64 bits at a time, with (of course) some
provisions for end cases. This was not successful, either due to
unexpected interactions or implementation errors.
The following patch merely provides some non-atomic bit operations,
and replaces the calls to the atomic versions in bootmem.c with
calls to them. This yields a speedup of 8 seconds, giving the
following timings:
---- seconds from EFI to ACPI: Installing SCI 9 handler pass
Atomic: 79
Non-atomic: 71
---- seconds from above to VFS mount
Atomic: 122
Non-atomic: 114
... and other significant landmarks in booting differ by 8 seconds.
I don't believe this issue is truly architecture-specific, as it
appears to me that the properties of the algorithm would likely scale
similarly on different architectures. And I can also provide
implementations of the non-atomic bit operations for other architectures.
Cheers,
Bill
P.S.: The diffs follow:
--- linux-old/mm/bootmem.c Mon Jul 23 11:37:58 2001
+++ linux-0626/mm/bootmem.c Wed Jul 18 14:50:02 2001
@@ -89,17 +89,17 @@
BUG();
if (sidx >= eidx)
BUG();
if ((addr >> PAGE_SHIFT) >= bdata->node_low_pfn)
BUG();
if (end > bdata->node_low_pfn)
BUG();
for (i = sidx; i < eidx; i++)
- if (test_and_set_bit(i, bdata->node_bootmem_map))
+ if (__test_and_set_bit(i, bdata->node_bootmem_map))
printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
}
static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size)
{
unsigned long i;
unsigned long start;
/*
@@ -116,17 +116,17 @@
/*
* Round up the beginning of the address.
*/
start = (addr + PAGE_SIZE-1) / PAGE_SIZE;
sidx = start - (bdata->node_boot_start/PAGE_SIZE);
for (i = sidx; i < eidx; i++) {
- if (!test_and_clear_bit(i, bdata->node_bootmem_map))
+ if (!__test_and_clear_bit(i, bdata->node_bootmem_map))
BUG();
}
}
/*
* We 'merge' subsequent allocations to save space. We might 'lose'
* some fraction of a page if allocations cannot be satisfied due to
* size constraints on boxes where there is physical RAM space
@@ -166,22 +166,22 @@
preferred = ((preferred + align - 1) & ~(align - 1)) >> PAGE_SHIFT;
areasize = (size+PAGE_SIZE-1)/PAGE_SIZE;
incr = align >> PAGE_SHIFT ? : 1;
restart_scan:
for (i = preferred; i < eidx; i += incr) {
unsigned long j;
- if (test_bit(i, bdata->node_bootmem_map))
+ if (__test_bit(i, bdata->node_bootmem_map))
continue;
for (j = i + 1; j < i + areasize; ++j) {
if (j >= eidx)
goto fail_block;
- if (test_bit (j, bdata->node_bootmem_map))
+ if (__test_bit (j, bdata->node_bootmem_map))
goto fail_block;
}
start = i;
goto found;
fail_block:;
}
if (preferred) {
preferred = 0;
@@ -222,17 +222,17 @@
bdata->last_pos = start + areasize - 1;
bdata->last_offset = size & ~PAGE_MASK;
ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
}
/*
* Reserve the area now:
*/
for (i = start; i < start+areasize; i++)
- if (test_and_set_bit(i, bdata->node_bootmem_map))
+ if (__test_and_set_bit(i, bdata->node_bootmem_map))
BUG();
memset(ret, 0, size);
return ret;
}
static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
{
struct page *page = pgdat->node_mem_map;
@@ -240,17 +240,17 @@
unsigned long i, count, total = 0;
unsigned long idx;
if (!bdata->node_bootmem_map) BUG();
count = 0;
idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
for (i = 0; i < idx; i++, page++) {
- if (!test_bit(i, bdata->node_bootmem_map)) {
+ if (!__test_bit(i, bdata->node_bootmem_map)) {
count++;
ClearPageReserved(page);
set_page_count(page, 1);
__free_page(page);
}
}
total += count;
--- linux-old/include/asm-ia64/bitops.h Mon Jul 23 11:38:28 2001
+++ linux-0626/include/asm-ia64/bitops.h Wed Jul 18 14:53:34 2001
@@ -30,16 +30,27 @@
bit = 1 << (nr & 31);
do {
CMPXCHG_BUGCHECK(m);
old = *m;
new = old | bit;
} while (cmpxchg_acq(m, old, new) != old);
}
+static __inline__ void
+__set_bit(int nr, void * addr)
+{
+ __u32 bit, old, *m;
+ m = ((__u32 *)addr) + (nr >> 5);
+ bit = 0x1 << (nr & 0x1f);
+ old = *m;
+ *m |= bit;
+ return;
+}
+
/*
* clear_bit() doesn't provide any barrier for the compiler.
*/
#define smp_mb__before_clear_bit() smp_mb()
#define smp_mb__after_clear_bit() smp_mb()
static __inline__ void
clear_bit (int nr, volatile void *addr)
{
@@ -51,16 +62,27 @@
mask = ~(1 << (nr & 31));
do {
CMPXCHG_BUGCHECK(m);
old = *m;
new = old & mask;
} while (cmpxchg_acq(m, old, new) != old);
}
+static __inline__ void
+__clear_bit(int nr, void * addr)
+{
+ __u32 mask, old, *m;
+ m = ((__u32 *)addr) + (nr >> 5);
+ mask = ~(0x1 << (nr & 0x1f));
+ old = *m;
+ *m &= mask;
+ return;
+}
+
/*
* WARNING: non atomic version.
*/
static __inline__ void
__change_bit (int nr, void *addr)
{
volatile __u32 *m = (__u32 *) addr + (nr >> 5);
__u32 bit = (1 << (nr & 31));
@@ -97,32 +119,55 @@
CMPXCHG_BUGCHECK(m);
old = *m;
new = old | bit;
} while (cmpxchg_acq(m, old, new) != old);
return (old & bit) != 0;
}
static __inline__ int
+__test_and_set_bit(int nr, void * addr)
+{
+ __u32 bit, old;
+ __u32 * m;
+ m = ((__u32 *)addr) + (nr >> 5);
+ bit = 0x1 << (nr & 0x1f);
+ old = *m;
+ *m |= bit;
+ return((old & bit) != 0);
+}
+
+static __inline__ int
test_and_clear_bit (int nr, volatile void *addr)
{
__u32 mask, old, new;
volatile __u32 *m;
CMPXCHG_BUGCHECK_DECL
m = (volatile __u32 *) addr + (nr >> 5);
mask = ~(1 << (nr & 31));
do {
CMPXCHG_BUGCHECK(m);
old = *m;
new = old & mask;
} while (cmpxchg_acq(m, old, new) != old);
return (old & ~mask) != 0;
}
+static __inline__ int
+__test_and_clear_bit(int nr, void * addr)
+{
+ __u32 mask, old, *m;
+ m = ((__u32 *)addr) + (nr >> 5);
+ mask = ~(0x1 << (nr & 0x1f));
+ old = *m;
+ *m &= mask;
+ return((old & ~mask) != 0);
+}
+
/*
* WARNING: non atomic version.
*/
static __inline__ int
__test_and_change_bit (int nr, void *addr)
{
__u32 old, bit = (1 << (nr & 31));
__u32 *m = (__u32 *) addr + (nr >> 5);
@@ -148,16 +193,22 @@
} while (cmpxchg_acq(m, old, new) != old);
return (old & bit) != 0;
}
static __inline__ int
test_bit (int nr, volatile void *addr)
{
return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31));
+}
+
+static __inline__ int
+__test_bit(int nr, void * addr)
+{
+ return 0x1 & (((__u32 *)addr)[nr >> 5] >> (nr & 0x1f));
}
/*
* ffz = "find first zero". Returns the bit number (0..63) of the first (least
* significant) bit that is zero in X. Undefined if no zero exists, so code should check
* against ~0UL first...
*/
static inline unsigned long
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
reply other threads:[~2001-07-24 23:30 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20010724163010.A3593@holomorphy.com \
--to=wli@holomorphy.com \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox