* [PATCH] Optimize away pte_chains for single mappings
@ 2002-07-09 18:35 Dave McCracken
2002-07-13 13:13 ` Daniel Phillips
0 siblings, 1 reply; 13+ messages in thread
From: Dave McCracken @ 2002-07-09 18:35 UTC (permalink / raw)
To: Linux Memory Management
[-- Attachment #1: Type: text/plain, Size: 723 bytes --]
Here's a patch that optimizes out using a struct pte_chain when there's
only one mapping for that page. It re-uses the pte_chain pointer in struct
page, with an appropriate flag. The patch is based on Rik's latest 2.5.25
rmap patch.
I've done basic testing on it (it boots and runs simple commands).
This version of the patch uses an anonymous union, so it only builds with
gcc 3.x. I'm working on an alternate version of the patch, but wanted to
get this one out for people to look at.
Dave McCracken
======================================================================
Dave McCracken IBM Linux Base Kernel Team 1-512-838-3059
dmccr@us.ibm.com T/L 678-3059
[-- Attachment #2: rmap-opt-2.5.25.diff --]
[-- Type: text/plain, Size: 6289 bytes --]
--- linux-2.5.25-rmap/./include/linux/mm.h Mon Jul 8 15:37:35 2002
+++ linux-2.5.25-rmap-opt/./include/linux/mm.h Tue Jul 9 13:28:32 2002
@@ -157,8 +157,11 @@
updated asynchronously */
struct list_head lru; /* Pageout list, eg. active_list;
protected by pagemap_lru_lock !! */
- struct pte_chain * pte_chain; /* Reverse pte mapping pointer.
+ union {
+ struct pte_chain * pte_chain; /* Reverse pte mapping pointer.
* protected by PG_chainlock */
+ pte_t * pte_direct;
+ };
unsigned long private; /* mapping-private opaque data */
/*
--- linux-2.5.25-rmap/./include/linux/page-flags.h Mon Jul 8 15:37:35 2002
+++ linux-2.5.25-rmap-opt/./include/linux/page-flags.h Tue Jul 9 10:31:28 2002
@@ -66,6 +66,7 @@
#define PG_writeback 13 /* Page is under writeback */
#define PG_nosave 15 /* Used for system suspend/resume */
#define PG_chainlock 16 /* lock bit for ->pte_chain */
+#define PG_direct 17 /* ->pte_chain points directly at pte */
/*
* Global page accounting. One instance per CPU.
@@ -216,6 +217,12 @@
#define TestSetPageNosave(page) test_and_set_bit(PG_nosave, &(page)->flags)
#define ClearPageNosave(page) clear_bit(PG_nosave, &(page)->flags)
#define TestClearPageNosave(page) test_and_clear_bit(PG_nosave, &(page)->flags)
+
+#define PageDirect(page) test_bit(PG_direct, &(page)->flags)
+#define SetPageDirect(page) set_bit(PG_direct, &(page)->flags)
+#define TestSetPageDirect(page) test_and_set_bit(PG_direct, &(page)->flags)
+#define ClearPageDirect(page) clear_bit(PG_direct, &(page)->flags)
+#define TestClearPageDirect(page) test_and_clear_bit(PG_direct, &(page)->flags)
/*
* inlines for acquisition and release of PG_chainlock
--- linux-2.5.25-rmap/./mm/rmap.c Mon Jul 8 15:37:35 2002
+++ linux-2.5.25-rmap-opt/./mm/rmap.c Tue Jul 9 12:46:07 2002
@@ -71,10 +71,15 @@
if (TestClearPageReferenced(page))
referenced++;
- /* Check all the page tables mapping this page. */
- for (pc = page->pte_chain; pc; pc = pc->next) {
- if (ptep_test_and_clear_young(pc->ptep))
+ if (PageDirect(page)) {
+ if (ptep_test_and_clear_young(page->pte_direct))
referenced++;
+ } else {
+ /* Check all the page tables mapping this page. */
+ for (pc = page->pte_chain; pc; pc = pc->next) {
+ if (ptep_test_and_clear_young(pc->ptep))
+ referenced++;
+ }
}
return referenced;
}
@@ -108,22 +113,39 @@
pte_chain_lock(page);
{
struct pte_chain * pc;
- for (pc = page->pte_chain; pc; pc = pc->next) {
- if (pc->ptep == ptep)
+ if (PageDirect(page)) {
+ if (page->pte_direct == ptep)
BUG();
+ } else {
+ for (pc = page->pte_chain; pc; pc = pc->next) {
+ if (pc->ptep == ptep)
+ BUG();
+ }
}
}
pte_chain_unlock(page);
#endif
- pte_chain = pte_chain_alloc();
-
pte_chain_lock(page);
- /* Hook up the pte_chain to the page. */
- pte_chain->ptep = ptep;
- pte_chain->next = page->pte_chain;
- page->pte_chain = pte_chain;
+ if (PageDirect(page)) {
+ /* Convert a direct pointer into a pte_chain */
+ pte_chain = pte_chain_alloc();
+ pte_chain->ptep = page->pte_direct;
+ pte_chain->next = NULL;
+ page->pte_chain = pte_chain;
+ ClearPageDirect(page);
+ }
+ if (page->pte_chain) {
+ /* Hook up the pte_chain to the page. */
+ pte_chain = pte_chain_alloc();
+ pte_chain->ptep = ptep;
+ pte_chain->next = page->pte_chain;
+ page->pte_chain = pte_chain;
+ } else {
+ page->pte_direct = ptep;
+ SetPageDirect(page);
+ }
pte_chain_unlock(page);
}
@@ -149,18 +171,38 @@
return;
pte_chain_lock(page);
- for (pc = page->pte_chain; pc; prev_pc = pc, pc = pc->next) {
- if (pc->ptep == ptep) {
- pte_chain_free(pc, prev_pc, page);
+
+ if (PageDirect(page)) {
+ if (page->pte_direct == ptep) {
+ page->pte_direct = NULL;
+ ClearPageDirect(page);
goto out;
}
+ } else {
+ for (pc = page->pte_chain; pc; prev_pc = pc, pc = pc->next) {
+ if (pc->ptep == ptep) {
+ pte_chain_free(pc, prev_pc, page);
+ /* Check whether we can convert to direct */
+ pc = page->pte_chain;
+ if (!pc->next) {
+ page->pte_direct = pc->ptep;
+ SetPageDirect(page);
+ pte_chain_free(pc, NULL, NULL);
+ }
+ goto out;
+ }
+ }
}
#ifdef DEBUG_RMAP
/* Not found. This should NEVER happen! */
printk(KERN_ERR "page_remove_rmap: pte_chain %p not present.\n", ptep);
printk(KERN_ERR "page_remove_rmap: only found: ");
- for (pc = page->pte_chain; pc; pc = pc->next)
- printk("%p ", pc->ptep);
+ if (PageDirect(page)) {
+ printk("%p ", page->pte_direct);
+ } else {
+ for (pc = page->pte_chain; pc; pc = pc->next)
+ printk("%p ", pc->ptep);
+ }
printk("\n");
printk(KERN_ERR "page_remove_rmap: driver cleared PG_reserved ?\n");
#endif
@@ -270,22 +312,42 @@
if (!page->mapping)
BUG();
- for (pc = page->pte_chain; pc; pc = next_pc) {
- next_pc = pc->next;
- switch (try_to_unmap_one(page, pc->ptep)) {
+ if (PageDirect(page)) {
+ switch (ret = try_to_unmap_one(page, page->pte_direct)) {
case SWAP_SUCCESS:
- /* Free the pte_chain struct. */
- pte_chain_free(pc, prev_pc, page);
- break;
+ page->pte_direct = NULL;
+ ClearPageDirect(page);
+ return ret;
case SWAP_AGAIN:
- /* Skip this pte, remembering status. */
- prev_pc = pc;
- ret = SWAP_AGAIN;
- continue;
case SWAP_FAIL:
- return SWAP_FAIL;
case SWAP_ERROR:
- return SWAP_ERROR;
+ return ret;
+ }
+ } else {
+ for (pc = page->pte_chain; pc; pc = next_pc) {
+ next_pc = pc->next;
+ switch (try_to_unmap_one(page, pc->ptep)) {
+ case SWAP_SUCCESS:
+ /* Free the pte_chain struct. */
+ pte_chain_free(pc, prev_pc, page);
+ break;
+ case SWAP_AGAIN:
+ /* Skip this pte, remembering status. */
+ prev_pc = pc;
+ ret = SWAP_AGAIN;
+ continue;
+ case SWAP_FAIL:
+ return SWAP_FAIL;
+ case SWAP_ERROR:
+ return SWAP_ERROR;
+ }
+ }
+ /* Check whether we can convert to direct pte pointer */
+ pc = page->pte_chain;
+ if (pc && !pc->next) {
+ page->pte_direct = pc->ptep;
+ SetPageDirect(page);
+ pte_chain_free(pc, NULL, NULL);
}
}
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Optimize away pte_chains for single mappings
2002-07-09 18:35 [PATCH] Optimize away pte_chains for single mappings Dave McCracken
@ 2002-07-13 13:13 ` Daniel Phillips
2002-07-15 14:02 ` Dave McCracken
0 siblings, 1 reply; 13+ messages in thread
From: Daniel Phillips @ 2002-07-13 13:13 UTC (permalink / raw)
To: Dave McCracken, Linux Memory Management; +Cc: Andrew Morton
On Tuesday 09 July 2002 20:35, Dave McCracken wrote:
>
> Here's a patch that optimizes out using a struct pte_chain when there's
> only one mapping for that page. It re-uses the pte_chain pointer in struct
> page, with an appropriate flag. The patch is based on Rik's latest 2.5.25
> rmap patch.
>
> I've done basic testing on it (it boots and runs simple commands).
>
> This version of the patch uses an anonymous union, so it only builds with
> gcc 3.x. I'm working on an alternate version of the patch, but wanted to
> get this one out for people to look at.
Why are we using up valuable real estate in page->flags when the low bit
of page->pte_chain is available?
--
Daniel
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Optimize away pte_chains for single mappings
2002-07-13 13:13 ` Daniel Phillips
@ 2002-07-15 14:02 ` Dave McCracken
2002-07-15 14:56 ` Daniel Phillips
0 siblings, 1 reply; 13+ messages in thread
From: Dave McCracken @ 2002-07-15 14:02 UTC (permalink / raw)
To: Daniel Phillips; +Cc: Linux Memory Management
--On Saturday, July 13, 2002 03:13:35 PM +0200 Daniel Phillips
<phillips@arcor.de> wrote:
> Why are we using up valuable real estate in page->flags when the low bit
> of page->pte_chain is available?
Right now my flag is bit number 17 (the 18th bit) in page->flags out of 32. Mechanisms
already exist to manipulate this bit in a reasonable fashion. I don't see
any good reason for complicating things by putting a flag bit into a
pointer, where we'd have to repeatedly check and clear it before we
dereference the pointer. When I discussed this with Rik he said putting it
in flags was reasonable. We can always revisit it in the future if we run
out of bits.
Dave
======================================================================
Dave McCracken IBM Linux Base Kernel Team 1-512-838-3059
dmccr@us.ibm.com T/L 678-3059
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Optimize away pte_chains for single mappings
2002-07-15 14:02 ` Dave McCracken
@ 2002-07-15 14:56 ` Daniel Phillips
2002-07-15 15:40 ` Matti Aarnio
0 siblings, 1 reply; 13+ messages in thread
From: Daniel Phillips @ 2002-07-15 14:56 UTC (permalink / raw)
To: Dave McCracken; +Cc: Linux Memory Management
On Monday 15 July 2002 16:02, Dave McCracken wrote:
> --On Saturday, July 13, 2002 03:13:35 PM +0200 Daniel Phillips
> <phillips@arcor.de> wrote:
>
> > Why are we using up valuable real estate in page->flags when the low bit
> > of page->pte_chain is available?
>
> Right now my flag is bit number 18 in page->flags out of 32. Mechanisms
> already exist to manipulate this bit in a reasonable fashion. I don't see
> any good reason for complicating things by putting a flag bit into a
> pointer, where we'd have to repeatedly check and clear it before we
> dereference the pointer.
Hi Dave,
It's not more complicated. You have to check which type of pointer you
have anyway, and having to strip away the low bit on one of the two
paths is insignificant in terms of generated code. The current patch
has to set and clear the flag bit separately.
> When I discussed this with Rik he said putting it
> in flags was reasonable. We can always revisit it in the future if we run
> out of bits.
I prefer doing things the most efficient way in core code.
--
Daniel
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Optimize away pte_chains for single mappings
2002-07-15 14:56 ` Daniel Phillips
@ 2002-07-15 15:40 ` Matti Aarnio
2002-07-15 16:10 ` Daniel Phillips
2002-07-15 16:30 ` Daniel Phillips
0 siblings, 2 replies; 13+ messages in thread
From: Matti Aarnio @ 2002-07-15 15:40 UTC (permalink / raw)
To: Daniel Phillips; +Cc: Dave McCracken, Linux Memory Management
On Mon, Jul 15, 2002 at 04:56:16PM +0200, Daniel Phillips wrote:
> On Monday 15 July 2002 16:02, Dave McCracken wrote:
> > --On Saturday, July 13, 2002 03:13:35 PM +0200 Daniel Phillips
> > <phillips@arcor.de> wrote:
> > > Why are we using up valuable real estate in page->flags when the low bit
> > > of page->pte_chain is available?
> >
> > Right now my flag is bit number 18 in page->flags out of 32. Mechanisms
> > already exist to manipulate this bit in a reasonable fashion. I don't see
> > any good reason for complicating things by putting a flag bit into a
> > pointer, where we'd have to repeatedly check and clear it before we
> > dereference the pointer.
>
> Hi Dave,
>
> It's not more complicated. You have to check which type of pointer you
> have anyway, and having to strip away the low bit on one of the two
> paths is insignificant in terms of generated code. The current patch
> has to set and clear the flag bit separately.
Better not try to play tricks with pointer bits.
Take ibm360 - pointers are 24 bit, 8 high-order bits are free for
application. (ibm 370/XA and 390 redefine things.)
I don't remember what unaligned access did.
Take IBM POWER RISC - pointers are 32 (64) bit, and depending on
target object size, 0-3 low-order bits are
IGNORED (presumed zero) when accessing memory.
Take SPARC - Unaligned access (those low-order bits being non-zero)
causes SIGBUS.
Take Alpha - Unaligned access (...) does unaligned-access-trap.
Take i386 - Unaligned accesses are executed happily...
So.. Some systems can give you 1-3 low-order bits, sometimes needing
definite masking before usage. In register-lacking i386 this
masking is definite punishment..
> > When I discussed this with Rik he said putting it
> > in flags was reasonable. We can always revisit it in the future if we run
> > out of bits.
>
> I prefer doing things the most efficient way in core code.
>
> --
> Daniel
/Matti Aarnio
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Optimize away pte_chains for single mappings
2002-07-15 15:40 ` Matti Aarnio
@ 2002-07-15 16:10 ` Daniel Phillips
2002-07-15 16:34 ` Rik van Riel
2002-07-15 16:30 ` Daniel Phillips
1 sibling, 1 reply; 13+ messages in thread
From: Daniel Phillips @ 2002-07-15 16:10 UTC (permalink / raw)
To: Matti Aarnio; +Cc: Dave McCracken, Linux Memory Management
On Monday 15 July 2002 17:40, Matti Aarnio wrote:
> On Mon, Jul 15, 2002 at 04:56:16PM +0200, Daniel Phillips wrote:
> > On Monday 15 July 2002 16:02, Dave McCracken wrote:
> > > --On Saturday, July 13, 2002 03:13:35 PM +0200 Daniel Phillips
> > > <phillips@arcor.de> wrote:
> > > > Why are we using up valuable real estate in page->flags when the low bit
> > > > of page->pte_chain is available?
> > >
> > > Right now my flag is bit number 18 in page->flags out of 32. Mechanisms
> > > already exist to manipulate this bit in a reasonable fashion. I don't see
> > > any good reason for complicating things by putting a flag bit into a
> > > pointer, where we'd have to repeatedly check and clear it before we
> > > dereference the pointer.
> >
> > Hi Dave,
> >
> > It's not more complicated. You have to check which type of pointer you
> > have anyway, and having to strip away the low bit on one of the two
> > paths is insignificant in terms of generated code. The current patch
> > has to set and clear the flag bit separately.
>
> Better not try to play tricks with pointer bits.
>
> Take ibm360 - pointers are 24 bit, 8 high-order bits are free for
> application. (ibm 370/XA and 390 redefine things.)
> I don't remember what unaligned access did.
> Take IBM POWER RISC - pointers are 32 (64) bit, and depending on
> target object size, 0-3 low-order bits are
> IGNORED (presumed zero) when accessing memory.
> Take SPARC - Unaligned access (those low-order bits being non-zero)
> causes SIGBUS.
> Take Alpha - Unaligned access (...) does unaligned-access-trap.
> Take i386 - Unaligned accesses are executed happily...
>
> So.. Some systems can give you 1-3 low-order bits, sometimes needing
> definite masking before usage. In register-lacking i386 this
> masking is definite punishment..
None of these cases apply, the low bit is always masked off before being
used as a pointer.
--
Daniel
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Optimize away pte_chains for single mappings
2002-07-15 16:10 ` Daniel Phillips
@ 2002-07-15 16:34 ` Rik van Riel
2002-07-15 16:42 ` Daniel Phillips
0 siblings, 1 reply; 13+ messages in thread
From: Rik van Riel @ 2002-07-15 16:34 UTC (permalink / raw)
To: Daniel Phillips; +Cc: Matti Aarnio, Dave McCracken, Linux Memory Management
On Mon, 15 Jul 2002, Daniel Phillips wrote:
> None of these cases apply, the low bit is always masked off before being
> used as a pointer.
Too ugly to live.
Rik
--
Bravely reimplemented by the knights who say "NIH".
http://www.surriel.com/ http://distro.conectiva.com/
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Optimize away pte_chains for single mappings
2002-07-15 16:34 ` Rik van Riel
@ 2002-07-15 16:42 ` Daniel Phillips
2002-07-15 20:57 ` Andrew Morton
0 siblings, 1 reply; 13+ messages in thread
From: Daniel Phillips @ 2002-07-15 16:42 UTC (permalink / raw)
To: Rik van Riel; +Cc: Matti Aarnio, Dave McCracken, Linux Memory Management
On Monday 15 July 2002 18:34, Rik van Riel wrote:
> On Mon, 15 Jul 2002, Daniel Phillips wrote:
>
> > None of these cases apply, the low bit is always masked off before being
> > used as a pointer.
>
> Too ugly to live.
That's a nonargument. I presume you weren't able to think of a
substantive reason.
--
Daniel
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Optimize away pte_chains for single mappings
2002-07-15 16:42 ` Daniel Phillips
@ 2002-07-15 20:57 ` Andrew Morton
2002-07-16 4:50 ` Daniel Phillips
0 siblings, 1 reply; 13+ messages in thread
From: Andrew Morton @ 2002-07-15 20:57 UTC (permalink / raw)
To: Daniel Phillips
Cc: Rik van Riel, Matti Aarnio, Dave McCracken, Linux Memory Management
Daniel Phillips wrote:
>
> On Monday 15 July 2002 18:34, Rik van Riel wrote:
> > On Mon, 15 Jul 2002, Daniel Phillips wrote:
> >
> > > None of these cases apply, the low bit is always masked off before being
> > > used as a pointer.
> >
> > Too ugly to live.
>
> That's a nonargument. I presume you weren't able to think of a
> substantive reason.
How about "Linus will roast our nuts if we do that"?
Plus accessing the same storage with both atomic and non-atomic
ops may be a problem on some hardware.
Let's wait until we run out of page flags first...
-
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Optimize away pte_chains for single mappings
2002-07-15 20:57 ` Andrew Morton
@ 2002-07-16 4:50 ` Daniel Phillips
0 siblings, 0 replies; 13+ messages in thread
From: Daniel Phillips @ 2002-07-16 4:50 UTC (permalink / raw)
To: Andrew Morton
Cc: Rik van Riel, Matti Aarnio, Dave McCracken, Linux Memory Management
On Monday 15 July 2002 22:57, Andrew Morton wrote:
> Daniel Phillips wrote:
> > On Monday 15 July 2002 18:34, Rik van Riel wrote:
> > > On Mon, 15 Jul 2002, Daniel Phillips wrote:
> > >
> > > > None of these cases apply, the low bit is always masked off before being
> > > > used as a pointer.
> > >
> > > Too ugly to live.
> >
> > That's a nonargument. I presume you weren't able to think of a
> > substantive reason.
>
> How about "Linus will roast our nuts if we do that"?
Unless someone can come up with a rational argument, I'd be forced to conclude
that Linus is superstitious.
> Plus accessing the same storage with both atomic and non-atomic
> ops may be a problem on some hardware.
Qu'est-ce que ça veut dire? We're protected under the pte_chain lock, are we
not?
> Let's wait until we run out of page flags first...
Sure, I intend to lay claim to six of them in due course, that would leave a
mere eight for posterity.
Then there is the method I proposed for saving 8 bytes per pte_chain with
the help of an overloaded pointer. In what way does that not turn the ugly
duckling into a beautiful swan?
--
Daniel
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Optimize away pte_chains for single mappings
2002-07-15 15:40 ` Matti Aarnio
2002-07-15 16:10 ` Daniel Phillips
@ 2002-07-15 16:30 ` Daniel Phillips
2002-07-15 16:55 ` Matti Aarnio
1 sibling, 1 reply; 13+ messages in thread
From: Daniel Phillips @ 2002-07-15 16:30 UTC (permalink / raw)
To: Matti Aarnio; +Cc: Dave McCracken, Linux Memory Management
On Monday 15 July 2002 17:40, Matti Aarnio wrote:
> In register-lacking i386 this masking is definite punishment..
Nonsense, the value needs to be loaded into a register anyway
before being used.
--
Daniel
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Optimize away pte_chains for single mappings
2002-07-15 16:30 ` Daniel Phillips
@ 2002-07-15 16:55 ` Matti Aarnio
2002-07-15 17:50 ` Daniel Phillips
0 siblings, 1 reply; 13+ messages in thread
From: Matti Aarnio @ 2002-07-15 16:55 UTC (permalink / raw)
To: Daniel Phillips; +Cc: Linux Memory Management
On Mon, Jul 15, 2002 at 06:30:43PM +0200, Daniel Phillips wrote:
> On Monday 15 July 2002 17:40, Matti Aarnio wrote:
> > In register-lacking i386 this masking is definite punishment..
>
> Nonsense, the value needs to be loaded into a register anyway
> before being used.
Think in assembly: what is needed on i386 to mask the pointer?
How is the pointer then used? How many registers do you need?
Which registers can be used for the masking arithmetic, and which
are usable in indexed memory reference address calculation?
Linus seems to care about this kind of speed issue, and
at least DaveM does look at the gcc-generated assembly to
verify that the C idioms used are compiled correctly and fast.
> Daniel
/Matti Aarnio
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Optimize away pte_chains for single mappings
2002-07-15 16:55 ` Matti Aarnio
@ 2002-07-15 17:50 ` Daniel Phillips
0 siblings, 0 replies; 13+ messages in thread
From: Daniel Phillips @ 2002-07-15 17:50 UTC (permalink / raw)
To: Matti Aarnio; +Cc: Linux Memory Management
On Monday 15 July 2002 18:55, Matti Aarnio wrote:
> On Mon, Jul 15, 2002 at 06:30:43PM +0200, Daniel Phillips wrote:
> > On Monday 15 July 2002 17:40, Matti Aarnio wrote:
> > > In register-lacking i386 this masking is definite punishment..
> >
> > Nonsense, the value needs to be loaded into a register anyway
> > before being used.
>
> Think in assembly, what is needed in i386 to mask the pointer ?
and <reg>, -2
(apologies for thinking in Intel assembly, old habits die hard)
> How the pointer is then used ?
Like any pointer.
> How many register you need ?
One.
> What registers can be used for masking arithmetics, and which
> are usable in indexed memory reference address calculation ?
No extra register for masking arithmetic.
> Linus seems to care about this kind of speed things, and
> at least DaveM does look into gcc generated assembly to
> verify, that used C idioms are compiled correctly and fast.
Yes, I guess I will generate the assembly code and have a look.
There is a lot more than just instructions/cycle counts to worry
about in code optimization. Other big considerations are cache
line hits, address generation interlocks and suitability of the
code for multiple execution units.
Getting down to nano-efficiency here, masking the address before
using it will generate a one cycle stall in one of the execution
pipes on classic pentium. That doesn't matter here - supposing
we use the low bit to indicate the non-direct case: the very
next thing we want to do after masking off the low bit is test
to see if the result is zero. Hey, our masking operation just
set the condition codes, isn't that nice. And the following jmp
instruction nicely fills the AGI slot.
Now I'm going to suggest an optimization that *is* really ugly:
note that in the current patch, the list always terminates with
null. But suppose instead it terminates with a pointer to a pte,
with the low bit set. We save 8 bytes per pte chain, and that is
not to be taken lightly.
--
Daniel
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2002-07-16 4:50 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-07-09 18:35 [PATCH] Optimize away pte_chains for single mappings Dave McCracken
2002-07-13 13:13 ` Daniel Phillips
2002-07-15 14:02 ` Dave McCracken
2002-07-15 14:56 ` Daniel Phillips
2002-07-15 15:40 ` Matti Aarnio
2002-07-15 16:10 ` Daniel Phillips
2002-07-15 16:34 ` Rik van Riel
2002-07-15 16:42 ` Daniel Phillips
2002-07-15 20:57 ` Andrew Morton
2002-07-16 4:50 ` Daniel Phillips
2002-07-15 16:30 ` Daniel Phillips
2002-07-15 16:55 ` Matti Aarnio
2002-07-15 17:50 ` Daniel Phillips
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox