linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Eric Dumazet <dada1@cosmosbay.com>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nick Piggin <npiggin@suse.de>, Hugh Dickins <hugh@veritas.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Linux Memory Management List <linux-mm@kvack.org>,
	tee@sgi.com, holt@sgi.com, Andrea Arcangeli <andrea@suse.de>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Re: [rfc] no ZERO_PAGE?
Date: Wed, 4 Apr 2007 18:32:20 +0200	[thread overview]
Message-ID: <20070404183220.2455465b.dada1@cosmosbay.com> (raw)
In-Reply-To: <Pine.LNX.4.64.0704040830500.6730@woody.linux-foundation.org>

On Wed, 4 Apr 2007 08:35:30 -0700 (PDT)
Linus Torvalds <torvalds@linux-foundation.org> wrote:

> Anyway, I'm not against this, but I can see somebody actually *wanting* 
> the ZERO page in some cases. I've used the fact for TLB testing, for 
> example, by just doing a big malloc(), and knowing that the kernel will 
> re-use the ZERO_PAGE so that I don't get any cache effects (well, at least 
> not any *physical* cache effects. Virtually indexed cached will still show 
> effects of it, of course, but I haven't cared).
> 
> That's an example of an app that actually cares about the page allocation 
> (or, in this case, the lack there-of). Not an important one, but maybe 
> there are important ones that care?

I don't know if this small program is of any interest:

But results on an Intel Pentium-M are interesting, in particular 2) & 3)

If a page is first mapped as the ZERO_PAGE (read fault) and then COWed into a full rw page (write fault), the write fault is more expensive than faulting in a rw page directly.
(2660 cycles instead of 2300)

Is there an app somewhere that depends on 2) being ultra-fast but then future write accesses *slow* ???

$ ./page_bench >RES; cat RES
1) pagefault tp bring a rw page:
Poke (addr=0x804c000): 2360 cycles
1) pagefault to bring a rw page:
Poke (addr=0x804d000): 2368 cycles
1) pagefault to bring a rw page:
Poke (addr=0x804e000): 2120 cycles
2) pagefault to bring a zero page, readonly
Peek(addr=0x804f000): ->0 891 cycles
3) pagefault to make this page rw
Poke (addr=0x804f000): 2660 cycles
1) pagefault to bring a rw page:
Poke (addr=0x8050000): 2099 cycles
1) pagefault to bring a rw page:
Poke (addr=0x8051000): 2062 cycles
4) memset 4096 bytes to 0x55:
Poke_full (addr=0x804f000, len=4096): 2719 cycles
5) fill the whole table
Poke_full (addr=0x804c000, len=4194304): 6563661 cycles
6) fill again whole table (no more faults, but cpu cache too small)
Poke_full (addr=0x804c000, len=4194304): 5188925 cycles
7.1) faulting a mmap zone, read access
Peek(addr=0xb7f8a000): ->0 40453 cycles
8.1) faulting a mmap zone, write access
Poke (addr=0xb7f89000): 10599 cycles
7.2) faulting a mmap zone, read access
Peek(addr=0xb7f88000): ->0 8167 cycles
8.3) faulting a mmap zone, write access
Poke (addr=0xb7f87000): 5701 cycles


$ cat page_bench.c

# include <errno.h>
# include <stdlib.h>
# include <unistd.h>
# include <fcntl.h>
# include <stdio.h>
# include <sys/time.h>
# include <time.h>
# include <sys/mman.h>
# include <string.h>

/*
 * rdtscll(val): read the x86 time-stamp counter into the 64-bit lvalue `val`.
 *
 * - `__asm__ __volatile__` is used instead of plain `asm` so the macro also
 *   compiles in strict ISO modes (-std=c99 / -std=c11), where `asm` is not
 *   a keyword.
 * - The "memory" clobber makes each read a compiler barrier, so the memory
 *   access being timed cannot be moved outside the start/end pair.
 * - Temporaries avoid the reserved `__` prefix.
 */
#if defined(__x86_64__) || defined(__x86_64)

/* The low half is taken from EAX ("=a") and the high half from EDX ("=d"). */
#define rdtscll(val) do { \
     unsigned int rdtsc_lo_, rdtsc_hi_; \
     __asm__ __volatile__("rdtsc" : "=a" (rdtsc_lo_), "=d" (rdtsc_hi_) : : "memory"); \
     (val) = ((unsigned long long)rdtsc_lo_) | (((unsigned long long)rdtsc_hi_) << 32); \
} while (0)

#elif defined(__i386__) || defined(__i386)

/* On i386 the "=A" constraint binds a 64-bit operand to the EDX:EAX pair. */
#define rdtscll(val) do { \
     __asm__ __volatile__("rdtsc" : "=A" (val) : : "memory"); \
} while (0)

#else
#error "rdtscll() is only implemented for x86 (i386 / x86_64)"
#endif

int var;



/*
 * The first four mappings created by map_many_vmas(), in creation order.
 * Each stays NULL until the corresponding mmap() call has succeeded.
 */
int *addr1, *addr2, *addr3, *addr4;

/*
 * Create up to `nb` one-page anonymous mappings and remember the first four
 * in addr1..addr4.
 *
 * Mapping 0 is read-only; all others are read/write.  Even-numbered mappings
 * are MAP_SHARED and odd-numbered ones MAP_PRIVATE (presumably so that
 * neighbouring mappings differ and stay separate VMAs -- TODO confirm).
 *
 * On the first mmap() failure a message is printed on stderr and the function
 * returns, leaving the remaining addrN pointers NULL.  The mappings are never
 * unmapped.
 */
void map_many_vmas(unsigned int nb)
{
	/* sysconf() is the portable replacement for the legacy getpagesize(). */
	long page = sysconf(_SC_PAGESIZE);
	size_t sz;
	unsigned int ui;	/* unsigned: compared with `nb` and printed with %u */

	if (page <= 0) {
		fprintf(stderr, "Cannot determine the page size\n");
		return;
	}
	sz = (size_t)page;

	for (ui = 0; ui < nb; ui++) {
		int prot = (ui == 0) ? PROT_READ : PROT_READ | PROT_WRITE;
		int flags = MAP_ANONYMOUS | ((ui & 1) ? MAP_PRIVATE : MAP_SHARED);
		void *p = mmap(NULL, sz, prot, flags, -1, 0);

		if (p == MAP_FAILED) {
			fprintf(stderr, "Only %u mappings could be set\n", ui);
			break;
		}

		/* Record the mapping in the first slot that is still empty. */
		if (!addr1)
			addr1 = (int *)p;
		else if (!addr2)
			addr2 = (int *)p;
		else if (!addr3)
			addr3 = (int *)p;
		else if (!addr4)
			addr4 = (int *)p;
	}
}

/*
 * Debug helper: copy the contents of /proc/self/maps to standard error.
 *
 * Best effort: returns silently if the file cannot be opened, and stops at
 * the first read or write error other than EINTR.  Short writes are resumed
 * so that no part of a chunk is dropped.
 */
void show_maps(void)
{
	char buffer[4096];
	ssize_t got;
	int fd = open("/proc/self/maps", O_RDONLY);

	if (fd == -1)
		return;

	while ((got = read(fd, buffer, sizeof(buffer))) != 0) {
		ssize_t done = 0;

		if (got < 0) {
			if (errno == EINTR)
				continue;	/* interrupted before any data: retry */
			break;
		}

		/* write() may accept fewer bytes than requested; loop until
		 * the whole chunk has been emitted. */
		while (done < got) {
			ssize_t w = write(STDERR_FILENO, buffer + done,
					  (size_t)(got - done));

			if (w < 0) {
				if (errno == EINTR)
					continue;
				close(fd);
				return;
			}
			done += w;
		}
	}
	close(fd);
}

/*
 * Time a single int store to `addr` and print the elapsed time-stamp-counter
 * delta on stdout.
 *
 * addr: location to write; must be writable and suitably aligned for int.
 * val:  value stored at *addr.
 *
 * rdtscll() is the TSC-reading macro defined earlier in this file.
 */
void poke_int(void *addr, int val)
{
	unsigned long long start, end;
	long long delta;

	rdtscll(start);
	/* Explicit compiler barriers pin the store inside the timed window,
	 * whatever clobbers rdtscll() itself declares. */
	__asm__ __volatile__("" : : : "memory");
	*(volatile int *)addr = val;
	__asm__ __volatile__("" : : : "memory");
	rdtscll(end);

	/* Keep all 64 bits: `long` is only 32 bits wide on i386. */
	delta = (long long)(end - start);
	printf("Poke (addr=%p): %lld cycles\n", addr, delta);
}

/*
 * Time a memset() of `len` bytes at `addr` and print the elapsed
 * time-stamp-counter delta on stdout.
 *
 * addr: start of the region to fill; must be writable for `len` bytes.
 * val:  fill value, passed unchanged to memset().
 * len:  number of bytes to fill; a negative value is rejected with a message
 *       on stderr instead of being converted to a huge size_t.
 *
 * rdtscll() is the TSC-reading macro defined earlier in this file.
 */
void poke_full(void *addr, int val, int len)
{
	unsigned long long start, end;
	long long delta;

	if (len < 0) {
		fprintf(stderr, "Poke_full: invalid length %d\n", len);
		return;
	}

	rdtscll(start);
	/* Explicit compiler barriers pin the memset() inside the timed window,
	 * whatever clobbers rdtscll() itself declares. */
	__asm__ __volatile__("" : : : "memory");
	memset(addr, val, (size_t)len);
	__asm__ __volatile__("" : : : "memory");
	rdtscll(end);

	/* Keep all 64 bits: `long` is only 32 bits wide on i386. */
	delta = (long long)(end - start);
	printf("Poke_full (addr=%p, len=%d): %lld cycles\n", addr, len, delta);
}

/*
 * Time a single int load from `addr`, print the value read and the elapsed
 * time-stamp-counter delta on stdout, and return the value.
 *
 * addr: location to read; must be readable and suitably aligned for int.
 *
 * rdtscll() is the TSC-reading macro defined earlier in this file.
 */
int  peek_int(void *addr)
{
	unsigned long long start, end;
	long long delta;
	int val;

	rdtscll(start);
	/* Explicit compiler barriers plus a volatile access pin the load
	 * inside the timed window, whatever clobbers rdtscll() declares. */
	__asm__ __volatile__("" : : : "memory");
	val = *(volatile int *)addr;
	__asm__ __volatile__("" : : : "memory");
	rdtscll(end);

	/* Keep all 64 bits: `long` is only 32 bits wide on i386. */
	delta = (long long)(end - start);
	printf("Peek(addr=%p): ->%d %lld cycles\n", addr, val, delta);
	return val;
}

int big_table[1024*1024] __attribute__((aligned(4096)));

/*
 * Write the command-line synopsis to stderr, then terminate the process
 * with exit status `code`.  Does not return.
 */
void usage(int code)
{
	static const char synopsis[] = "Usage : page_bench [-m mappings]\n";

	fputs(synopsis, stderr);
	exit(code);
}

/*
 * page_bench entry point.
 *
 * Options:
 *   -m N   number of one-page mappings to create (default 200, minimum 4)
 *   -V     print the usage line and exit with status 0
 *
 * Creates the mappings, then times a fixed sequence of first-touch reads and
 * writes on big_table and on the first four mappings, printing one result
 * line per access.  Returns 0 on success, 1 if fewer than four mappings
 * could be created; invalid command lines exit through usage(1).
 */
int main(int argc, char *argv[])
{
	unsigned int nb_mappings = 200;
	int c;

	/* getopt() signals the end of the options with -1 (not EOF). */
	while ((c = getopt(argc, argv, "Vm:")) != -1) {
		switch (c) {
		case 'm': {
			char *end;
			unsigned long v;

			/* Require a leading digit: strtoul() would otherwise
			 * accept a sign and leading white space. */
			if (*optarg < '0' || *optarg > '9')
				usage(1);
			errno = 0;
			v = strtoul(optarg, &end, 10);
			if (*end != '\0' || errno == ERANGE ||
			    v != (unsigned long)(unsigned int)v)
				usage(1);
			nb_mappings = (unsigned int)v;
			break;
		}
		case 'V':
			usage(0);
			break;
		default:
			/* Unknown option or missing argument. */
			usage(1);
			break;
		}
	}
	if (nb_mappings < 4)
		nb_mappings = 4;
	map_many_vmas(nb_mappings);
	/* show_maps(); -- enable to dump the resulting VMA layout to stderr */

	/* map_many_vmas() may stop early; the mmap tests below dereference all
	 * four pointers, so refuse to continue with any of them still NULL. */
	if (!addr1 || !addr2 || !addr3 || !addr4) {
		fprintf(stderr, "page_bench: fewer than 4 mappings could be set, aborting\n");
		return 1;
	}

	/* Consecutive tests index big_table in steps of 1024 ints. */
	printf("1) pagefault to bring a rw page:\n") ;
	poke_int(&big_table[0], 10);
	printf("1) pagefault to bring a rw page:\n") ;
	poke_int(&big_table[1024], 10);
	printf("1) pagefault to bring a rw page:\n") ;
	poke_int(&big_table[2048], 10);
	printf("2) pagefault to bring a zero page, readonly\n");
	peek_int(&big_table[3*1024]);
	printf("3) pagefault to make this page rw\n");
	poke_int(&big_table[3*1024], 10);

	printf("1) pagefault to bring a rw page:\n") ;
	poke_int(&big_table[4*1024], 10);
	printf("1) pagefault to bring a rw page:\n") ;
	poke_int(&big_table[5*1024], 10);

	printf("4) memset 4096 bytes to 0x55:\n");
	poke_full(&big_table[3*1024], 0x55, 4096);

	printf("5) fill the whole table\n");
	poke_full(big_table, 1, (int)sizeof(big_table));
	printf("6) fill again whole table (no more faults, but cpu cache too small)\n");
	poke_full(big_table, 1, (int)sizeof(big_table));

	printf("7.1) faulting a mmap zone, read access\n");
	peek_int(addr1);

	printf("8.1) faulting a mmap zone, write access\n");
	poke_int(addr2, 10);
	printf("7.2) faulting a mmap zone, read access\n");
	peek_int(addr3);
	printf("8.2) faulting a mmap zone, write access\n");
	poke_int(addr4, 10);

	return 0;
}


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2007-04-04 16:32 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-03-29  7:58 [rfc][patch 1/2] mm: dont account ZERO_PAGE Nick Piggin
2007-03-29  7:58 ` [rfc][patch 2/2] mips: reinstate move_pte Nick Piggin
2007-03-29 17:49   ` Linus Torvalds
2007-03-29 13:10 ` [rfc][patch 1/2] mm: dont account ZERO_PAGE Hugh Dickins
2007-03-30  1:46   ` Nick Piggin
2007-03-30  2:59     ` Robin Holt
2007-03-30  3:09       ` Nick Piggin
2007-03-30  9:23         ` Robin Holt
2007-03-30  2:40   ` Nick Piggin
2007-04-04  3:37     ` [rfc] no ZERO_PAGE? Nick Piggin
2007-04-04  9:45       ` Hugh Dickins
2007-04-04 10:24         ` Nick Piggin
2007-04-04 12:27           ` Andrea Arcangeli
2007-04-04 13:55             ` Dan Aloni
2007-04-04 14:14               ` Andrea Arcangeli
2007-04-04 14:44                 ` Dan Aloni
2007-04-04 15:03                   ` Hugh Dickins
2007-04-04 15:34                     ` Andrea Arcangeli
2007-04-04 15:41                       ` Hugh Dickins
2007-04-04 16:07                         ` Andrea Arcangeli
2007-04-04 16:14                         ` Linus Torvalds
2007-04-04 15:27                   ` Andrea Arcangeli
2007-04-04 16:15                     ` Dan Aloni
2007-04-04 16:48                       ` Andrea Arcangeli
2007-04-04 12:45           ` Hugh Dickins
2007-04-04 13:05             ` Andrea Arcangeli
2007-04-04 13:32               ` Hugh Dickins
2007-04-04 13:40                 ` Andrea Arcangeli
2007-04-04 15:35       ` Linus Torvalds
2007-04-04 15:48         ` Andrea Arcangeli
2007-04-04 16:09           ` Linus Torvalds
2007-04-04 16:23             ` Andrea Arcangeli
2007-04-04 16:10           ` Hugh Dickins
2007-04-04 16:31             ` Andrea Arcangeli
2007-04-04 22:07           ` Valdis.Kletnieks
2007-04-04 16:32         ` Eric Dumazet [this message]
2007-04-04 17:02           ` Linus Torvalds
2007-04-04 19:15         ` Andrew Morton
2007-04-04 20:11         ` David Miller, Linus Torvalds
2007-04-04 20:50           ` Andrew Morton
2007-04-05  2:03           ` Nick Piggin
2007-04-05  5:23           ` Andrea Arcangeli
2007-04-04 22:05         ` Valdis.Kletnieks
2007-04-05  0:27           ` Linus Torvalds
2007-04-05  1:25             ` Valdis.Kletnieks
2007-04-05  2:30             ` Nick Piggin
2007-04-05  5:37               ` William Lee Irwin III
2007-04-05 17:23                 ` Valdis.Kletnieks
2007-04-05  4:47         ` Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070404183220.2455465b.dada1@cosmosbay.com \
    --to=dada1@cosmosbay.com \
    --cc=akpm@linux-foundation.org \
    --cc=andrea@suse.de \
    --cc=holt@sgi.com \
    --cc=hugh@veritas.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=npiggin@suse.de \
    --cc=tee@sgi.com \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox