From mboxrd@z Thu Jan 1 00:00:00 1970 Message-ID: <395A4CBA.D217F0FE@colorfullife.com> Date: Wed, 28 Jun 2000 21:06:34 +0200 From: Manfred Spraul MIME-Version: 1.0 Subject: Re: kmap_kiobuf() References: <200006281652.LAA19162@jen.americas.sgi.com> <20000628190612.E2392@redhat.com> Content-Type: multipart/mixed; boundary="------------3893FDF532C6E6D35E639D01" Sender: owner-linux-mm@kvack.org Return-Path: To: "Stephen C. Tweedie" Cc: lord@sgi.com, "Benjamin C.R. LaHaise" , David Woodhouse , linux-kernel@vger.rutgers.edu, linux-mm@kvack.org List-ID: This is a multi-part message in MIME format. --------------3893FDF532C6E6D35E639D01 Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit "Stephen C. Tweedie" wrote: > > Hi, > > On Wed, Jun 28, 2000 at 11:52:40AM -0500, lord@sgi.com wrote: > > > > I am not a VM guy either, Ben, is the cost of the TLB flush mostly in > > the synchronization between CPUs, or is it just expensive anyway you > > look at it? > > The TLB IPI is by far the biggest factor here. > I tried it on my Dual Pentium II/350, 100 MHz FSB: * an empty IPI returns after ~ 1630 cpu ticks. * a tlb flush IPI needs ~ 2130 cpu ticks. The computer was idle, and obviously I only measured the cost as seen from the primary cpu; I don't know how long the second cpu needs until it returns from the interrupt. 
-- Manfred --------------3893FDF532C6E6D35E639D01 Content-Type: text/plain; charset=us-ascii; name="patch-newperf" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="patch-newperf" --- 2.4/drivers/net/dummy.c Sat Jun 24 11:07:56 2000 +++ build-2.4/drivers/net/dummy.c Wed Jun 28 20:55:44 2000 @@ -132,17 +132,171 @@ dummy_init(dev); return 0; } - static struct net_device dev_dummy = { "", 0, 0, 0, 0, 0x0, 0, 0, 0, 0, NULL, dummy_probe }; +/* kernel benchmark hook (C) Manfred Spraul manfreds@colorfullife.com */ + +int p_shift = -1; +MODULE_PARM (p_shift, "1i"); +MODULE_PARM_DESC(p_shift, "Shift for the profile buffer"); + +#define STAT_TABLELEN 16384 +static unsigned long totals[STAT_TABLELEN]; +static unsigned int overflows; + +static unsigned long long stime; +static void start_measure(void) +{ + __asm__ __volatile__ ( + ".align 64\n\t" + "pushal\n\t" + "cpuid\n\t" + "popal\n\t" + "rdtsc\n\t" + "movl %%eax,(%0)\n\t" + "movl %%edx,4(%0)\n\t" + : /* no output */ + : "c"(&stime) + : "eax", "edx", "memory" ); +} + +static void end_measure(void) +{ +static unsigned long long etime; + __asm__ __volatile__ ( + "pushal\n\t" + "cpuid\n\t" + "popal\n\t" + "rdtsc\n\t" + "movl %%eax,(%0)\n\t" + "movl %%edx,4(%0)\n\t" + : /* no output */ + : "c"(&etime) + : "eax", "edx", "memory" ); + { + unsigned long time = (unsigned long)(etime-stime); + time >>= p_shift; + if(time < STAT_TABLELEN) { + totals[time]++; + } else { + overflows++; + } + } +} + +static void clean_buf(void) +{ + memset(totals,0,sizeof(totals)); + overflows = 0; +} + +static void print_line(unsigned long* array) +{ + int i; + for(i=0;i<32;i++) { + if((i%32)==16) + printk(":"); + printk("%lx ",array[i]); + } +} + +static void print_buf(char* caption) +{ + int i, other = 0; + printk("Results - %s - shift %d", + caption, p_shift); + + for(i=0;i