/*
 * Micro-benchmark comparing the cost of atomic (lock-prefixed) and
 * non-atomic bit-set instructions under different cache conditions.
 * Each scenario is timed with the time-stamp counter and the average
 * over MAX_TRY rounds is printed.
 *
 * Note: this program is written for Xeon (x86, GCC-style inline asm).
 */

/* Ask the C library to expose MAP_ANON even in strict ISO C modes. */
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

#if !defined(MAP_ANON) && defined(MAP_ANONYMOUS)
#define MAP_ANON MAP_ANONYMOUS
#endif

/*
 * Stolen from Linux.
 */
#define ADDR (*(volatile long *) addr)

/**
 * set_bit - Atomically set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * This function is atomic and may not be reordered.  See __set_bit()
 * if you do not require the atomic guarantees.
 *
 * Note: there are no guarantees that this function will not be reordered
 * on non x86 architectures, so if you are writing portable code,
 * make sure not to rely on its reordering guarantees.
 *
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static inline void set_bit(int nr, volatile unsigned long *addr)
{
	/*
	 * bts reads and then writes its memory operand, so the operand is
	 * declared read-write ("+m").  The "memory" clobber stops the
	 * compiler from moving other memory accesses across the operation.
	 */
	__asm__ __volatile__(
		"lock ;"
		"btsl %1,%0"
		: "+m" (ADDR)
		: "Ir" (nr)
		: "memory", "cc");
}

/**
 * __set_bit - Set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * Unlike set_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __set_bit(int nr, volatile unsigned long *addr)
{
	/*
	 * Read-write memory operand for the same reason as in set_bit().
	 * Only the word at @addr is described to the compiler, so callers
	 * should keep @nr inside that word.
	 */
	__asm__(
		"btsl %1,%0"
		: "+m" (ADDR)
		: "Ir" (nr)
		: "cc");
}

/* Read the time-stamp counter: low half into @low, high half into @high. */
#define rdtsc(low,high) \
	__asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))

/* Return the time-stamp counter as one 64-bit value. */
static inline unsigned long long read_tsc(void)
{
	unsigned int low, high;

	rdtsc(low, high);
	return (unsigned long long)high << 32 | low;
}

/*
 * Test params.
 *
 * The L* variants express the same sizes in units of long, which is the
 * element type used to walk the buffer.
 */
#define CACHESIZE	(512 * 1024)	/* L2 cache size */
#define LCACHESIZE	(CACHESIZE / sizeof(long))
#define PAGESIZE	4096
#define LPAGESIZE	(PAGESIZE / sizeof(long))

#define MAX_TRY		(100)

/* Indices into the score table, one per measured scenario. */
#define NOCACHEMISS_NOATOMIC	0
#define NOCACHEMISS_ATOMIC	1
#define NOCACHEMISS_MIXTURE	2
#define NOATOMIC		3
#define ATOMIC			4
#define MIXTURE			5
#define NOATOMIC_DEPEND		6
#define MIXTURE_DEPEND		7
#define NR_OPS			8

/* Human-readable label for each scenario, indexed by the values above. */
char message[NR_OPS][64]={
	"cache hit, no atomic",
	"cache hit, atomic",
	"cache hit, mixture",
	"cache miss, no atomic",
	"cache miss, atomic",
	"cache miss, mixture",
	"cache hit, dependency, noatomic",
	"cache hit, dependency, mixture"
};

#define LINESIZE	128	/* L2 line size */
#define LLINESIZE	(LINESIZE / sizeof(long))

/*
 * functions for preparing cache status
 */

/*
 * hot_cache - zero-fill @buffer so that it has just been written
 * @buffer: start of the region to touch
 * @size: number of bytes to touch
 */
void hot_cache(char *buffer, int size)
{
	memset(buffer, 0, size);
}

/*
 * cold_cache - write @size bytes of unrelated scratch memory
 * @buffer: unused; kept so the signature mirrors hot_cache()
 * @size: number of scratch bytes to allocate and zero
 *
 * The intent is to displace previously cached data by touching a
 * different region of the same size.  The scratch area is released
 * before returning.  Exits the program if the allocation fails.
 */
void cold_cache(char *buffer, int size)
{
	unsigned long *scratch;

	(void)buffer;

	scratch = malloc(size);
	if (scratch == NULL) {
		perror("malloc");
		exit(EXIT_FAILURE);
	}
	memset(scratch, 0, size);
	/*
	 * Compiler barrier: the zeroed data is never read back, so without
	 * this the stores (and the allocation) could be optimized away.
	 */
	__asm__ __volatile__("" : : "r" (scratch) : "memory");
	free(scratch);
}

/*
 * NOTE(review): the "m" operand is the macro argument expression itself,
 * so pass an lvalue that names the memory to be prefetched.
 */
#define prefetch(addr) \
	__asm__ __volatile__ ("prefetcht0 %0":: "m" (addr))

/*
 * main - run every scenario MAX_TRY times and print the average cycle count
 *
 * Returns 0 on success, EXIT_FAILURE if the test buffer cannot be mapped.
 */
int main(int argc, char *argv[])
{
	unsigned long long score[NR_OPS][MAX_TRY];
	unsigned long long average_score[NR_OPS];
	unsigned long *map, *addr, *limit;
	unsigned long long lstart, lend;
	int try, i, j;

	(void)argc;
	(void)argv;

	/* Anonymous mappings take fd -1; the buffer is both read and written. */
	map = mmap(NULL, CACHESIZE, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANON, -1, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}
	limit = map + LCACHESIZE;

	for (try = 0; try < MAX_TRY; try++) {
		/*
		 * Cache-hit cases: the loop runs once per pair of lines in
		 * the buffer, but every iteration operates on the same two
		 * lines at the start of the map.
		 */

		/* cache hit, non-atomic ops only */
		hot_cache((char *)map, CACHESIZE);
		lstart = read_tsc();
		for (addr = map; addr < limit; addr += LLINESIZE * 2) {
			__set_bit(1, map);
			__set_bit(2, map + LLINESIZE);
		}
		lend = read_tsc();
		score[NOCACHEMISS_NOATOMIC][try] = lend - lstart;

		/* cache hit, atomic ops only */
		hot_cache((char *)map, CACHESIZE);
		lstart = read_tsc();
		for (addr = map; addr < limit; addr += LLINESIZE * 2) {
			set_bit(1, map);
			set_bit(2, map + LLINESIZE);
		}
		lend = read_tsc();
		score[NOCACHEMISS_ATOMIC][try] = lend - lstart;

		/* cache hit, one non-atomic and one atomic op */
		hot_cache((char *)map, CACHESIZE);
		lstart = read_tsc();
		for (addr = map; addr < limit; addr += LLINESIZE * 2) {
			__set_bit(1, map);
			set_bit(2, map + LLINESIZE);
		}
		lend = read_tsc();
		score[NOCACHEMISS_MIXTURE][try] = lend - lstart;

		/*
		 * Cache-miss cases: scratch memory is written first, then
		 * the loop walks the whole buffer two lines at a time.
		 */

		/* cache miss, non-atomic ops only */
		cold_cache((char *)map, CACHESIZE);
		lstart = read_tsc();
		for (addr = map; addr < limit; addr += LLINESIZE * 2) {
			__set_bit(1, addr);
			__set_bit(2, addr + LLINESIZE);
		}
		lend = read_tsc();
		score[NOATOMIC][try] = lend - lstart;

		/* cache miss, atomic ops only */
		cold_cache((char *)map, CACHESIZE);
		lstart = read_tsc();
		for (addr = map; addr < limit; addr += LLINESIZE * 2) {
			set_bit(1, addr);
			set_bit(2, addr + LLINESIZE);
		}
		lend = read_tsc();
		score[ATOMIC][try] = lend - lstart;

		/* cache miss, one non-atomic and one atomic op */
		cold_cache((char *)map, CACHESIZE);
		lstart = read_tsc();
		for (addr = map; addr < limit; addr += LLINESIZE * 2) {
			__set_bit(1, addr);
			set_bit(2, addr + LLINESIZE);
		}
		lend = read_tsc();
		score[MIXTURE][try] = lend - lstart;

		/*
		 * Dependency cases: both operations of an iteration target
		 * the same word, so the second depends on the first.
		 */

		/* hot cache, dependent non-atomic ops */
		hot_cache((char *)map, CACHESIZE);
		lstart = read_tsc();
		for (addr = map; addr < limit; addr += LLINESIZE * 2) {
			__set_bit(1, addr);
			__set_bit(2, addr);
		}
		lend = read_tsc();
		score[NOATOMIC_DEPEND][try] = lend - lstart;

		/* hot cache, dependent non-atomic then atomic op */
		hot_cache((char *)map, CACHESIZE);
		lstart = read_tsc();
		for (addr = map; addr < limit; addr += LLINESIZE * 2) {
			__set_bit(1, addr);
			set_bit(2, addr);
		}
		lend = read_tsc();
		score[MIXTURE_DEPEND][try] = lend - lstart;
	}

	/* try == MAX_TRY here; report the mean cycle count per scenario. */
	for (j = 0; j < NR_OPS; j++) {
		average_score[j] = 0;
		for (i = 0; i < try; i++) {
			average_score[j] += score[j][i];
		}
		printf("score %d is %16llu note: %s\n",
		       j, average_score[j] / try, message[j]);
	}

	munmap(map, CACHESIZE);
	return 0;
}