diff -urN -X dontdiff linux-2.5.59/include/linux/seqlock.h linux-2.5-seqlock/include/linux/seqlock.h
--- linux-2.5.59/include/linux/seqlock.h	1969-12-31 16:00:00.000000000 -0800
+++ linux-2.5-seqlock/include/linux/seqlock.h	2003-01-30 14:57:05.000000000 -0800
@@ -0,0 +1,145 @@
+#ifndef __LINUX_SEQLOCK_H
+#define __LINUX_SEQLOCK_H
+/*
+ * Reader/writer consistent mechanism without starving writers. This type of
+ * lock is for data where the reader wants a consistent set of information
+ * and is willing to retry if the information changes. Readers never
+ * block but they may have to retry if a writer is in
+ * progress. Writers do not wait for readers.
+ *
+ * This is not as cache friendly as brlock. Also, this will not work
+ * for data that contains pointers, because any writer could
+ * invalidate a pointer that a reader was following.
+ *
+ * Expected reader usage:
+ *	do {
+ *		seq = seq_read_begin(&foo);
+ *		...
+ *	} while (seq_read_end(&foo, seq));
+ *
+ * Based on x86_64 vsyscall gettimeofday
+ * by Keith Owens and Andrea Arcangeli
+ */
+
+#include <linux/config.h>
+#include <linux/preempt.h>
+#include <linux/spinlock.h>
+
+typedef struct {
+	volatile unsigned counter;
+} seqcounter_t;
+
+#define SEQ_INIT	(seqcounter_t) { 0 }
+#define seq_init(x)	do { *(x) = SEQ_INIT; } while (0)
+
+/* Update sequence count only.
+ * Assumes the caller is doing its own mutual exclusion with another lock
+ * or semaphore.
+ */
+static inline void seq_write_begin(seqcounter_t *s)
+{
+	preempt_disable();
+	++s->counter;
+	wmb();
+}
+
+static inline void seq_write_end(seqcounter_t *s)
+{
+	wmb();
+	s->counter++;
+	preempt_enable();
+}
+
+
+/* Start of read calculation -- fetch last complete writer token */
+static inline unsigned seq_read_begin(const seqcounter_t *s)
+{
+	unsigned ret = s->counter;
+	mb();
+	return ret;
+}
+
+/* End of read calculation -- check if sequence matches */
+static inline int seq_read_end(const seqcounter_t *s, unsigned iv)
+{
+	mb();
+	return (s->counter != iv) || (iv & 1);
+}
+
+/* Combination of spinlock for writing and sequence update for readers */
+typedef struct {
+	seqcounter_t seq;
+	spinlock_t lock;
+} seqlock_t;
+
+/*
+ * These macros triggered gcc-3.x compile-time problems. We think these are
+ * OK now. Be cautious.
+ */
+#define SEQ_LOCK_UNLOCKED	{ SEQ_INIT, SPIN_LOCK_UNLOCKED }
+#define seqlock_init(x)		do { *(x) = (seqlock_t) SEQ_LOCK_UNLOCKED; } while (0)
+
+/* Lock out other writers and update the count.
+ * Acts like a normal spin_lock/unlock.
+ * Don't need preempt_disable() because that is in the spin_lock already.
+ */
+static inline void seq_write_lock(seqlock_t *rw)
+{
+	spin_lock(&rw->lock);
+	++rw->seq.counter;
+	wmb();
+}
+
+static inline void seq_write_unlock(seqlock_t *rw)
+{
+	wmb();
+	rw->seq.counter++;
+	spin_unlock(&rw->lock);
+}
+
+static inline int seq_write_trylock(seqlock_t *rw)
+{
+	int ret = spin_trylock(&rw->lock);
+
+	if (ret) {
+		++rw->seq.counter;
+		wmb();
+	}
+	return ret;
+}
+
+/* Versions of seq_read_begin/end for use with seqlock */
+#define seq_read_lock(slock) \
+	seq_read_begin(&(slock)->seq)
+#define seq_read_unlock(slock,iv) \
+	unlikely(seq_read_end(&(slock)->seq, iv))
+
+
+/*
+ * Possible sw/hw IRQ protected versions of the interfaces.
+ */
+#define seq_write_lock_irqsave(lock, flags) \
+	do { local_irq_save(flags); seq_write_lock(lock); } while (0)
+#define seq_write_lock_irq(lock) \
+	do { local_irq_disable(); seq_write_lock(lock); } while (0)
+#define seq_write_lock_bh(lock) \
+	do { local_bh_disable(); seq_write_lock(lock); } while (0)
+
+#define seq_write_unlock_irqrestore(lock, flags) \
+	do { seq_write_unlock(lock); local_irq_restore(flags); } while(0)
+#define seq_write_unlock_irq(lock) \
+	do { seq_write_unlock(lock); local_irq_enable(); } while(0)
+#define seq_write_unlock_bh(lock) \
+	do { seq_write_unlock(lock); local_bh_enable(); } while(0)
+
+#define seq_read_lock_irqsave(lock, flags) \
+	({ local_irq_save(flags); seq_read_lock(lock); })
+
+#define seq_read_lock_irqrestore(lock, iv, flags) \
+	unlikely({int ret = seq_read_end(&(lock)->seq, iv); \
+		local_irq_restore(flags); \
+		ret; \
+	})
+
+#endif /* __LINUX_SEQLOCK_H */
diff -urN -X dontdiff linux-2.5.59/include/linux/time.h linux-2.5-seqlock/include/linux/time.h
--- linux-2.5.59/include/linux/time.h	2003-01-16 18:22:20.000000000 -0800
+++ linux-2.5-seqlock/include/linux/time.h	2003-01-30 11:02:49.000000000 -0800
@@ -25,6 +25,7 @@
 #ifdef __KERNEL__
 
 #include <linux/spinlock.h>
+#include <linux/seqlock.h>
 
 /*
  * Change timeval to jiffies, trying to avoid the
@@ -120,7 +121,7 @@
 }
 
 extern struct timespec xtime;
-extern rwlock_t xtime_lock;
+extern seqlock_t xtime_lock;
 
 static inline unsigned long get_seconds(void)
 {
diff -urN -X dontdiff linux-2.5.59/kernel/time.c linux-2.5-seqlock/kernel/time.c
--- linux-2.5.59/kernel/time.c	2003-01-16 18:21:45.000000000 -0800
+++ linux-2.5-seqlock/kernel/time.c	2003-01-30 11:29:29.000000000 -0800
@@ -27,7 +27,6 @@
 #include
 #include
 #include
-
 #include
 
 /*
@@ -38,7 +37,6 @@
 /* The xtime_lock is not only serializing the xtime read/writes but it's also
  * serializing all accesses to the global NTP variables now.
  */
-extern rwlock_t xtime_lock;
 extern unsigned long last_time_offset;
 
 #if !defined(__alpha__) && !defined(__ia64__)
@@ -80,7 +78,7 @@
 		return -EPERM;
 	if (get_user(value, tptr))
 		return -EFAULT;
-	write_lock_irq(&xtime_lock);
+	seq_write_lock_irq(&xtime_lock);
 	xtime.tv_sec = value;
 	xtime.tv_nsec = 0;
 	last_time_offset = 0;
@@ -88,7 +86,7 @@
 	time_status |= STA_UNSYNC;
 	time_maxerror = NTP_PHASE_LIMIT;
 	time_esterror = NTP_PHASE_LIMIT;
-	write_unlock_irq(&xtime_lock);
+	seq_write_unlock_irq(&xtime_lock);
 	return 0;
 }
 
@@ -96,13 +94,13 @@
 
 asmlinkage long sys_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
-	if (tv) {
+	if (likely(tv != NULL)) {
 		struct timeval ktv;
 		do_gettimeofday(&ktv);
 		if (copy_to_user(tv, &ktv, sizeof(ktv)))
 			return -EFAULT;
 	}
-	if (tz) {
+	if (unlikely(tz != NULL)) {
 		if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
 			return -EFAULT;
 	}
@@ -127,10 +125,10 @@
  */
 inline static void warp_clock(void)
 {
-	write_lock_irq(&xtime_lock);
+	seq_write_lock_irq(&xtime_lock);
 	xtime.tv_sec += sys_tz.tz_minuteswest * 60;
 	last_time_offset = 0;
-	write_unlock_irq(&xtime_lock);
+	seq_write_unlock_irq(&xtime_lock);
 }
 
 /*
@@ -235,7 +233,7 @@
 	    txc->tick > 1100000/USER_HZ)
 			return -EINVAL;
 
-	write_lock_irq(&xtime_lock);
+	seq_write_lock_irq(&xtime_lock);
 	result = time_state;	/* mostly `TIME_OK' */
 
 	/* Save for later - semantics of adjtime is to return old value */
@@ -386,7 +384,7 @@
 		txc->errcnt	   = pps_errcnt;
 		txc->stbcnt	   = pps_stbcnt;
 	last_time_offset = 0;
-	write_unlock_irq(&xtime_lock);
+	seq_write_unlock_irq(&xtime_lock);
 	do_gettimeofday(&txc->time);
 	return(result);
 }
@@ -409,9 +407,12 @@
 struct timespec current_kernel_time(void)
 {
 	struct timespec now;
-	unsigned long flags;
-	read_lock_irqsave(&xtime_lock,flags);
-	now = xtime;
-	read_unlock_irqrestore(&xtime_lock,flags);
+	unsigned long seq;
+
+	do {
+		seq = seq_read_lock(&xtime_lock);
+		now = xtime;
+	} while (seq_read_unlock(&xtime_lock, seq));
+
 	return now;
 }
diff -urN -X dontdiff linux-2.5.59/kernel/timer.c linux-2.5-seqlock/kernel/timer.c
--- linux-2.5.59/kernel/timer.c	2003-01-16 18:21:49.000000000 -0800
+++ linux-2.5-seqlock/kernel/timer.c	2003-01-30 11:13:22.000000000 -0800
@@ -754,11 +754,8 @@
 /* jiffies at the most recent update of wall time */
 unsigned long wall_jiffies;
 
-/*
- * This read-write spinlock protects us from races in SMP while
- * playing with xtime and avenrun.
- */
-rwlock_t xtime_lock __cacheline_aligned_in_smp = RW_LOCK_UNLOCKED;
+seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQ_LOCK_UNLOCKED;
+
 unsigned long last_time_offset;
 
 /*
@@ -798,8 +795,7 @@
 }
 
 /*
- * The 64-bit jiffies value is not atomic - you MUST NOT read it
- * without holding read_lock_irq(&xtime_lock).
+ * The 64-bit jiffies value is not atomic
  * jiffies is defined in the linker script...
  */
 
@@ -1087,18 +1083,21 @@
 	struct sysinfo val;
 	unsigned long mem_total, sav_total;
 	unsigned int mem_unit, bitcount;
+	unsigned long seq;
 
 	memset((char *)&val, 0, sizeof(struct sysinfo));
 
-	read_lock_irq(&xtime_lock);
-	val.uptime = jiffies / HZ;
+	do {
+		seq = seq_read_lock(&xtime_lock);
+
+		val.uptime = jiffies / HZ;
 
-	val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-	val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-	val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+		val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
+		val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
+		val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
 
-	val.procs = nr_threads;
-	read_unlock_irq(&xtime_lock);
+		val.procs = nr_threads;
+	} while (unlikely(seq_read_unlock(&xtime_lock, seq)));
 
 	si_meminfo(&val);
 	si_swapinfo(&val);
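
For reference, here is a minimal reader/writer sketch against the new
interface.  It is not part of the patch, and the function names
sketch_get_wall_time() and sketch_set_wall_time() are made up for
illustration; the reader mirrors the current_kernel_time() conversion
above, the writer mirrors sys_stime():

#include <linux/time.h>		/* xtime, xtime_lock (now seqlock_t) */
#include <linux/seqlock.h>

/* Reader: never blocks.  Snapshot xtime, then check whether a writer
 * ran (or was still running) while we copied it; if the sequence
 * changed or was odd, simply retry. */
static struct timespec sketch_get_wall_time(void)
{
	struct timespec now;
	unsigned long seq;

	do {
		seq = seq_read_lock(&xtime_lock);
		now = xtime;
	} while (seq_read_unlock(&xtime_lock, seq));

	return now;
}

/* Writer: takes the spinlock side, which also bumps the sequence
 * counter so that concurrent readers know to retry. */
static void sketch_set_wall_time(time_t sec)
{
	seq_write_lock_irq(&xtime_lock);
	xtime.tv_sec = sec;
	xtime.tv_nsec = 0;
	seq_write_unlock_irq(&xtime_lock);
}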