From: Bill Davidsen <davidsen@tmr.com>
To: Rik van Riel <riel@conectiva.com.br>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [RFC][PATCH] iowait statistics
Date: Tue, 14 May 2002 21:31:30 -0400 (EDT)
Message-ID: <Pine.LNX.3.96.1020514212343.2164A-200000@gatekeeper.tmr.com>
In-Reply-To: <Pine.LNX.4.44L.0205132214480.32261-100000@imladris.surriel.com>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 957 bytes --]

On Mon, 13 May 2002, Rik van Riel wrote:

> Hi,
> 
> the following patch implements iowait statistics in a simple way:

This follows some work I was doing back in 2.4.10 or so WRT measuring just
the delay caused by waiting for page I/O. Attached is a patch against
2.4.19-pre8-ac3 which is the offspring of the preempt-kernel patch and Rik's
iowait patch. All I can claim is that it still boots and runs, seems to have
working preempt, and returns reasonable numbers for iowait on a uniprocessor.
I intend to test more, but I know that this is a popular ac version, and
hopefully this will be useful.
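
  For what it's worth, the user-visible change is an extra column on the
"cpu" lines in /proc/stat: user, nice, system, idle and then iowait jiffies.
As a rough illustration (the numbers below are made up, not from a real run),
a summary line would look like

	cpu  4705 356 1292 128370 821

where the fifth field counts ticks charged while at least one task was
sleeping on page or buffer I/O.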

  Until I try this on a real machine and see what tuning of things like
hdparm and elvtune do, test rmap against -aa, etc., this is an exercise in
getting it to work. I'd be interested in feedback, and I hope Rik will
continue his development.
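
  If you want to watch the numbers move, something like the quick hack below
(hypothetical user-space code, not part of the patch) samples the first "cpu"
line of /proc/stat twice and prints the per-field deltas; it assumes the
five-field format the patch emits.

/* iowatch.c - report /proc/stat "cpu" deltas over a short interval.
 * Assumes the patched five-field line: user nice system idle iowait.
 */
#include <stdio.h>
#include <unistd.h>

static int read_cpu(unsigned long v[5])
{
	FILE *f = fopen("/proc/stat", "r");
	int n;

	if (!f)
		return -1;
	n = fscanf(f, "cpu %lu %lu %lu %lu %lu",
		   &v[0], &v[1], &v[2], &v[3], &v[4]);
	fclose(f);
	return n == 5 ? 0 : -1;
}

int main(void)
{
	unsigned long a[5], b[5];

	if (read_cpu(a))
		return 1;
	sleep(5);			/* sample interval, in seconds */
	if (read_cpu(b))
		return 1;
	printf("user %lu nice %lu system %lu idle %lu iowait %lu\n",
	       b[0] - a[0], b[1] - a[1], b[2] - a[2],
	       b[3] - a[3], b[4] - a[4]);
	return 0;
}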

-- 
bill davidsen <davidsen@tmr.com>
  CTO, TMR Associates, Inc
Doing interesting things with little computers since 1979.

[-- Attachment #2: Type: TEXT/PLAIN, Size: 69768 bytes --]

*** ./Makefile	Tue May 14 14:59:18 2002
--- ../linux-2.4.19-pre8-ac3p/./Makefile	Tue May 14 17:01:49 2002
***************
*** 1,7 ****
  VERSION = 2
  PATCHLEVEL = 4
  SUBLEVEL = 19
! EXTRAVERSION = -pre8-ac3
  
  KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
  
--- 1,7 ----
  VERSION = 2
  PATCHLEVEL = 4
  SUBLEVEL = 19
! EXTRAVERSION = -pre8-ac3.1p+iow
  
  KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
  
*** ./fs/buffer.c	Tue May 14 14:59:19 2002
--- ../linux-2.4.19-pre8-ac3p/./fs/buffer.c	Tue May 14 16:43:04 2002
***************
*** 154,164 ****
--- 154,166 ----
  	get_bh(bh);
  	add_wait_queue(&bh->b_wait, &wait);
  	do {
+ 		atomic_inc(&nr_iowait_tasks);
  		run_task_queue(&tq_disk);
  		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
  		if (!buffer_locked(bh))
  			break;
  		schedule();
+ 		atomic_dec(&nr_iowait_tasks);
  	} while (buffer_locked(bh));
  	tsk->state = TASK_RUNNING;
  	remove_wait_queue(&bh->b_wait, &wait);
*** ./fs/exec.c	Tue May 14 14:59:19 2002
--- ../linux-2.4.19-pre8-ac3p/./fs/exec.c	Tue May 14 15:09:17 2002
***************
*** 427,434 ****
  		active_mm = current->active_mm;
  		current->mm = mm;
  		current->active_mm = mm;
- 		task_unlock(current);
  		activate_mm(active_mm, mm);
  		mm_release();
  		if (old_mm) {
  			if (active_mm != old_mm) BUG();
--- 427,434 ----
  		active_mm = current->active_mm;
  		current->mm = mm;
  		current->active_mm = mm;
  		activate_mm(active_mm, mm);
+ 		task_unlock(current);
  		mm_release();
  		if (old_mm) {
  			if (active_mm != old_mm) BUG();
*** ./fs/proc/proc_misc.c	Tue May 14 14:59:20 2002
--- ../linux-2.4.19-pre8-ac3p/./fs/proc/proc_misc.c	Tue May 14 16:48:00 2002
***************
*** 273,279 ****
  	int i, len;
  	extern unsigned long total_forks;
  	unsigned long jif = jiffies;
! 	unsigned int sum = 0, user = 0, nice = 0, system = 0;
  	int major, disk;
  
  	for (i = 0 ; i < smp_num_cpus; i++) {
--- 273,279 ----
  	int i, len;
  	extern unsigned long total_forks;
  	unsigned long jif = jiffies;
! 	unsigned int sum = 0, user = 0, nice = 0, system = 0, iowait = 0;
  	int major, disk;
  
  	for (i = 0 ; i < smp_num_cpus; i++) {
***************
*** 282,304 ****
  		user += kstat.per_cpu_user[cpu];
  		nice += kstat.per_cpu_nice[cpu];
  		system += kstat.per_cpu_system[cpu];
  #if !defined(CONFIG_ARCH_S390)
  		for (j = 0 ; j < NR_IRQS ; j++)
  			sum += kstat.irqs[cpu][j];
  #endif
  	}
  
! 	len = sprintf(page, "cpu  %u %u %u %lu\n", user, nice, system,
! 		      jif * smp_num_cpus - (user + nice + system));
  	for (i = 0 ; i < smp_num_cpus; i++)
! 		len += sprintf(page + len, "cpu%d %u %u %u %lu\n",
  			i,
  			kstat.per_cpu_user[cpu_logical_map(i)],
  			kstat.per_cpu_nice[cpu_logical_map(i)],
  			kstat.per_cpu_system[cpu_logical_map(i)],
  			jif - (  kstat.per_cpu_user[cpu_logical_map(i)] \
  				   + kstat.per_cpu_nice[cpu_logical_map(i)] \
! 				   + kstat.per_cpu_system[cpu_logical_map(i)]));
  	len += sprintf(page + len,
  		"page %u %u\n"
  		"swap %u %u\n"
--- 282,307 ----
  		user += kstat.per_cpu_user[cpu];
  		nice += kstat.per_cpu_nice[cpu];
  		system += kstat.per_cpu_system[cpu];
+ 		iowait += kstat.per_cpu_iowait[cpu];
  #if !defined(CONFIG_ARCH_S390)
  		for (j = 0 ; j < NR_IRQS ; j++)
  			sum += kstat.irqs[cpu][j];
  #endif
  	}
  
! 	len = sprintf(page, "cpu  %u %u %u %lu %u\n", user, nice, system,
! 		      jif * smp_num_cpus - (user + nice + system),
! 		      iowait);
  	for (i = 0 ; i < smp_num_cpus; i++)
! 		len += sprintf(page + len, "cpu%d %u %u %u %lu %u\n",
  			i,
  			kstat.per_cpu_user[cpu_logical_map(i)],
  			kstat.per_cpu_nice[cpu_logical_map(i)],
  			kstat.per_cpu_system[cpu_logical_map(i)],
  			jif - (  kstat.per_cpu_user[cpu_logical_map(i)] \
  				   + kstat.per_cpu_nice[cpu_logical_map(i)] \
! 				   + kstat.per_cpu_system[cpu_logical_map(i)]),
! 			kstat.per_cpu_iowait[cpu_logical_map(i)]);
  	len += sprintf(page + len,
  		"page %u %u\n"
  		"swap %u %u\n"
*** ./fs/fat/cache.c	Fri Oct 12 16:48:42 2001
--- ../linux-2.4.19-pre8-ac3p/./fs/fat/cache.c	Tue May 14 15:09:17 2002
***************
*** 14,19 ****
--- 14,20 ----
  #include <linux/string.h>
  #include <linux/stat.h>
  #include <linux/fat_cvf.h>
+ #include <linux/sched.h>
  
  #if 0
  #  define PRINTK(x) printk x
*** ./fs/nls/nls_base.c	Tue May 14 14:55:54 2002
--- ../linux-2.4.19-pre8-ac3p/./fs/nls/nls_base.c	Tue May 14 15:09:18 2002
***************
*** 18,23 ****
--- 18,24 ----
  #ifdef CONFIG_KMOD
  #include <linux/kmod.h>
  #endif
+ #include <linux/sched.h>
  #include <linux/spinlock.h>
  
  static struct nls_table *tables;
*** ./fs/adfs/map.c	Thu Oct 25 16:53:53 2001
--- ../linux-2.4.19-pre8-ac3p/./fs/adfs/map.c	Tue May 14 15:09:17 2002
***************
*** 12,17 ****
--- 12,18 ----
  #include <linux/fs.h>
  #include <linux/adfs_fs.h>
  #include <linux/spinlock.h>
+ #include <linux/sched.h>
  
  #include "adfs.h"
  
*** ./kernel/sched.c	Tue May 14 14:59:20 2002
--- ../linux-2.4.19-pre8-ac3p/./kernel/sched.c	Tue May 14 15:09:18 2002
***************
*** 165,174 ****
--- 165,176 ----
  	struct runqueue *rq;
  
  repeat_lock_task:
+ 	preempt_disable();
  	rq = task_rq(p);
  	spin_lock_irqsave(&rq->lock, *flags);
  	if (unlikely(rq != task_rq(p))) {
  		spin_unlock_irqrestore(&rq->lock, *flags);
+ 		preempt_enable();
  		goto repeat_lock_task;
  	}
  	return rq;
***************
*** 177,182 ****
--- 179,185 ----
  static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags)
  {
  	spin_unlock_irqrestore(&rq->lock, *flags);
+ 	preempt_enable();
  }
  
  /*
***************
*** 257,267 ****
--- 260,272 ----
  {
  	int need_resched;
  
+ 	preempt_disable();
  	need_resched = p->need_resched;
  	wmb();
  	set_tsk_need_resched(p);
  	if (!need_resched && (p->cpu != smp_processor_id()))
  		smp_send_reschedule(p->cpu);
+ 	preempt_enable();
  }
  
  #ifdef CONFIG_SMP
***************
*** 276,281 ****
--- 281,287 ----
  	runqueue_t *rq;
  
  repeat:
+ 	preempt_disable();
  	rq = task_rq(p);
  	while (unlikely(rq->curr == p)) {
  		cpu_relax();
***************
*** 284,292 ****
--- 290,300 ----
  	rq = task_rq_lock(p, &flags);
  	if (unlikely(rq->curr == p)) {
  		task_rq_unlock(rq, &flags);
+ 		preempt_enable();
  		goto repeat;
  	}
  	task_rq_unlock(rq, &flags);
+ 	preempt_enable();
  }
  
  /*
***************
*** 340,345 ****
--- 348,354 ----
  {
  	runqueue_t *rq;
  
+ 	preempt_disable();
  	rq = this_rq();
  	spin_lock_irq(&rq->lock);
  
***************
*** 357,362 ****
--- 366,372 ----
  	p->cpu = smp_processor_id();
  	activate_task(p, rq);
  	spin_unlock_irq(&rq->lock);
+ 	preempt_enable();
  }
  
  /*
***************
*** 384,390 ****
  			p->sleep_avg) / (EXIT_WEIGHT + 1);
  }
  
! #if CONFIG_SMP
  asmlinkage void schedule_tail(task_t *prev)
  {
  	spin_unlock_irq(&this_rq()->frozen);
--- 394,400 ----
  			p->sleep_avg) / (EXIT_WEIGHT + 1);
  }
  
! #if CONFIG_SMP || CONFIG_PREEMPT
  asmlinkage void schedule_tail(task_t *prev)
  {
  	spin_unlock_irq(&this_rq()->frozen);
***************
*** 739,744 ****
--- 749,755 ----
  	BUG_ON(in_interrupt());
  
  need_resched:
+ 	preempt_disable();
  	prev = current;
  	rq = this_rq();
  
***************
*** 746,751 ****
--- 757,769 ----
  	prev->sleep_timestamp = jiffies;
  	spin_lock_irq(&rq->lock);
  
+ 	/*
+ 	 * if entering from preempt_schedule, off a kernel preemption,
+ 	 * go straight to picking the next task.
+ 	 */
+ 	if (unlikely(preempt_get_count() & PREEMPT_ACTIVE))
+ 		goto pick_next_task;
+ 
  	switch (prev->state) {
  	case TASK_INTERRUPTIBLE:
  		if (unlikely(signal_pending(prev))) {
***************
*** 757,765 ****
  	case TASK_RUNNING:
  		;
  	}
- #if CONFIG_SMP
  pick_next_task:
- #endif
  	if (unlikely(!rq->nr_running)) {
  #if CONFIG_SMP
  		load_balance(rq, 1);
--- 775,781 ----
***************
*** 810,820 ****
--- 826,855 ----
  	}
  
  	reacquire_kernel_lock(current);
+ 	preempt_enable_no_resched();
  	if (need_resched())
  		goto need_resched;
  	return;
  }
  
+ #ifdef CONFIG_PREEMPT
+ /*
+  * this is the entry point to schedule() from in-kernel preemption.
+ */
+ asmlinkage void preempt_schedule(void)
+ {
+ need_resched:
+ 	current->preempt_count += PREEMPT_ACTIVE;
+ 	schedule();
+  	current->preempt_count -= PREEMPT_ACTIVE;
+ 
+ 	/* we could miss a preemption between schedule() and now */
+  	barrier();
+ 	if (unlikely((current->need_resched)))
+ 		goto need_resched;
+ }
+ #endif /* CONFIG_PREEMPT */
+ 
  /*
   * The core wakeup function.  Non-exclusive wakeups (nr_exclusive == 0) just
   * wake everything up.  If it's an exclusive wakeup (nr_exclusive == small +ve
***************
*** 1192,1197 ****
--- 1227,1233 ----
  	runqueue_t *rq;
  	prio_array_t *array;
  
+ 	preempt_disable();
  	rq = this_rq();
  
  	/*
***************
*** 1220,1225 ****
--- 1256,1262 ----
  		__set_bit(current->prio, array->bitmap);
  	}
  	spin_unlock(&rq->lock);
+ 	preempt_enable_no_resched();
  
  	schedule();
  
***************
*** 1424,1429 ****
--- 1461,1469 ----
  	double_rq_unlock(idle_rq, rq);
  	set_tsk_need_resched(idle);
  	__restore_flags(flags);
+ 
+ 	/* Set the preempt count _outside_ the spinlocks! */
+ 	idle->preempt_count = (idle->lock_depth >= 0);
  }
  
  extern void init_timervecs(void);
***************
*** 1520,1525 ****
--- 1560,1566 ----
  	if (!new_mask)
  		BUG();
  
+ 	preempt_disable();
  	rq = task_rq_lock(p, &flags);
  	p->cpus_allowed = new_mask;
  	/*
***************
*** 1528,1534 ****
  	 */
  	if (new_mask & (1UL << p->cpu)) {
  		task_rq_unlock(rq, &flags);
! 		return;
  	}
  
  	init_MUTEX_LOCKED(&req.sem);
--- 1569,1575 ----
  	 */
  	if (new_mask & (1UL << p->cpu)) {
  		task_rq_unlock(rq, &flags);
! 		goto out;
  	}
  
  	init_MUTEX_LOCKED(&req.sem);
***************
*** 1538,1543 ****
--- 1579,1586 ----
  	wake_up_process(rq->migration_thread);
  
  	down(&req.sem);
+ out:
+ 	preempt_enable();
  }
  
  static int migration_thread(void * bind_cpu)
***************
*** 1592,1609 ****
  		cpu_dest = __ffs(p->cpus_allowed);
  		rq_dest = cpu_rq(cpu_dest);
  repeat:
! 		cpu_src = p->thread_info->cpu;
  		rq_src = cpu_rq(cpu_src);
  
  		local_irq_save(flags);
  		double_rq_lock(rq_src, rq_dest);
! 		if (p->thread_info->cpu != cpu_src) {
  			double_rq_unlock(rq_src, rq_dest);
  			local_irq_restore(flags);
  			goto repeat;
  		}
  		if (rq_src == rq) {
! 			p->thread_info->cpu = cpu_dest;
  			if (p->array) {
  				deactivate_task(p, rq_src);
  				activate_task(p, rq_dest);
--- 1635,1652 ----
  		cpu_dest = __ffs(p->cpus_allowed);
  		rq_dest = cpu_rq(cpu_dest);
  repeat:
! 		cpu_src = p->cpu;
  		rq_src = cpu_rq(cpu_src);
  
  		local_irq_save(flags);
  		double_rq_lock(rq_src, rq_dest);
! 		if (p->cpu != cpu_src) {
  			double_rq_unlock(rq_src, rq_dest);
  			local_irq_restore(flags);
  			goto repeat;
  		}
  		if (rq_src == rq) {
! 			p->cpu = cpu_dest;
  			if (p->array) {
  				deactivate_task(p, rq_src);
  				activate_task(p, rq_dest);
*** ./kernel/exit.c	Tue May 14 14:59:20 2002
--- ../linux-2.4.19-pre8-ac3p/./kernel/exit.c	Tue May 14 15:09:18 2002
***************
*** 373,380 ****
  		/* more a memory barrier than a real lock */
  		task_lock(tsk);
  		tsk->mm = NULL;
- 		task_unlock(tsk);
  		enter_lazy_tlb(mm, current, smp_processor_id());
  		mmput(mm);
  	}
  }
--- 373,380 ----
  		/* more a memory barrier than a real lock */
  		task_lock(tsk);
  		tsk->mm = NULL;
  		enter_lazy_tlb(mm, current, smp_processor_id());
+ 		task_unlock(tsk);
  		mmput(mm);
  	}
  }
***************
*** 494,499 ****
--- 494,504 ----
  		panic("Attempted to kill init!");
  	tsk->flags |= PF_EXITING;
  	del_timer_sync(&tsk->real_timer);
+ 
+ 	if (unlikely(preempt_get_count()))
+ 		printk(KERN_ERR "%s[%d] exited with preempt_count %d\n",
+ 				current->comm, current->pid,
+ 				preempt_get_count());
  
  fake_volatile:
  #ifdef CONFIG_BSD_PROCESS_ACCT
*** ./kernel/fork.c	Tue May 14 14:59:20 2002
--- ../linux-2.4.19-pre8-ac3p/./kernel/fork.c	Tue May 14 15:09:18 2002
***************
*** 640,645 ****
--- 640,652 ----
  	if (p->binfmt && p->binfmt->module)
  		__MOD_INC_USE_COUNT(p->binfmt->module);
  
+ #ifdef CONFIG_PREEMPT
+ 	/*
+ 	 * schedule_tail drops this_rq()->lock so compensate with a count
+ 	 * of 1.  Also, we want to start with kernel preemption disabled.
+ 	 */
+ 	p->preempt_count = 1;
+ #endif
  	p->did_exec = 0;
  	p->swappable = 0;
  	p->state = TASK_UNINTERRUPTIBLE;
*** ./kernel/ksyms.c	Tue May 14 14:59:20 2002
--- ../linux-2.4.19-pre8-ac3p/./kernel/ksyms.c	Tue May 14 15:09:18 2002
***************
*** 442,447 ****
--- 442,450 ----
  EXPORT_SYMBOL(interruptible_sleep_on);
  EXPORT_SYMBOL(interruptible_sleep_on_timeout);
  EXPORT_SYMBOL(schedule);
+ #ifdef CONFIG_PREEMPT
+ EXPORT_SYMBOL(preempt_schedule);
+ #endif
  EXPORT_SYMBOL(schedule_timeout);
  EXPORT_SYMBOL(sys_sched_yield);
  EXPORT_SYMBOL(set_user_nice);
*** ./kernel/timer.c	Tue May 14 14:59:20 2002
--- ../linux-2.4.19-pre8-ac3p/./kernel/timer.c	Tue May 14 18:42:46 2002
***************
*** 585,590 ****
--- 585,592 ----
  {
  	p->per_cpu_utime[cpu] += user;
  	p->per_cpu_stime[cpu] += system;
+ 	if (atomic_read(&nr_iowait_tasks) > 0)
+ 		kstat.per_cpu_iowait[cpu] += system;
  	do_process_times(p, user, system);
  	do_it_virt(p, user);
  	do_it_prof(p);
*** ./lib/dec_and_lock.c	Wed Oct  3 12:11:26 2001
--- ../linux-2.4.19-pre8-ac3p/./lib/dec_and_lock.c	Tue May 14 15:09:18 2002
***************
*** 1,5 ****
--- 1,6 ----
  #include <linux/module.h>
  #include <linux/spinlock.h>
+ #include <linux/sched.h>
  #include <asm/atomic.h>
  
  /*
*** ./mm/filemap.c	Tue May 14 14:59:20 2002
--- ../linux-2.4.19-pre8-ac3p/./mm/filemap.c	Tue May 14 17:01:27 2002
***************
*** 45,50 ****
--- 45,51 ----
   */
  
  atomic_t page_cache_size = ATOMIC_INIT(0);
+ atomic_t nr_iowait_tasks = ATOMIC_INIT(0);
  unsigned int page_hash_bits;
  struct page **page_hash_table;
  
***************
*** 828,835 ****
--- 829,838 ----
  		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
  		if (!PageLocked(page))
  			break;
+ 		atomic_inc(&nr_iowait_tasks);
  		sync_page(page);
  		schedule();
+ 		atomic_dec(&nr_iowait_tasks);
  	} while (PageLocked(page));
  	__set_task_state(tsk, TASK_RUNNING);
  	remove_wait_queue(waitqueue, &wait);
***************
*** 875,882 ****
--- 878,887 ----
  	for (;;) {
  		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
  		if (PageLocked(page)) {
+ 			atomic_inc(&nr_iowait_tasks);
  			sync_page(page);
  			schedule();
+ 			atomic_dec(&nr_iowait_tasks);
  		}
  		if (!TryLockPage(page))
  			break;
*** ./mm/slab.c	Tue May 14 14:59:20 2002
--- ../linux-2.4.19-pre8-ac3p/./mm/slab.c	Tue May 14 15:09:18 2002
***************
*** 49,55 ****
   *  constructors and destructors are called without any locking.
   *  Several members in kmem_cache_t and slab_t never change, they
   *	are accessed without any locking.
!  *  The per-cpu arrays are never accessed from the wrong cpu, no locking.
   *  The non-constant members are protected with a per-cache irq spinlock.
   *
   * Further notes from the original documentation:
--- 49,56 ----
   *  constructors and destructors are called without any locking.
   *  Several members in kmem_cache_t and slab_t never change, they
   *	are accessed without any locking.
!  *  The per-cpu arrays are never accessed from the wrong cpu, no locking,
!  *  	and local interrupts are disabled so slab code is preempt-safe.
   *  The non-constant members are protected with a per-cache irq spinlock.
   *
   * Further notes from the original documentation:
*** ./CREDITS	Tue May 14 14:59:18 2002
--- ../linux-2.4.19-pre8-ac3p/./CREDITS	Tue May 14 15:09:17 2002
***************
*** 996,1003 ****
  
  N: Nigel Gamble
  E: nigel@nrg.org
- E: nigel@sgi.com
  D: Interrupt-driven printer driver
  S: 120 Alley Way
  S: Mountain View, California 94040
  S: USA
--- 996,1003 ----
  
  N: Nigel Gamble
  E: nigel@nrg.org
  D: Interrupt-driven printer driver
+ D: Preemptible kernel
  S: 120 Alley Way
  S: Mountain View, California 94040
  S: USA
*** ./include/linux/sched.h	Tue May 14 14:59:20 2002
--- ../linux-2.4.19-pre8-ac3p/./include/linux/sched.h	Tue May 14 18:09:05 2002
***************
*** 91,96 ****
--- 91,97 ----
  #define TASK_UNINTERRUPTIBLE	2
  #define TASK_ZOMBIE		4
  #define TASK_STOPPED		8
+ #define PREEMPT_ACTIVE		0x4000000
  
  #define __set_task_state(tsk, state_value)		\
  	do { (tsk)->state = (state_value); } while (0)
***************
*** 156,161 ****
--- 157,165 ----
  #define	MAX_SCHEDULE_TIMEOUT	LONG_MAX
  extern signed long FASTCALL(schedule_timeout(signed long timeout));
  asmlinkage void schedule(void);
+ #ifdef CONFIG_PREEMPT
+ asmlinkage void preempt_schedule(void);
+ #endif
  
  extern int schedule_task(struct tq_struct *task);
  extern void flush_scheduled_tasks(void);
***************
*** 291,297 ****
  	 * offsets of these are hardcoded elsewhere - touch with care
  	 */
  	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
! 	unsigned long flags;	/* per process flags, defined below */
  	int sigpending;
  	mm_segment_t addr_limit;	/* thread address space:
  					 	0-0xBFFFFFFF for user-thead
--- 295,301 ----
  	 * offsets of these are hardcoded elsewhere - touch with care
  	 */
  	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
! 	int preempt_count;	/* 0 => preemptable, <0 => BUG */
  	int sigpending;
  	mm_segment_t addr_limit;	/* thread address space:
  					 	0-0xBFFFFFFF for user-thead
***************
*** 317,322 ****
--- 321,327 ----
  	unsigned long policy;
  	unsigned long cpus_allowed;
  	unsigned int time_slice;
+ 	unsigned long flags;
  
  	task_t *next_task, *prev_task;
  
***************
*** 358,363 ****
--- 363,369 ----
  	struct tms times;
  	unsigned long start_time;
  	long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS];
+ 	long per_cpu_iowait[NR_CPUS];
  /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
  	unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
  	int swappable:1;
***************
*** 942,947 ****
--- 948,958 ----
  {
  	return unlikely(current->need_resched);
  }
+ 
+ #define _TASK_STRUCT_DEFINED
+ #include <linux/dcache.h>
+ #include <linux/tqueue.h>
+ #include <linux/fs_struct.h>
  
  #endif /* __KERNEL__ */
  
*** ./include/linux/tqueue.h	Tue May 14 14:59:20 2002
--- ../linux-2.4.19-pre8-ac3p/./include/linux/tqueue.h	Tue May 14 15:20:25 2002
***************
*** 94,99 ****
--- 94,115 ----
  extern spinlock_t tqueue_lock;
  
  /*
+  * Call all "bottom halfs" on a given list.
+  */
+ 
+ extern void __run_task_queue(task_queue *list);
+ 
+ static inline void run_task_queue(task_queue *list)
+ {
+ 	if (TQ_ACTIVE(*list))
+ 		__run_task_queue(list);
+ }
+ 
+ #endif /* _LINUX_TQUEUE_H */
+ 
+ #if !defined(_LINUX_TQUEUE_H_INLINES) && defined(_TASK_STRUCT_DEFINED)
+ #define _LINUX_TQUEUE_H_INLINES
+ /*
   * Queue a task on a tq.  Return non-zero if it was successfully
   * added.
   */
***************
*** 109,125 ****
  	}
  	return ret;
  }
! 
! /*
!  * Call all "bottom halfs" on a given list.
!  */
! 
! extern void __run_task_queue(task_queue *list);
! 
! static inline void run_task_queue(task_queue *list)
! {
! 	if (TQ_ACTIVE(*list))
! 		__run_task_queue(list);
! }
! 
! #endif /* _LINUX_TQUEUE_H */
--- 125,128 ----
  	}
  	return ret;
  }
! #endif
*** ./include/linux/kernel_stat.h	Tue May 14 14:59:20 2002
--- ../linux-2.4.19-pre8-ac3p/./include/linux/kernel_stat.h	Tue May 14 16:49:04 2002
***************
*** 18,24 ****
  struct kernel_stat {
  	unsigned int per_cpu_user[NR_CPUS],
  	             per_cpu_nice[NR_CPUS],
! 	             per_cpu_system[NR_CPUS];
  	unsigned int dk_drive[DK_MAX_MAJOR][DK_MAX_DISK];
  	unsigned int dk_drive_rio[DK_MAX_MAJOR][DK_MAX_DISK];
  	unsigned int dk_drive_wio[DK_MAX_MAJOR][DK_MAX_DISK];
--- 18,25 ----
  struct kernel_stat {
  	unsigned int per_cpu_user[NR_CPUS],
  	             per_cpu_nice[NR_CPUS],
! 	             per_cpu_system[NR_CPUS],
! 	             per_cpu_iowait[NR_CPUS];
  	unsigned int dk_drive[DK_MAX_MAJOR][DK_MAX_DISK];
  	unsigned int dk_drive_rio[DK_MAX_MAJOR][DK_MAX_DISK];
  	unsigned int dk_drive_wio[DK_MAX_MAJOR][DK_MAX_DISK];
*** ./include/linux/swap.h	Tue May 14 14:59:20 2002
--- ../linux-2.4.19-pre8-ac3p/./include/linux/swap.h	Tue May 14 16:49:58 2002
***************
*** 90,95 ****
--- 90,96 ----
  extern int nr_inactive_clean_pages;
  extern atomic_t page_cache_size;
  extern atomic_t buffermem_pages;
+ extern atomic_t nr_iowait_tasks;
  extern spinlock_cacheline_t pagecache_lock_cacheline;
  #define pagecache_lock (pagecache_lock_cacheline.lock)
  extern void __remove_inode_page(struct page *);
*** ./include/linux/smp_lock.h	Thu Nov 22 14:46:27 2001
--- ../linux-2.4.19-pre8-ac3p/./include/linux/smp_lock.h	Tue May 14 18:09:08 2002
***************
*** 3,9 ****
  
  #include <linux/config.h>
  
! #ifndef CONFIG_SMP
  
  #define lock_kernel()				do { } while(0)
  #define unlock_kernel()				do { } while(0)
--- 3,9 ----
  
  #include <linux/config.h>
  
! #if !defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT)
  
  #define lock_kernel()				do { } while(0)
  #define unlock_kernel()				do { } while(0)
*** ./include/linux/dcache.h	Tue May 14 14:55:57 2002
--- ../linux-2.4.19-pre8-ac3p/./include/linux/dcache.h	Tue May 14 15:20:25 2002
***************
*** 126,156 ****
  
  extern spinlock_t dcache_lock;
  
- /**
-  * d_drop - drop a dentry
-  * @dentry: dentry to drop
-  *
-  * d_drop() unhashes the entry from the parent
-  * dentry hashes, so that it won't be found through
-  * a VFS lookup any more. Note that this is different
-  * from deleting the dentry - d_delete will try to
-  * mark the dentry negative if possible, giving a
-  * successful _negative_ lookup, while d_drop will
-  * just make the cache lookup fail.
-  *
-  * d_drop() is used mainly for stuff that wants
-  * to invalidate a dentry for some reason (NFS
-  * timeouts or autofs deletes).
-  */
- 
- static __inline__ void d_drop(struct dentry * dentry)
- {
- 	spin_lock(&dcache_lock);
- 	list_del(&dentry->d_hash);
- 	INIT_LIST_HEAD(&dentry->d_hash);
- 	spin_unlock(&dcache_lock);
- }
- 
  static __inline__ int dname_external(struct dentry *d)
  {
  	return d->d_name.name != d->d_iname; 
--- 126,131 ----
***************
*** 275,277 ****
--- 250,283 ----
  #endif /* __KERNEL__ */
  
  #endif	/* __LINUX_DCACHE_H */
+ 
+ #if !defined(__LINUX_DCACHE_H_INLINES) && defined(_TASK_STRUCT_DEFINED)
+ #define __LINUX_DCACHE_H_INLINES
+ 
+ #ifdef __KERNEL__
+ /**
+  * d_drop - drop a dentry
+  * @dentry: dentry to drop
+  *
+  * d_drop() unhashes the entry from the parent
+  * dentry hashes, so that it won't be found through
+  * a VFS lookup any more. Note that this is different
+  * from deleting the dentry - d_delete will try to
+  * mark the dentry negative if possible, giving a
+  * successful _negative_ lookup, while d_drop will
+  * just make the cache lookup fail.
+  *
+  * d_drop() is used mainly for stuff that wants
+  * to invalidate a dentry for some reason (NFS
+  * timeouts or autofs deletes).
+  */
+ 
+ static __inline__ void d_drop(struct dentry * dentry)
+ {
+ 	spin_lock(&dcache_lock);
+ 	list_del(&dentry->d_hash);
+ 	INIT_LIST_HEAD(&dentry->d_hash);
+ 	spin_unlock(&dcache_lock);
+ }
+ #endif
+ #endif
*** ./include/linux/smp.h	Tue May 14 14:59:20 2002
--- ../linux-2.4.19-pre8-ac3p/./include/linux/smp.h	Tue May 14 15:20:25 2002
***************
*** 81,87 ****
--- 81,89 ----
  #define smp_processor_id()			0
  #define hard_smp_processor_id()			0
  #define smp_threads_ready			1
+ #ifndef CONFIG_PREEMPT
  #define kernel_lock()
+ #endif
  #define cpu_logical_map(cpu)			0
  #define cpu_number_map(cpu)			0
  #define smp_call_function(func,info,retry,wait)	({ 0; })
*** ./include/linux/spinlock.h	Tue May 14 14:55:58 2002
--- ../linux-2.4.19-pre8-ac3p/./include/linux/spinlock.h	Tue May 14 15:20:25 2002
***************
*** 2,7 ****
--- 2,8 ----
  #define __LINUX_SPINLOCK_H
  
  #include <linux/config.h>
+ #include <linux/compiler.h>
  
  /*
   * These are the generic versions of the spinlocks and read-write
***************
*** 62,69 ****
--- 63,72 ----
  
  #if (DEBUG_SPINLOCKS < 1)
  
+ #ifndef CONFIG_PREEMPT
  #define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic)
  #define ATOMIC_DEC_AND_LOCK
+ #endif
  
  /*
   * Your basic spinlocks, allowing only a single CPU anywhere
***************
*** 79,89 ****
  #endif
  
  #define spin_lock_init(lock)	do { } while(0)
! #define spin_lock(lock)		(void)(lock) /* Not "unused variable". */
  #define spin_is_locked(lock)	(0)
! #define spin_trylock(lock)	({1; })
  #define spin_unlock_wait(lock)	do { } while(0)
! #define spin_unlock(lock)	do { } while(0)
  
  #elif (DEBUG_SPINLOCKS < 2)
  
--- 82,92 ----
  #endif
  
  #define spin_lock_init(lock)	do { } while(0)
! #define _raw_spin_lock(lock)	(void)(lock) /* Not "unused variable". */
  #define spin_is_locked(lock)	(0)
! #define _raw_spin_trylock(lock)	({1; })
  #define spin_unlock_wait(lock)	do { } while(0)
! #define _raw_spin_unlock(lock)	do { } while(0)
  
  #elif (DEBUG_SPINLOCKS < 2)
  
***************
*** 142,153 ****
  #endif
  
  #define rwlock_init(lock)	do { } while(0)
! #define read_lock(lock)		(void)(lock) /* Not "unused variable". */
! #define read_unlock(lock)	do { } while(0)
! #define write_lock(lock)	(void)(lock) /* Not "unused variable". */
! #define write_unlock(lock)	do { } while(0)
  
  #endif /* !SMP */
  
  /* "lock on reference count zero" */
  #ifndef ATOMIC_DEC_AND_LOCK
--- 145,219 ----
  #endif
  
  #define rwlock_init(lock)	do { } while(0)
! #define _raw_read_lock(lock)	(void)(lock) /* Not "unused variable". */
! #define _raw_read_unlock(lock)	do { } while(0)
! #define _raw_write_lock(lock)	(void)(lock) /* Not "unused variable". */
! #define _raw_write_unlock(lock)	do { } while(0)
  
  #endif /* !SMP */
+ 
+ #ifdef CONFIG_PREEMPT
+ 
+ #define preempt_get_count() (current->preempt_count)
+ 
+ #define preempt_disable() \
+ do { \
+ 	++current->preempt_count; \
+ 	barrier(); \
+ } while (0)
+ 
+ #define preempt_enable_no_resched() \
+ do { \
+ 	--current->preempt_count; \
+ 	barrier(); \
+ } while (0)
+ 
+ #define preempt_enable() \
+ do { \
+ 	--current->preempt_count; \
+ 	barrier(); \
+ 	if (unlikely(current->preempt_count < current->need_resched)) \
+ 		preempt_schedule(); \
+ } while (0)
+ 
+ #define spin_lock(lock)	\
+ do { \
+ 	preempt_disable(); \
+ 	_raw_spin_lock(lock); \
+ } while(0)
+ 
+ #define spin_trylock(lock)	({preempt_disable(); _raw_spin_trylock(lock) ? \
+ 				1 : ({preempt_enable(); 0;});})
+ #define spin_unlock(lock) \
+ do { \
+ 	_raw_spin_unlock(lock); \
+ 	preempt_enable(); \
+ } while (0)
+ 
+ #define read_lock(lock)		({preempt_disable(); _raw_read_lock(lock);})
+ #define read_unlock(lock)	({_raw_read_unlock(lock); preempt_enable();})
+ #define write_lock(lock)	({preempt_disable(); _raw_write_lock(lock);})
+ #define write_unlock(lock)	({_raw_write_unlock(lock); preempt_enable();})
+ #define write_trylock(lock)	({preempt_disable();_raw_write_trylock(lock) ? \
+ 				1 : ({preempt_enable(); 0;});})
+ 
+ #else
+ 
+ #define preempt_get_count()	(0)
+ #define preempt_disable()	do { } while (0)
+ #define preempt_enable_no_resched()	do {} while(0)
+ #define preempt_enable()	do { } while (0)
+ 
+ #define spin_lock(lock)		_raw_spin_lock(lock)
+ #define spin_trylock(lock)	_raw_spin_trylock(lock)
+ #define spin_unlock(lock)	_raw_spin_unlock(lock)
+ 
+ #define read_lock(lock)		_raw_read_lock(lock)
+ #define read_unlock(lock)	_raw_read_unlock(lock)
+ #define write_lock(lock)	_raw_write_lock(lock)
+ #define write_unlock(lock)	_raw_write_unlock(lock)
+ #define write_trylock(lock)	_raw_write_trylock(lock)
+ #endif
  
  /* "lock on reference count zero" */
  #ifndef ATOMIC_DEC_AND_LOCK
*** ./include/linux/fs_struct.h	Fri Jul 13 18:10:44 2001
--- ../linux-2.4.19-pre8-ac3p/./include/linux/fs_struct.h	Tue May 14 15:09:18 2002
***************
*** 20,25 ****
--- 20,34 ----
  extern void exit_fs(struct task_struct *);
  extern void set_fs_altroot(void);
  
+ struct fs_struct *copy_fs_struct(struct fs_struct *old);
+ void put_fs_struct(struct fs_struct *fs);
+ 
+ #endif
+ #endif
+ 
+ #if !defined(_LINUX_FS_STRUCT_H_INLINES) && defined(_TASK_STRUCT_DEFINED)
+ #define _LINUX_FS_STRUCT_H_INLINES
+ #ifdef __KERNEL__
  /*
   * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
   * It can block. Requires the big lock held.
***************
*** 65,73 ****
  		mntput(old_pwdmnt);
  	}
  }
- 
- struct fs_struct *copy_fs_struct(struct fs_struct *old);
- void put_fs_struct(struct fs_struct *fs);
- 
  #endif
  #endif
--- 74,78 ----
*** ./include/linux/brlock.h	Tue May 14 14:59:20 2002
--- ../linux-2.4.19-pre8-ac3p/./include/linux/brlock.h	Tue May 14 15:20:27 2002
***************
*** 173,183 ****
  }
  
  #else
! # define br_read_lock(idx)	((void)(idx))
! # define br_read_unlock(idx)	((void)(idx))
! # define br_write_lock(idx)	((void)(idx))
! # define br_write_unlock(idx)	((void)(idx))
! #endif
  
  /*
   * Now enumerate all of the possible sw/hw IRQ protected
--- 173,183 ----
  }
  
  #else
! # define br_read_lock(idx)	({ (void)(idx); preempt_disable(); })
! # define br_read_unlock(idx)	({ (void)(idx); preempt_enable(); })
! # define br_write_lock(idx)	({ (void)(idx); preempt_disable(); })
! # define br_write_unlock(idx)	({ (void)(idx); preempt_enable(); })
! #endif	/* CONFIG_SMP */
  
  /*
   * Now enumerate all of the possible sw/hw IRQ protected
*** ./include/asm-i386/hardirq.h	Thu Nov 22 14:46:19 2001
--- ../linux-2.4.19-pre8-ac3p/./include/asm-i386/hardirq.h	Tue May 14 15:20:25 2002
***************
*** 36,41 ****
--- 36,43 ----
  
  #define synchronize_irq()	barrier()
  
+ #define release_irqlock(cpu)	do { } while (0)
+ 
  #else
  
  #include <asm/atomic.h>
*** ./include/asm-i386/softirq.h	Tue May 14 14:55:55 2002
--- ../linux-2.4.19-pre8-ac3p/./include/asm-i386/softirq.h	Tue May 14 15:20:25 2002
***************
*** 5,13 ****
  #include <asm/hardirq.h>
  
  #define __cpu_bh_enable(cpu) \
! 		do { barrier(); local_bh_count(cpu)--; } while (0)
  #define cpu_bh_disable(cpu) \
! 		do { local_bh_count(cpu)++; barrier(); } while (0)
  
  #define local_bh_disable()	cpu_bh_disable(smp_processor_id())
  #define __local_bh_enable()	__cpu_bh_enable(smp_processor_id())
--- 5,13 ----
  #include <asm/hardirq.h>
  
  #define __cpu_bh_enable(cpu) \
! 		do { barrier(); local_bh_count(cpu)--; preempt_enable(); } while (0)
  #define cpu_bh_disable(cpu) \
! 		do { preempt_disable(); local_bh_count(cpu)++; barrier(); } while (0)
  
  #define local_bh_disable()	cpu_bh_disable(smp_processor_id())
  #define __local_bh_enable()	__cpu_bh_enable(smp_processor_id())
***************
*** 22,28 ****
   * If you change the offsets in irq_stat then you have to
   * update this code as well.
   */
! #define local_bh_enable()						\
  do {									\
  	unsigned int *ptr = &local_bh_count(smp_processor_id());	\
  									\
--- 22,28 ----
   * If you change the offsets in irq_stat then you have to
   * update this code as well.
   */
! #define _local_bh_enable()						\
  do {									\
  	unsigned int *ptr = &local_bh_count(smp_processor_id());	\
  									\
***************
*** 44,48 ****
--- 44,50 ----
  		: "r" (ptr), "i" (do_softirq)				\
  		/* no registers clobbered */ );				\
  } while (0)
+ 
+ #define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while (0)
  
  #endif	/* __ASM_SOFTIRQ_H */
*** ./include/asm-i386/spinlock.h	Tue May 14 14:55:55 2002
--- ../linux-2.4.19-pre8-ac3p/./include/asm-i386/spinlock.h	Tue May 14 15:20:25 2002
***************
*** 77,83 ****
  		:"=m" (lock->lock) : : "memory"
  
  
! static inline void spin_unlock(spinlock_t *lock)
  {
  #if SPINLOCK_DEBUG
  	if (lock->magic != SPINLOCK_MAGIC)
--- 77,83 ----
  		:"=m" (lock->lock) : : "memory"
  
  
! static inline void _raw_spin_unlock(spinlock_t *lock)
  {
  #if SPINLOCK_DEBUG
  	if (lock->magic != SPINLOCK_MAGIC)
***************
*** 97,103 ****
  		:"=q" (oldval), "=m" (lock->lock) \
  		:"0" (oldval) : "memory"
  
! static inline void spin_unlock(spinlock_t *lock)
  {
  	char oldval = 1;
  #if SPINLOCK_DEBUG
--- 97,103 ----
  		:"=q" (oldval), "=m" (lock->lock) \
  		:"0" (oldval) : "memory"
  
! static inline void _raw_spin_unlock(spinlock_t *lock)
  {
  	char oldval = 1;
  #if SPINLOCK_DEBUG
***************
*** 113,119 ****
  
  #endif
  
! static inline int spin_trylock(spinlock_t *lock)
  {
  	char oldval;
  	__asm__ __volatile__(
--- 113,119 ----
  
  #endif
  
! static inline int _raw_spin_trylock(spinlock_t *lock)
  {
  	char oldval;
  	__asm__ __volatile__(
***************
*** 123,129 ****
  	return oldval > 0;
  }
  
! static inline void spin_lock(spinlock_t *lock)
  {
  #if SPINLOCK_DEBUG
  	__label__ here;
--- 123,129 ----
  	return oldval > 0;
  }
  
! static inline void _raw_spin_lock(spinlock_t *lock)
  {
  #if SPINLOCK_DEBUG
  	__label__ here;
***************
*** 179,185 ****
   */
  /* the spinlock helpers are in arch/i386/kernel/semaphore.c */
  
! static inline void read_lock(rwlock_t *rw)
  {
  #if SPINLOCK_DEBUG
  	if (rw->magic != RWLOCK_MAGIC)
--- 179,185 ----
   */
  /* the spinlock helpers are in arch/i386/kernel/semaphore.c */
  
! static inline void _raw_read_lock(rwlock_t *rw)
  {
  #if SPINLOCK_DEBUG
  	if (rw->magic != RWLOCK_MAGIC)
***************
*** 188,194 ****
  	__build_read_lock(rw, "__read_lock_failed");
  }
  
! static inline void write_lock(rwlock_t *rw)
  {
  #if SPINLOCK_DEBUG
  	if (rw->magic != RWLOCK_MAGIC)
--- 188,194 ----
  	__build_read_lock(rw, "__read_lock_failed");
  }
  
! static inline void _raw_write_lock(rwlock_t *rw)
  {
  #if SPINLOCK_DEBUG
  	if (rw->magic != RWLOCK_MAGIC)
***************
*** 197,206 ****
  	__build_write_lock(rw, "__write_lock_failed");
  }
  
! #define read_unlock(rw)		asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
! #define write_unlock(rw)	asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
  
! static inline int write_trylock(rwlock_t *lock)
  {
  	atomic_t *count = (atomic_t *)lock;
  	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
--- 197,206 ----
  	__build_write_lock(rw, "__write_lock_failed");
  }
  
! #define _raw_read_unlock(rw)		asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
! #define _raw_write_unlock(rw)	asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
  
! static inline int _raw_write_trylock(rwlock_t *lock)
  {
  	atomic_t *count = (atomic_t *)lock;
  	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
*** ./include/asm-i386/smplock.h	Tue May 14 14:55:55 2002
--- ../linux-2.4.19-pre8-ac3p/./include/asm-i386/smplock.h	Tue May 14 18:09:08 2002
***************
*** 11,17 ****
--- 11,25 ----
  extern spinlock_cacheline_t kernel_flag_cacheline;  
  #define kernel_flag kernel_flag_cacheline.lock      
  
+ #ifdef CONFIG_SMP
  #define kernel_locked()		spin_is_locked(&kernel_flag)
+ #else
+ #ifdef CONFIG_PREEMPT
+ #define kernel_locked()		preempt_get_count()
+ #else
+ #define kernel_locked()		1
+ #endif
+ #endif
  
  /*
   * Release global kernel lock and global interrupt lock
***************
*** 43,48 ****
--- 51,61 ----
   */
  static __inline__ void lock_kernel(void)
  {
+ #ifdef CONFIG_PREEMPT
+ 	if (current->lock_depth == -1)
+ 		spin_lock(&kernel_flag);
+ 	++current->lock_depth;
+ #else
  #if 1
  	if (!++current->lock_depth)
  		spin_lock(&kernel_flag);
***************
*** 54,59 ****
--- 67,73 ----
  		"\n9:"
  		:"=m" (__dummy_lock(&kernel_flag)),
  		 "=m" (current->lock_depth));
+ #endif
  #endif
  }
  
*** ./include/asm-i386/hw_irq.h	Thu Nov 22 14:46:18 2001
--- ../linux-2.4.19-pre8-ac3p/./include/asm-i386/hw_irq.h	Tue May 14 15:20:25 2002
***************
*** 95,100 ****
--- 95,112 ----
  #define __STR(x) #x
  #define STR(x) __STR(x)
  
+ #define GET_CURRENT \
+ 	"movl %esp, %ebx\n\t" \
+ 	"andl $-8192, %ebx\n\t"
+ 
+ #ifdef CONFIG_PREEMPT
+ #define BUMP_LOCK_COUNT \
+ 	GET_CURRENT \
+ 	"incl 4(%ebx)\n\t"
+ #else
+ #define BUMP_LOCK_COUNT
+ #endif
+ 
  #define SAVE_ALL \
  	"cld\n\t" \
  	"pushl %es\n\t" \
***************
*** 108,121 ****
  	"pushl %ebx\n\t" \
  	"movl $" STR(__KERNEL_DS) ",%edx\n\t" \
  	"movl %edx,%ds\n\t" \
! 	"movl %edx,%es\n\t"
  
  #define IRQ_NAME2(nr) nr##_interrupt(void)
  #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
- 
- #define GET_CURRENT \
- 	"movl %esp, %ebx\n\t" \
- 	"andl $-8192, %ebx\n\t"
  
  /*
   *	SMP has a few special interrupts for IPI messages
--- 120,130 ----
  	"pushl %ebx\n\t" \
  	"movl $" STR(__KERNEL_DS) ",%edx\n\t" \
  	"movl %edx,%ds\n\t" \
! 	"movl %edx,%es\n\t" \
! 	BUMP_LOCK_COUNT
  
  #define IRQ_NAME2(nr) nr##_interrupt(void)
  #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
  
  /*
   *	SMP has a few special interrupts for IPI messages
*** ./include/asm-i386/highmem.h	Tue May 14 14:55:55 2002
--- ../linux-2.4.19-pre8-ac3p/./include/asm-i386/highmem.h	Tue May 14 15:20:25 2002
***************
*** 88,93 ****
--- 88,94 ----
  	enum fixed_addresses idx;
  	unsigned long vaddr;
  
+ 	preempt_disable();
  	if (page < highmem_start_page)
  		return page_address(page);
  
***************
*** 109,116 ****
  	unsigned long vaddr = (unsigned long) kvaddr;
  	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
  
! 	if (vaddr < FIXADDR_START) // FIXME
  		return;
  
  	if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
  		out_of_line_bug();
--- 110,119 ----
  	unsigned long vaddr = (unsigned long) kvaddr;
  	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
  
! 	if (vaddr < FIXADDR_START) { // FIXME
! 		preempt_enable();
  		return;
+ 	}
  
  	if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
  		out_of_line_bug();
***************
*** 122,127 ****
--- 125,132 ----
  	pte_clear(kmap_pte-idx);
  	__flush_tlb_one(vaddr);
  #endif
+ 
+ 	preempt_enable();
  }
  
  #endif /* __KERNEL__ */
*** ./include/asm-i386/pgalloc.h	Tue May 14 14:59:20 2002
--- ../linux-2.4.19-pre8-ac3p/./include/asm-i386/pgalloc.h	Tue May 14 15:20:25 2002
***************
*** 75,94 ****
  {
  	unsigned long *ret;
  
  	if ((ret = pgd_quicklist) != NULL) {
  		pgd_quicklist = (unsigned long *)(*ret);
  		ret[0] = 0;
  		pgtable_cache_size--;
! 	} else
  		ret = (unsigned long *)get_pgd_slow();
  	return (pgd_t *)ret;
  }
  
  static inline void free_pgd_fast(pgd_t *pgd)
  {
  	*(unsigned long *)pgd = (unsigned long) pgd_quicklist;
  	pgd_quicklist = (unsigned long *) pgd;
  	pgtable_cache_size++;
  }
  
  static inline void free_pgd_slow(pgd_t *pgd)
--- 75,100 ----
  {
  	unsigned long *ret;
  
+ 	preempt_disable();
  	if ((ret = pgd_quicklist) != NULL) {
  		pgd_quicklist = (unsigned long *)(*ret);
  		ret[0] = 0;
  		pgtable_cache_size--;
! 		preempt_enable();
! 	} else {
! 		preempt_enable();
  		ret = (unsigned long *)get_pgd_slow();
+ 	}
  	return (pgd_t *)ret;
  }
  
  static inline void free_pgd_fast(pgd_t *pgd)
  {
+ 	preempt_disable();
  	*(unsigned long *)pgd = (unsigned long) pgd_quicklist;
  	pgd_quicklist = (unsigned long *) pgd;
  	pgtable_cache_size++;
+ 	preempt_enable();
  }
  
  static inline void free_pgd_slow(pgd_t *pgd)
***************
*** 119,129 ****
--- 125,137 ----
  {
  	unsigned long *ret;
  
+ 	preempt_disable();
  	if ((ret = (unsigned long *)pte_quicklist) != NULL) {
  		pte_quicklist = (unsigned long *)(*ret);
  		ret[0] = ret[1];
  		pgtable_cache_size--;
  	}
+ 	preempt_enable();
  	return (pte_t *)ret;
  }
  
***************
*** 135,146 ****
--- 143,156 ----
  extern int pgt_cache_water[];
  static inline void pte_free_fast(pte_t *pte)
  {
+ 	preempt_disable();
  	if (pgtable_cache_size < pgt_cache_water[1]) {
  		*(unsigned long *)pte = (unsigned long) pte_quicklist;
  		pte_quicklist = (unsigned long *) pte;
  		pgtable_cache_size++;
  	} else
  		pte_free_slow(pte);
+ 	preempt_enable();
  }
  
  #define pte_free(pte)		pte_free_fast(pte)
*** ./include/asm-i386/i387.h	Thu Nov 22 14:48:58 2001
--- ../linux-2.4.19-pre8-ac3p/./include/asm-i386/i387.h	Tue May 14 18:09:08 2002
***************
*** 12,17 ****
--- 12,18 ----
  #define __ASM_I386_I387_H
  
  #include <linux/sched.h>
+ #include <linux/spinlock.h>
  #include <asm/processor.h>
  #include <asm/sigcontext.h>
  #include <asm/user.h>
***************
*** 24,30 ****
  extern void restore_fpu( struct task_struct *tsk );
  
  extern void kernel_fpu_begin(void);
! #define kernel_fpu_end() stts()
  
  
  #define unlazy_fpu( tsk ) do { \
--- 25,31 ----
  extern void restore_fpu( struct task_struct *tsk );
  
  extern void kernel_fpu_begin(void);
! #define kernel_fpu_end() do { stts(); preempt_enable(); } while(0)
  
  
  #define unlazy_fpu( tsk ) do { \
*** ./include/asm-arm/dma.h	Sun Aug 12 14:14:00 2001
--- ../linux-2.4.19-pre8-ac3p/./include/asm-arm/dma.h	Tue May 14 15:09:18 2002
***************
*** 5,10 ****
--- 5,11 ----
  
  #include <linux/config.h>
  #include <linux/spinlock.h>
+ #include <linux/sched.h>
  #include <asm/system.h>
  #include <asm/memory.h>
  #include <asm/scatterlist.h>
*** ./include/asm-arm/hardirq.h	Thu Oct 11 12:04:57 2001
--- ../linux-2.4.19-pre8-ac3p/./include/asm-arm/hardirq.h	Tue May 14 15:09:18 2002
***************
*** 34,39 ****
--- 34,40 ----
  #define irq_exit(cpu,irq)	(local_irq_count(cpu)--)
  
  #define synchronize_irq()	do { } while (0)
+ #define release_irqlock(cpu)	do { } while (0)
  
  #else
  #error SMP not supported
*** ./include/asm-arm/smplock.h	Sun Aug 12 14:14:00 2001
--- ../linux-2.4.19-pre8-ac3p/./include/asm-arm/smplock.h	Tue May 14 15:09:18 2002
***************
*** 3,14 ****
--- 3,19 ----
   *
   * Default SMP lock implementation
   */
+ #include <linux/config.h>
  #include <linux/interrupt.h>
  #include <linux/spinlock.h>
  
  extern spinlock_t kernel_flag;
  
+ #ifdef CONFIG_PREEMPT
+ #define kernel_locked()		preempt_get_count()
+ #else
  #define kernel_locked()		spin_is_locked(&kernel_flag)
+ #endif
  
  /*
   * Release global kernel lock and global interrupt lock
***************
*** 40,47 ****
--- 45,58 ----
   */
  static inline void lock_kernel(void)
  {
+ #ifdef CONFIG_PREEMPT
+ 	if (current->lock_depth == -1)
+ 		spin_lock(&kernel_flag);
+ 	++current->lock_depth;
+ #else
  	if (!++current->lock_depth)
  		spin_lock(&kernel_flag);
+ #endif
  }
  
  static inline void unlock_kernel(void)
*** ./include/asm-arm/softirq.h	Sat Sep  8 15:02:31 2001
--- ../linux-2.4.19-pre8-ac3p/./include/asm-arm/softirq.h	Tue May 14 15:09:18 2002
***************
*** 5,24 ****
  #include <asm/hardirq.h>
  
  #define __cpu_bh_enable(cpu) \
! 		do { barrier(); local_bh_count(cpu)--; } while (0)
  #define cpu_bh_disable(cpu) \
! 		do { local_bh_count(cpu)++; barrier(); } while (0)
  
  #define local_bh_disable()	cpu_bh_disable(smp_processor_id())
  #define __local_bh_enable()	__cpu_bh_enable(smp_processor_id())
  
  #define in_softirq()		(local_bh_count(smp_processor_id()) != 0)
  
! #define local_bh_enable()						\
  do {									\
  	unsigned int *ptr = &local_bh_count(smp_processor_id());	\
  	if (!--*ptr && ptr[-2])						\
  		__asm__("bl%? __do_softirq": : : "lr");/* out of line */\
  } while (0)
  
  #endif	/* __ASM_SOFTIRQ_H */
--- 5,26 ----
  #include <asm/hardirq.h>
  
  #define __cpu_bh_enable(cpu) \
! 		do { barrier(); local_bh_count(cpu)--; preempt_enable(); } while (0)
  #define cpu_bh_disable(cpu) \
! 		do { preempt_disable(); local_bh_count(cpu)++; barrier(); } while (0)
  
  #define local_bh_disable()	cpu_bh_disable(smp_processor_id())
  #define __local_bh_enable()	__cpu_bh_enable(smp_processor_id())
  
  #define in_softirq()		(local_bh_count(smp_processor_id()) != 0)
  
! #define _local_bh_enable()						\
  do {									\
  	unsigned int *ptr = &local_bh_count(smp_processor_id());	\
  	if (!--*ptr && ptr[-2])						\
  		__asm__("bl%? __do_softirq": : : "lr");/* out of line */\
  } while (0)
+ 
+ #define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while (0)
  
  #endif	/* __ASM_SOFTIRQ_H */
*** ./include/asm-arm/pgalloc.h	Sun Aug 12 14:14:00 2001
--- ../linux-2.4.19-pre8-ac3p/./include/asm-arm/pgalloc.h	Tue May 14 15:09:18 2002
***************
*** 57,96 ****
--- 57,104 ----
  {
  	unsigned long *ret;
  
+ 	preempt_disable();
  	if ((ret = pgd_quicklist) != NULL) {
  		pgd_quicklist = (unsigned long *)__pgd_next(ret);
  		ret[1] = ret[2];
  		clean_dcache_entry(ret + 1);
  		pgtable_cache_size--;
  	}
+ 	preempt_enable();
  	return (pgd_t *)ret;
  }
  
  static inline void free_pgd_fast(pgd_t *pgd)
  {
+ 	preempt_disable();
  	__pgd_next(pgd) = (unsigned long) pgd_quicklist;
  	pgd_quicklist = (unsigned long *) pgd;
  	pgtable_cache_size++;
+ 	preempt_enable();
  }
  
  static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address)
  {
  	unsigned long *ret;
  
+ 	preempt_disable();
  	if((ret = pte_quicklist) != NULL) {
  		pte_quicklist = (unsigned long *)__pte_next(ret);
  		ret[0] = 0;
  		clean_dcache_entry(ret);
  		pgtable_cache_size--;
  	}
+ 	preempt_enable();
  	return (pte_t *)ret;
  }
  
  static inline void free_pte_fast(pte_t *pte)
  {
+ 	preempt_disable();
  	__pte_next(pte) = (unsigned long) pte_quicklist;
  	pte_quicklist = (unsigned long *) pte;
  	pgtable_cache_size++;
+ 	preempt_enable();
  }
  
  #else	/* CONFIG_NO_PGT_CACHE */
*** ./include/asm-sh/hardirq.h	Sat Sep  8 15:29:09 2001
--- ../linux-2.4.19-pre8-ac3p/./include/asm-sh/hardirq.h	Tue May 14 15:09:18 2002
***************
*** 34,39 ****
--- 34,41 ----
  
  #define synchronize_irq()	barrier()
  
+ #define release_irqlock(cpu)	do { } while (0)
+ 
  #else
  
  #error Super-H SMP is not available
*** ./include/asm-sh/smplock.h	Sat Sep  8 15:29:09 2001
--- ../linux-2.4.19-pre8-ac3p/./include/asm-sh/smplock.h	Tue May 14 15:09:18 2002
***************
*** 9,23 ****
  
  #include <linux/config.h>
  
! #ifndef CONFIG_SMP
! 
  #define lock_kernel()				do { } while(0)
  #define unlock_kernel()				do { } while(0)
! #define release_kernel_lock(task, cpu, depth)	((depth) = 1)
! #define reacquire_kernel_lock(task, cpu, depth)	do { } while(0)
  
  #else
! #error "We do not support SMP on SH"
! #endif /* CONFIG_SMP */
  
  #endif /* __ASM_SH_SMPLOCK_H */
--- 9,96 ----
  
  #include <linux/config.h>
  
! #if !defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT)
! /*
!  * Should never happen, since linux/smp_lock.h catches this case;
!  * but in case this file is included directly with neither SMP nor
!  * PREEMPT configuration, provide same dummies as linux/smp_lock.h
!  */
  #define lock_kernel()				do { } while(0)
  #define unlock_kernel()				do { } while(0)
! #define release_kernel_lock(task, cpu)		do { } while(0)
! #define reacquire_kernel_lock(task)		do { } while(0)
! #define kernel_locked()		1
! 
! #else /* CONFIG_SMP || CONFIG_PREEMPT */
! 
! #if CONFIG_SMP
! #error "We do not support SMP on SH yet"
! #endif
! /*
!  * Default SMP lock implementation (i.e. the i386 version)
!  */
! 
! #include <linux/interrupt.h>
! #include <linux/spinlock.h>
! 
! extern spinlock_t kernel_flag;
! #define lock_bkl() spin_lock(&kernel_flag)
! #define unlock_bkl() spin_unlock(&kernel_flag)
  
+ #ifdef CONFIG_SMP
+ #define kernel_locked()		spin_is_locked(&kernel_flag)
+ #elif  CONFIG_PREEMPT
+ #define kernel_locked()		preempt_get_count()
+ #else  /* neither */
+ #define kernel_locked()		1
+ #endif
+ 
+ /*
+  * Release global kernel lock and global interrupt lock
+  */
+ #define release_kernel_lock(task, cpu) \
+ do { \
+ 	if (task->lock_depth >= 0) \
+ 		spin_unlock(&kernel_flag); \
+ 	release_irqlock(cpu); \
+ 	__sti(); \
+ } while (0)
+ 
+ /*
+  * Re-acquire the kernel lock
+  */
+ #define reacquire_kernel_lock(task) \
+ do { \
+ 	if (task->lock_depth >= 0) \
+ 		spin_lock(&kernel_flag); \
+ } while (0)
+ 
+ /*
+  * Getting the big kernel lock.
+  *
+  * This cannot happen asynchronously,
+  * so we only need to worry about other
+  * CPU's.
+  */
+ static __inline__ void lock_kernel(void)
+ {
+ #ifdef CONFIG_PREEMPT
+ 	if (current->lock_depth == -1)
+ 		spin_lock(&kernel_flag);
+ 	++current->lock_depth;
  #else
! 	if (!++current->lock_depth)
! 		spin_lock(&kernel_flag);
! #endif
! }
! 
! static __inline__ void unlock_kernel(void)
! {
! 	if (current->lock_depth < 0)
! 		BUG();
! 	if (--current->lock_depth < 0)
! 		spin_unlock(&kernel_flag);
! }
! #endif /* CONFIG_SMP || CONFIG_PREEMPT */
  
  #endif /* __ASM_SH_SMPLOCK_H */
*** ./include/asm-sh/softirq.h	Sat Sep  8 15:29:09 2001
--- ../linux-2.4.19-pre8-ac3p/./include/asm-sh/softirq.h	Tue May 14 15:09:18 2002
***************
*** 6,11 ****
--- 6,12 ----
  
  #define local_bh_disable()			\
  do {						\
+ 	preempt_disable();			\
  	local_bh_count(smp_processor_id())++;	\
  	barrier();				\
  } while (0)
***************
*** 14,19 ****
--- 15,21 ----
  do {						\
  	barrier();				\
  	local_bh_count(smp_processor_id())--;	\
+ 	preempt_enable();			\
  } while (0)
  
  #define local_bh_enable()				\
***************
*** 23,28 ****
--- 25,31 ----
  	    && softirq_pending(smp_processor_id())) {	\
  		do_softirq();				\
  	}						\
+ 	preempt_enable();				\
  } while (0)
  
  #define in_softirq() (local_bh_count(smp_processor_id()) != 0)
*** ./net/socket.c	Tue May 14 14:59:20 2002
--- ../linux-2.4.19-pre8-ac3p/./net/socket.c	Tue May 14 15:09:18 2002
***************
*** 132,138 ****
  
  static struct net_proto_family *net_families[NPROTO];
  
! #ifdef CONFIG_SMP
  static atomic_t net_family_lockct = ATOMIC_INIT(0);
  static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED;
  
--- 132,138 ----
  
  static struct net_proto_family *net_families[NPROTO];
  
! #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
  static atomic_t net_family_lockct = ATOMIC_INIT(0);
  static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED;
  
*** ./net/sunrpc/pmap_clnt.c	Tue May 14 14:56:02 2002
--- ../linux-2.4.19-pre8-ac3p/./net/sunrpc/pmap_clnt.c	Tue May 14 15:09:18 2002
***************
*** 12,17 ****
--- 12,18 ----
  #include <linux/config.h>
  #include <linux/types.h>
  #include <linux/socket.h>
+ #include <linux/sched.h>
  #include <linux/kernel.h>
  #include <linux/errno.h>
  #include <linux/uio.h>
*** ./MAINTAINERS	Tue May 14 14:59:18 2002
--- ../linux-2.4.19-pre8-ac3p/./MAINTAINERS	Tue May 14 15:09:17 2002
***************
*** 1317,1322 ****
--- 1317,1330 ----
  M:	mostrows@styx.uwaterloo.ca
  S:	Maintained
  
+ PREEMPTIBLE KERNEL
+ P:	Robert M. Love
+ M:	rml@tech9.net
+ L:	linux-kernel@vger.kernel.org
+ L:	kpreempt-tech@lists.sourceforge.net
+ W:	http://tech9.net/rml/linux
+ S:	Supported
+ 
  PROMISE DC4030 CACHING DISK CONTROLLER DRIVER
  P:	Peter Denison
  M:	promise@pnd-pc.demon.co.uk
*** ./drivers/sound/sound_core.c	Sun Sep 30 15:26:08 2001
--- ../linux-2.4.19-pre8-ac3p/./drivers/sound/sound_core.c	Tue May 14 15:09:17 2002
***************
*** 37,42 ****
--- 37,43 ----
  #include <linux/config.h>
  #include <linux/module.h>
  #include <linux/init.h>
+ #include <linux/sched.h>
  #include <linux/slab.h>
  #include <linux/types.h>
  #include <linux/kernel.h>
*** ./drivers/ieee1394/csr.c	Tue May 14 14:55:42 2002
--- ../linux-2.4.19-pre8-ac3p/./drivers/ieee1394/csr.c	Tue May 14 15:09:17 2002
***************
*** 10,15 ****
--- 10,16 ----
   */
  
  #include <linux/string.h>
+ #include <linux/sched.h>
  
  #include "ieee1394_types.h"
  #include "hosts.h"
*** ./arch/i386/config.in	Tue May 14 14:59:18 2002
--- ../linux-2.4.19-pre8-ac3p/./arch/i386/config.in	Tue May 14 15:09:17 2002
***************
*** 199,204 ****
--- 199,205 ----
  bool 'Math emulation' CONFIG_MATH_EMULATION
  bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
  bool 'Symmetric multi-processing support' CONFIG_SMP
+ bool 'Preemptible Kernel' CONFIG_PREEMPT
  if [ "$CONFIG_SMP" != "y" ]; then
     bool 'Local APIC support on uniprocessors' CONFIG_X86_UP_APIC
     dep_bool 'IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC $CONFIG_X86_UP_APIC
***************
*** 212,220 ****
     bool 'Multiquad NUMA system' CONFIG_MULTIQUAD
  fi
  
! if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
!    define_bool CONFIG_HAVE_DEC_LOCK y
  fi
  endmenu
  
  mainmenu_option next_comment
--- 213,224 ----
     bool 'Multiquad NUMA system' CONFIG_MULTIQUAD
  fi
  
! if [ "$CONFIG_SMP" = "y" -o "$CONFIG_PREEMPT" = "y" ]; then
!    if [ "$CONFIG_X86_CMPXCHG" = "y" ]; then
!       define_bool CONFIG_HAVE_DEC_LOCK y
!    fi
  fi
+ 
  endmenu
  
  mainmenu_option next_comment
*** ./arch/i386/lib/dec_and_lock.c	Fri Jul  7 21:20:16 2000
--- ../linux-2.4.19-pre8-ac3p/./arch/i386/lib/dec_and_lock.c	Tue May 14 15:09:17 2002
***************
*** 8,13 ****
--- 8,14 ----
   */
  
  #include <linux/spinlock.h>
+ #include <linux/sched.h>
  #include <asm/atomic.h>
  
  int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
*** ./arch/i386/kernel/entry.S	Tue May 14 14:59:18 2002
--- ../linux-2.4.19-pre8-ac3p/./arch/i386/kernel/entry.S	Tue May 14 15:09:17 2002
***************
*** 71,77 ****
   * these are offsets into the task-struct.
   */
  state		=  0
! flags		=  4
  sigpending	=  8
  addr_limit	= 12
  exec_domain	= 16
--- 71,77 ----
   * these are offsets into the task-struct.
   */
  state		=  0
! preempt_count	=  4
  sigpending	=  8
  addr_limit	= 12
  exec_domain	= 16
***************
*** 79,86 ****
--- 79,106 ----
  tsk_ptrace	= 24
  cpu		= 32
  
+ /* These are offsets into the irq_stat structure
+  * There is one per cpu and it is aligned to 32
+  * byte boundary (we put that here as a shift count)
+  */
+ irq_array_shift                 = CONFIG_X86_L1_CACHE_SHIFT
+ 
+ irq_stat_local_irq_count        = 4
+ irq_stat_local_bh_count         = 8
+ 
  ENOSYS = 38
  
+ #ifdef CONFIG_SMP
+ #define GET_CPU_INDX	movl cpu(%ebx),%eax;  \
+                         shll $irq_array_shift,%eax
+ #define GET_CURRENT_CPU_INDX GET_CURRENT(%ebx); \
+                              GET_CPU_INDX
+ #define CPU_INDX (,%eax)
+ #else
+ #define GET_CPU_INDX
+ #define GET_CURRENT_CPU_INDX GET_CURRENT(%ebx)
+ #define CPU_INDX
+ #endif
  
  #define SAVE_ALL \
  	cld; \
***************
*** 176,182 ****
  
  
  ENTRY(ret_from_fork)
! #if CONFIG_SMP
  	pushl %ebx
  	call SYMBOL_NAME(schedule_tail)
  	addl $4, %esp
--- 196,202 ----
  
  
  ENTRY(ret_from_fork)
! #if CONFIG_SMP || CONFIG_PREEMPT
  	pushl %ebx
  	call SYMBOL_NAME(schedule_tail)
  	addl $4, %esp
***************
*** 249,260 ****
--- 269,298 ----
  	ALIGN
  ENTRY(ret_from_intr)
  	GET_CURRENT(%ebx)
+ #ifdef CONFIG_PREEMPT
+ 	cli
+ 	decl preempt_count(%ebx)
+ #endif
  ret_from_exception:
  	movl EFLAGS(%esp),%eax		# mix EFLAGS and CS
  	movb CS(%esp),%al
  	testl $(VM_MASK | 3),%eax	# return to VM86 mode or non-supervisor?
  	jne ret_from_sys_call
+ #ifdef CONFIG_PREEMPT
+ 	cmpl $0,preempt_count(%ebx)
+ 	jnz restore_all
+ 	cmpl $0,need_resched(%ebx)
+ 	jz restore_all
+ 	movl SYMBOL_NAME(irq_stat)+irq_stat_local_bh_count CPU_INDX,%ecx
+ 	addl SYMBOL_NAME(irq_stat)+irq_stat_local_irq_count CPU_INDX,%ecx
+ 	jnz restore_all
+ 	incl preempt_count(%ebx)
+ 	sti
+ 	call SYMBOL_NAME(preempt_schedule)
+ 	jmp ret_from_intr
+ #else
  	jmp restore_all
+ #endif
  
  	ALIGN
  reschedule:
***************
*** 291,296 ****
--- 329,337 ----
  	GET_CURRENT(%ebx)
  	call *%edi
  	addl $8,%esp
+ #ifdef CONFIG_PREEMPT
+ 	cli
+ #endif
  	jmp ret_from_exception
  
  ENTRY(coprocessor_error)
***************
*** 310,321 ****
--- 351,368 ----
  	movl %cr0,%eax
  	testl $0x4,%eax			# EM (math emulation bit)
  	jne device_not_available_emulate
+ #ifdef CONFIG_PREEMPT
+ 	cli
+ #endif
  	call SYMBOL_NAME(math_state_restore)
  	jmp ret_from_exception
  device_not_available_emulate:
  	pushl $0		# temporary storage for ORIG_EIP
  	call  SYMBOL_NAME(math_emulate)
  	addl $4,%esp
+ #ifdef CONFIG_PREEMPT
+ 	cli
+ #endif
  	jmp ret_from_exception
  
  ENTRY(debug)
*** ./arch/i386/kernel/traps.c	Tue May 14 14:59:18 2002
--- ../linux-2.4.19-pre8-ac3p/./arch/i386/kernel/traps.c	Tue May 14 15:09:17 2002
***************
*** 756,761 ****
--- 756,763 ----
   *
   * Careful.. There are problems with IBM-designed IRQ13 behaviour.
   * Don't touch unless you *really* know how it works.
+  *
+  * Must be called with kernel preemption disabled.
   */
  asmlinkage void math_state_restore(struct pt_regs regs)
  {
*** ./arch/i386/kernel/i387.c	Fri Feb 23 13:09:08 2001
--- ../linux-2.4.19-pre8-ac3p/./arch/i386/kernel/i387.c	Tue May 14 15:09:17 2002
***************
*** 10,15 ****
--- 10,16 ----
  
  #include <linux/config.h>
  #include <linux/sched.h>
+ #include <linux/spinlock.h>
  #include <asm/processor.h>
  #include <asm/i387.h>
  #include <asm/math_emu.h>
***************
*** 65,70 ****
--- 66,73 ----
  {
  	struct task_struct *tsk = current;
  
+ 	preempt_disable();
+ 	
  	if (tsk->flags & PF_USEDFPU) {
  		__save_init_fpu(tsk);
  		return;
*** ./arch/arm/config.in	Tue May 14 14:59:18 2002
--- ../linux-2.4.19-pre8-ac3p/./arch/arm/config.in	Tue May 14 15:09:17 2002
***************
*** 552,557 ****
--- 552,558 ----
  if [ "$CONFIG_ISDN" != "n" ]; then
     source drivers/isdn/Config.in
  fi
+ dep_bool 'Preemptible Kernel' CONFIG_PREEMPT $CONFIG_CPU_32
  endmenu
  
  #
*** ./arch/arm/kernel/entry-armv.S	Tue May 14 14:55:40 2002
--- ../linux-2.4.19-pre8-ac3p/./arch/arm/kernel/entry-armv.S	Tue May 14 15:09:17 2002
***************
*** 697,702 ****
--- 697,708 ----
  		add	r4, sp, #S_SP
  		mov	r6, lr
  		stmia	r4, {r5, r6, r7, r8, r9}	@ save sp_SVC, lr_SVC, pc, cpsr, old_ro
+ #ifdef CONFIG_PREEMPT
+ 		get_current_task r9
+ 		ldr	r8, [r9, #TSK_PREEMPT]
+ 		add	r8, r8, #1
+ 		str	r8, [r9, #TSK_PREEMPT]
+ #endif
  1:		get_irqnr_and_base r0, r6, r5, lr
  		movne	r1, sp
  		@
***************
*** 704,709 ****
--- 710,734 ----
  		@
  		adrsvc	ne, lr, 1b
  		bne	do_IRQ
+ #ifdef CONFIG_PREEMPT
+ 2:		ldr	r8, [r9, #TSK_PREEMPT]
+ 		subs	r8, r8, #1
+ 		bne	3f
+ 		ldr	r7, [r9, #TSK_NEED_RESCHED]
+ 		teq	r7, #0
+ 		beq	3f
+ 		ldr	r6, .LCirqstat
+ 		ldr	r0, [r6, #IRQSTAT_BH_COUNT]
+ 		teq	r0, #0
+ 		bne	3f
+ 		mov	r0, #MODE_SVC
+ 		msr	cpsr_c, r0		@ enable interrupts
+ 		bl	SYMBOL_NAME(preempt_schedule)
+ 		mov	r0, #I_BIT | MODE_SVC
+ 		msr	cpsr_c, r0              @ disable interrupts
+ 		b	2b
+ 3:		str	r8, [r9, #TSK_PREEMPT]
+ #endif
  		ldr	r0, [sp, #S_PSR]		@ irqs are already disabled
  		msr	spsr, r0
  		ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
***************
*** 761,766 ****
--- 786,794 ----
  .LCprocfns:	.word	SYMBOL_NAME(processor)
  #endif
  .LCfp:		.word	SYMBOL_NAME(fp_enter)
+ #ifdef CONFIG_PREEMPT
+ .LCirqstat:	.word	SYMBOL_NAME(irq_stat)
+ #endif
  
  		irq_prio_table
  
***************
*** 801,806 ****
--- 829,840 ----
  		stmdb	r8, {sp, lr}^
  		alignment_trap r4, r7, __temp_irq
  		zero_fp
+ 		get_current_task tsk
+ #ifdef CONFIG_PREEMPT
+ 		ldr	r0, [tsk, #TSK_PREEMPT]
+ 		add	r0, r0, #1
+ 		str	r0, [tsk, #TSK_PREEMPT]
+ #endif
  1:		get_irqnr_and_base r0, r6, r5, lr
  		movne	r1, sp
  		adrsvc	ne, lr, 1b
***************
*** 808,815 ****
  		@ routine called with r0 = irq number, r1 = struct pt_regs *
  		@
  		bne	do_IRQ
  		mov	why, #0
- 		get_current_task tsk
  		b	ret_to_user
  
  		.align	5
--- 842,853 ----
  		@ routine called with r0 = irq number, r1 = struct pt_regs *
  		@
  		bne	do_IRQ
+ #ifdef CONFIG_PREEMPT
+ 		ldr	r0, [tsk, #TSK_PREEMPT]
+ 		sub	r0, r0, #1
+ 		str	r0, [tsk, #TSK_PREEMPT]
+ #endif
  		mov	why, #0
  		b	ret_to_user
  
  		.align	5
*** ./arch/arm/tools/getconstants.c	Thu Oct 11 12:04:57 2001
--- ../linux-2.4.19-pre8-ac3p/./arch/arm/tools/getconstants.c	Tue May 14 15:09:17 2002
***************
*** 13,18 ****
--- 13,19 ----
  
  #include <asm/pgtable.h>
  #include <asm/uaccess.h>
+ #include <asm/hardirq.h>
  
  /*
   * Make sure that the compiler and target are compatible.
***************
*** 38,43 ****
--- 39,49 ----
  
  DEFN("TSS_SAVE",		OFF_TSK(thread.save));
  DEFN("TSS_FPESAVE",		OFF_TSK(thread.fpstate.soft.save));
+ 
+ #ifdef CONFIG_PREEMPT
+ DEFN("TSK_PREEMPT",		OFF_TSK(preempt_count));
+ DEFN("IRQSTAT_BH_COUNT",	(unsigned long)&(((irq_cpustat_t *)0)->__local_bh_count));
+ #endif
  
  #ifdef CONFIG_CPU_32
  DEFN("TSS_DOMAIN",		OFF_TSK(thread.domain));
*** ./arch/sh/config.in	Tue May 14 14:59:18 2002
--- ../linux-2.4.19-pre8-ac3p/./arch/sh/config.in	Tue May 14 15:09:17 2002
***************
*** 124,129 ****
--- 124,130 ----
     hex 'Physical memory start address' CONFIG_MEMORY_START 08000000
     hex 'Physical memory size' CONFIG_MEMORY_SIZE 00400000
  fi
+ bool 'Preemptible Kernel' CONFIG_PREEMPT
  endmenu
  
  if [ "$CONFIG_SH_HP690" = "y" ]; then
*** ./arch/sh/kernel/entry.S	Tue May 14 14:55:42 2002
--- ../linux-2.4.19-pre8-ac3p/./arch/sh/kernel/entry.S	Tue May 14 15:09:17 2002
***************
*** 60,69 ****
  /*
   * These are offsets into the task-struct.
   */
! flags		=  4
  sigpending	=  8
  need_resched	= 20
  tsk_ptrace	= 24
  
  PT_TRACESYS  = 0x00000002
  PF_USEDFPU   = 0x00100000
--- 60,77 ----
  /*
   * These are offsets into the task-struct.
   */
! preempt_count	=  4
  sigpending	=  8
  need_resched	= 20
  tsk_ptrace	= 24
+ flags		= 84
+ 
+ /*
+  * These offsets are into irq_stat.
+  * (Find irq_cpustat_t in asm-sh/hardirq.h)
+  */
+ local_irq_count =  8
+ local_bh_count  = 12
  
  PT_TRACESYS  = 0x00000002
  PF_USEDFPU   = 0x00100000
***************
*** 143,149 ****
  	mov.l	__INV_IMASK, r11;	\
  	stc	sr, r10;		\
  	and	r11, r10;		\
! 	stc	k_g_imask, r11;	\
  	or	r11, r10;		\
  	ldc	r10, sr
  
--- 151,157 ----
  	mov.l	__INV_IMASK, r11;	\
  	stc	sr, r10;		\
  	and	r11, r10;		\
! 	stc	k_g_imask, r11;		\
  	or	r11, r10;		\
  	ldc	r10, sr
  
***************
*** 304,311 ****
  	mov.l	@(tsk_ptrace,r0), r0	! Is current PTRACE_SYSCALL'd?
  	mov	#PT_TRACESYS, r1
  	tst	r1, r0
! 	bt	ret_from_syscall
! 	bra	syscall_ret_trace
  	 nop	 
  
  	.align	2
--- 312,319 ----
  	mov.l	@(tsk_ptrace,r0), r0	! Is current PTRACE_SYSCALL'd?
  	mov	#PT_TRACESYS, r1
  	tst	r1, r0
! 	bf	syscall_ret_trace
! 	bra	ret_from_syscall
  	 nop	 
  
  	.align	2
***************
*** 505,512 ****
  	.long	syscall_ret_trace
  __syscall_ret:
  	.long	syscall_ret
- __INV_IMASK:
- 	.long	0xffffff0f	! ~(IMASK)
  
  
  	.align	2
--- 513,518 ----
***************
*** 518,524 ****
--- 524,607 ----
  	.align	2
  1:	.long	SYMBOL_NAME(schedule)
  
+ #ifdef CONFIG_PREEMPT	
+ 	!
+ 	! Returning from interrupt during kernel mode: check if
+ 	! preempt_schedule should be called. If need_resched flag
+ 	! is set, preempt_count is zero, and we're not currently
+ 	! in an interrupt handler (local irq or bottom half) then
+ 	! call preempt_schedule. 
+ 	!
+ 	! Increment preempt_count to prevent a nested interrupt
+ 	! from reentering preempt_schedule, then decrement after
+ 	! and drop through to regular interrupt return which will
+ 	! jump back and check again in case such an interrupt did
+ 	! come in (and didn't preempt due to preempt_count).
+ 	!
+ 	! NOTE:	because we just checked that preempt_count was
+ 	! zero before getting to the call, can't we use immediate
+ 	! values (1 and 0) rather than inc/dec? Also, rather than
+ 	! drop through to ret_from_irq, we already know this thread
+ 	! is kernel mode, can't we go direct to ret_from_kirq? In
+ 	! fact, with proper interrupt nesting and so forth could
+ 	! the loop simply be on the need_resched w/o checking the
+ 	! other stuff again? Optimize later...
+ 	!
+ 	.align	2
+ ret_from_kirq:
+ 	! Nonzero preempt_count prevents scheduling
+ 	stc	k_current, r1
+ 	mov.l	@(preempt_count,r1), r0
+ 	cmp/eq	#0, r0
+ 	bf	restore_all
+ 	! Zero need_resched prevents scheduling
+ 	mov.l	@(need_resched,r1), r0
+ 	cmp/eq	#0, r0
+ 	bt	restore_all
+ 	! If in_interrupt(), don't schedule
+ 	mov.l	__irq_stat, r1
+ 	mov.l	@(local_irq_count,r1), r0
+ 	mov.l	@(local_bh_count,r1), r1
+ 	or	r1, r0
+ 	cmp/eq	#0, r0
+ 	bf	restore_all
+ 	! Allow scheduling using preempt_schedule
+ 	! Adjust preempt_count and SR as needed.
+ 	stc	k_current, r1
+ 	mov.l	@(preempt_count,r1), r0	! Could replace this ...
+ 	add	#1, r0			! ... and this w/mov #1?
+ 	mov.l	r0, @(preempt_count,r1)
+ 	STI()
+ 	mov.l	__preempt_schedule, r0
+ 	jsr	@r0
+ 	 nop	
+ 	/* CLI */
+ 	stc	sr, r0
+ 	or	#0xf0, r0
+ 	ldc	r0, sr
+ 	!
+ 	stc	k_current, r1
+ 	mov.l	@(preempt_count,r1), r0	! Could replace this ...
+ 	add	#-1, r0			! ... and this w/mov #0?
+ 	mov.l	r0, @(preempt_count,r1)
+ 	! Maybe should bra ret_from_kirq, or loop over need_resched?
+ 	! For now, fall through to ret_from_irq again...
+ #endif /* CONFIG_PREEMPT */
+ 	
  ret_from_irq:
+ 	mov	#OFF_SR, r0
+ 	mov.l	@(r0,r15), r0	! get status register
+ 	shll	r0
+ 	shll	r0		! kernel space?
+ #ifndef CONFIG_PREEMPT
+ 	bt	restore_all	! Yes, it's from kernel, go back soon
+ #else /* CONFIG_PREEMPT */
+ 	bt	ret_from_kirq	! From kernel: maybe preempt_schedule
+ #endif /* CONFIG_PREEMPT */
+ 	!
+ 	bra	ret_from_syscall
+ 	 nop
+ 
  ret_from_exception:
  	mov	#OFF_SR, r0
  	mov.l	@(r0,r15), r0	! get status register
***************
*** 564,569 ****
--- 647,659 ----
  	.long	SYMBOL_NAME(do_signal)
  __irq_stat:
  	.long	SYMBOL_NAME(irq_stat)
+ #ifdef CONFIG_PREEMPT
+ __preempt_schedule:
+ 	.long	SYMBOL_NAME(preempt_schedule)
+ #endif /* CONFIG_PREEMPT */	
+ __INV_IMASK:
+ 	.long	0xffffff0f	! ~(IMASK)
+ 
  
  	.align 2
  restore_all:
***************
*** 679,685 ****
  __fpu_prepare_fd:
  	.long	SYMBOL_NAME(fpu_prepare_fd)
  __init_task_flags:
! 	.long	SYMBOL_NAME(init_task_union)+4
  __PF_USEDFPU:
  	.long	PF_USEDFPU
  #endif
--- 769,775 ----
  __fpu_prepare_fd:
  	.long	SYMBOL_NAME(fpu_prepare_fd)
  __init_task_flags:
! 	.long	SYMBOL_NAME(init_task_union)+flags
  __PF_USEDFPU:
  	.long	PF_USEDFPU
  #endif
*** ./arch/sh/kernel/irq.c	Sat Sep  8 15:29:09 2001
--- ../linux-2.4.19-pre8-ac3p/./arch/sh/kernel/irq.c	Tue May 14 15:09:17 2002
***************
*** 229,234 ****
--- 229,242 ----
  	struct irqaction * action;
  	unsigned int status;
  
+ 	/*
+ 	 * At this point we're now about to actually call handlers,
+ 	 * and interrupts might get reenabled during them... bump
+ 	 * preempt_count to prevent any preemption while the handler
+  	 * called here is pending...
+  	 */
+  	preempt_disable();
+ 
  	/* Get IRQ number */
  	asm volatile("stc	r2_bank, %0\n\t"
  		     "shlr2	%0\n\t"
***************
*** 298,305 ****
--- 306,322 ----
  	desc->handler->end(irq);
  	spin_unlock(&desc->lock);
  
+ 
  	if (softirq_pending(cpu))
  		do_softirq();
+ 
+ 	/*
+ 	 * We're done with the handlers, interrupts should be
+ 	 * currently disabled; decrement preempt_count now so that
+ 	 * preemption may be allowed again as we return...
+ 	 */
+ 	preempt_enable_no_resched();
+ 
  	return 1;
  }
  
*** ./Documentation/Configure.help	Tue May 14 14:59:18 2002
--- ../linux-2.4.19-pre8-ac3p/./Documentation/Configure.help	Tue May 14 15:09:17 2002
***************
*** 291,296 ****
--- 291,307 ----
    If you have a system with several CPUs, you do not need to say Y
    here: the local APIC will be used automatically.
  
+ Preemptible Kernel
+ CONFIG_PREEMPT
+   This option reduces the latency of the kernel when reacting to
+   real-time or interactive events by allowing a low priority process to
+   be preempted even if it is in kernel mode executing a system call.
+   This allows applications to run more reliably even when the system is
+   under load.
+ 
+   Say Y here if you are building a kernel for a desktop, embedded or
+   real-time system.  Say N if you are unsure.
+ 
  Kernel math emulation
  CONFIG_MATH_EMULATION
    Linux can emulate a math coprocessor (used for floating point

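For readers who want the CONFIG_PREEMPT logic in the entry paths above spelled
out, here is a rough C-level sketch (illustration only, not part of the patch)
of the test made on return from an interrupt before preempt_schedule() is
called. The task_struct fields (preempt_count, need_resched) and the
local_irq_count()/local_bh_count() macros are the 2.4-era names the patch
touches; the helper name itself is made up, and the real assembly loops back
to re-test need_resched afterwards, which this sketch omits.

/*
 * Illustrative sketch only -- NOT part of the patch.  Roughly what the
 * CONFIG_PREEMPT interrupt-return code above checks before it calls
 * preempt_schedule().
 */
#include <linux/sched.h>
#include <asm/hardirq.h>
#include <asm/system.h>

static void maybe_preempt_on_irq_return(struct task_struct *curr, int cpu)
{
	/* Nonzero preempt_count means preemption is currently forbidden. */
	if (curr->preempt_count)
		return;

	/* Nothing to do unless a reschedule has been requested. */
	if (!curr->need_resched)
		return;

	/* Never preempt from inside a hard or soft interrupt handler. */
	if (local_irq_count(cpu) || local_bh_count(cpu))
		return;

	curr->preempt_count++;		/* block nested preemption attempts */
	__sti();			/* re-enable interrupts */
	preempt_schedule();		/* may switch to another task here */
	__cli();			/* interrupts off again before return */
	curr->preempt_count--;
}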

Thread overview: 31+ messages
2002-05-14  1:19 Rik van Riel
2002-05-14  2:18 ` Andrew Morton
2002-05-14 12:30   ` Rik van Riel
2002-05-15 17:02   ` Denis Vlasenko
2002-05-16  7:41     ` Andrew Morton
2002-05-14 15:39 ` William Lee Irwin III
2002-05-14 16:36   ` Rik van Riel
2002-05-14 16:54     ` William Lee Irwin III
2002-05-15 17:17       ` Denis Vlasenko
2002-05-15 14:03         ` Rik van Riel
2002-05-15 20:17           ` Denis Vlasenko
2002-05-15 16:13             ` Rik van Riel
2002-05-15 16:21               ` William Lee Irwin III
2002-05-15 17:00               ` William Lee Irwin III
2002-05-15 18:16                 ` Bill Davidsen
2002-05-15 18:30                 ` William Lee Irwin III
2002-05-15 18:33                   ` Rik van Riel
2002-05-15 18:46                     ` William Lee Irwin III
2002-05-15 19:00                       ` Rik van Riel
2002-05-16 11:42                         ` Denis Vlasenko
2002-05-16  9:49               ` Leigh Brown
2002-05-16 14:51                 ` Rik van Riel
2002-05-16 16:44                   ` Leigh Brown
2002-05-17  8:02                     ` Jens Axboe
2002-05-16 11:14               ` Denis Vlasenko
2002-05-15 15:15         ` Bill Davidsen
2002-05-16 10:58           ` Denis Vlasenko
2002-05-14 18:19     ` Martin J. Bligh
2002-05-15  1:31 ` Bill Davidsen [this message]
2002-05-15  1:41   ` William Lee Irwin III
2002-05-15 14:39     ` Bill Davidsen
