linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Mike Galbraith <efault@gmx.de>
To: elenstev@mesatop.com
Cc: Ed Tomlinson <tomlins@cam.org>, Andrew Morton <akpm@digeo.com>,
	Linux Kernel <linux-kernel@vger.kernel.org>,
	linux-mm@kvack.org
Subject: Re: 2.5.65-mm2
Date: Thu, 20 Mar 2003 22:07:54 +0100	[thread overview]
Message-ID: <5.2.0.9.2.20030320220413.00ceaa98@pop.gmx.net> (raw)
In-Reply-To: <1048191154.1638.34.camel@spc1.esa.lanl.gov>

[-- Attachment #1: Type: text/plain, Size: 690 bytes --]

At 01:12 PM 3/20/2003 -0700, Steven P. Cole wrote:
>On Thu, 2003-03-20 at 12:48, Mike Galbraith wrote:
> > At 07:36 AM 3/20/2003 -0700, Steven Cole wrote:
> > Bottom line is that once cpu hogs are falsely determined to be sleepers,
> > positive feedback kills you.
> >
> >          -Mike
> >
> >
>Sure, either post a patch against a known sync point, .65, .65-bk, or
>65-mm2, or send me the sched.c file itself (2600 lines might be a little
>too much for the entire list).
>
>If you send it in the next 2 hours, I can test today, otherwise I'll do
>it mañana.

What the heck.  It is attached.

         -Mike

(and I repeat, don't _look_, just run it, and let me know;) 

[-- Attachment #2: xx.diff --]
[-- Type: application/octet-stream, Size: 7788 bytes --]

diff -urN linux-2.5.65.virgin/include/linux/sched.h linux-2.5.65/include/linux/sched.h
--- linux-2.5.65.virgin/include/linux/sched.h	Thu Mar 20 22:11:52 2003
+++ linux-2.5.65/include/linux/sched.h	Tue Mar 18 19:19:38 2003
@@ -328,7 +328,8 @@
 	prio_array_t *array;
 
 	unsigned long sleep_avg;
-	unsigned long last_run;
+	unsigned long sleep_begin;
+	unsigned long sleep_end;
 
 	unsigned long policy;
 	unsigned long cpus_allowed;
diff -urN linux-2.5.65.virgin/kernel/fork.c linux-2.5.65/kernel/fork.c
--- linux-2.5.65.virgin/kernel/fork.c	Thu Mar 20 22:11:54 2003
+++ linux-2.5.65/kernel/fork.c	Tue Mar 18 19:23:00 2003
@@ -918,7 +918,7 @@
 	 */
 	p->first_time_slice = 1;
 	current->time_slice >>= 1;
-	p->last_run = jiffies;
+	p->sleep_begin = p->sleep_end = jiffies;
 	if (!current->time_slice) {
 		/*
 	 	 * This case is rare, it happens when the parent has only
diff -urN linux-2.5.65.virgin/kernel/printk.c linux-2.5.65/kernel/printk.c
--- linux-2.5.65.virgin/kernel/printk.c	Thu Mar 20 22:11:54 2003
+++ linux-2.5.65/kernel/printk.c	Wed Mar 19 06:37:45 2003
@@ -510,8 +510,10 @@
 	console_may_schedule = 0;
 	up(&console_sem);
 	spin_unlock_irqrestore(&logbuf_lock, flags);
+#if 0 // MIKEDIDIT
 	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
 		wake_up_interruptible(&log_wait);
+#endif
 }
 
 /** console_conditional_schedule - yield the CPU if required
diff -urN linux-2.5.65.virgin/kernel/sched.c linux-2.5.65/kernel/sched.c
--- linux-2.5.65.virgin/kernel/sched.c	Thu Mar 20 22:11:54 2003
+++ linux-2.5.65/kernel/sched.c	Thu Mar 20 15:13:34 2003
@@ -67,12 +67,13 @@
 #define MIN_TIMESLICE		( 10 * HZ / 1000)
 #define MAX_TIMESLICE		(200 * HZ / 1000)
 #define CHILD_PENALTY		50
-#define PARENT_PENALTY		100
+#define PARENT_PENALTY		85
 #define EXIT_WEIGHT		3
 #define PRIO_BONUS_RATIO	25
 #define INTERACTIVE_DELTA	2
 #define MAX_SLEEP_AVG		(10*HZ)
-#define STARVATION_LIMIT	(10*HZ)
+#define STARVATION_LIMIT	(1*MAX_TIMESLICE)
+#define TIMESLICE_GRANULARITY	(HZ/20 ?: 1)
 #define NODE_THRESHOLD		125
 
 /*
@@ -332,59 +333,27 @@
 {
 	enqueue_task(p, rq->active);
 	nr_running_inc(rq);
+	p->sleep_end = jiffies;
 }
 
 /*
- * activate_task - move a task to the runqueue and do priority recalculation
- *
- * Update all the scheduling statistics stuff. (sleep average
- * calculation, priority modifiers, etc.)
+ * activate_task - move a task to the runqueue and do priority
+ * recalculation.  If the waker is maximum-interactive, give an
+ * additional boost to the sleeper as well.  This has the effect
+ * of boosting tasks which are related to interactive task.
  */
 static inline int activate_task(task_t *p, runqueue_t *rq)
 {
-	long sleep_time = jiffies - p->last_run - 1;
-	int requeue_waker = 0;
-
-	if (sleep_time > 0) {
-		int sleep_avg;
-
-		/*
-		 * This code gives a bonus to interactive tasks.
-		 *
-		 * The boost works by updating the 'average sleep time'
-		 * value here, based on ->last_run. The more time a task
-		 * spends sleeping, the higher the average gets - and the
-		 * higher the priority boost gets as well.
-		 */
-		sleep_avg = p->sleep_avg + sleep_time;
-
-		/*
-		 * 'Overflow' bonus ticks go to the waker as well, so the
-		 * ticks are not lost. This has the effect of further
-		 * boosting tasks that are related to maximum-interactive
-		 * tasks.
-		 */
-		if (sleep_avg > MAX_SLEEP_AVG) {
-			if (!in_interrupt()) {
-				sleep_avg += current->sleep_avg - MAX_SLEEP_AVG;
-				if (sleep_avg > MAX_SLEEP_AVG)
-					sleep_avg = MAX_SLEEP_AVG;
-
-				if (current->sleep_avg != sleep_avg) {
-					current->sleep_avg = sleep_avg;
-					requeue_waker = 1;
-				}
-			}
-			sleep_avg = MAX_SLEEP_AVG;
-		}
-		if (p->sleep_avg != sleep_avg) {
-			p->sleep_avg = sleep_avg;
-			p->prio = effective_prio(p);
-		}
+	int requeue_waker = in_interrupt();
+	if (!requeue_waker && current->sleep_avg == MAX_SLEEP_AVG) {
+		p->sleep_avg += TIMESLICE_GRANULARITY;
+		if (p->sleep_avg > MAX_SLEEP_AVG)
+			p->sleep_avg = MAX_SLEEP_AVG;
 	}
+	p->prio = effective_prio(p);
 	__activate_task(p, rq);
 
-	return requeue_waker;
+	return requeue_waker ? 0 : TASK_INTERACTIVE(p);
 }
 
 /*
@@ -397,6 +366,7 @@
 		rq->nr_uninterruptible++;
 	dequeue_task(p, p->array);
 	p->array = NULL;
+	p->sleep_begin = jiffies;
 }
 
 /*
@@ -1063,7 +1033,7 @@
 	 */
 
 #define CAN_MIGRATE_TASK(p,rq,this_cpu)					\
-	((jiffies - (p)->last_run > cache_decay_ticks) &&	\
+	((jiffies - (p)->sleep_end > cache_decay_ticks) &&	\
 		!task_running(rq, p) &&					\
 			((p)->cpus_allowed & (1UL << (this_cpu))))
 
@@ -1176,10 +1146,17 @@
  * load-dependent, as the frequency of array switched decreases with
  * increasing number of running tasks:
  */
+#if 0
 #define EXPIRED_STARVING(rq) \
 		(STARVATION_LIMIT && ((rq)->expired_timestamp && \
 		(jiffies - (rq)->expired_timestamp >= \
 			STARVATION_LIMIT * ((rq)->nr_running) + 1)))
+#else
+#define EXPIRED_STARVING(rq) \
+		(STARVATION_LIMIT && ((rq)->expired_timestamp && \
+		(jiffies - (rq)->expired_timestamp >= \
+			STARVATION_LIMIT * ((rq)->active->nr_active) + 1)))
+#endif
 
 /*
  * This function gets called by the timer code, with HZ frequency.
@@ -1194,6 +1171,11 @@
 	runqueue_t *rq = this_rq();
 	task_t *p = current;
 
+	/* Update sleep average. */
+	if (p->sleep_avg)
+		p->sleep_avg--;
+	p->sleep_begin = p->sleep_end = jiffies;
+
 	if (rcu_pending(cpu))
 		rcu_check_callbacks(cpu, user_ticks);
 
@@ -1221,15 +1203,12 @@
 	}
 	spin_lock(&rq->lock);
 	/*
-	 * The task was running during this tick - update the
-	 * time slice counter and the sleep average. Note: we
-	 * do not update a thread's priority until it either
-	 * goes to sleep or uses up its timeslice. This makes
-	 * it possible for interactive tasks to use up their
-	 * timeslices at their highest priority levels.
+	 * The task was running during this tick - update the time
+	 * slice counter. Note: we do not update a thread's priority
+	 * until it either goes to sleep or uses up its timeslice.
+	 * This makes it possible for interactive tasks to use up
+	 * their timeslices at their highest priority levels.
 	 */
-	if (p->sleep_avg)
-		p->sleep_avg--;
 	if (unlikely(rt_task(p))) {
 		/*
 		 * RR tasks need a special form of timeslice management.
@@ -1259,6 +1238,29 @@
 			enqueue_task(p, rq->expired);
 		} else
 			enqueue_task(p, rq->active);
+	} else {
+		/*
+		 * Prevent a too long timeslice from monopolizing the CPU,
+		 * by splitting up the timeslice into smaller pieces.
+		 *
+		 * Note: this does not mean the task's timeslices expire or
+		 * get lost in any way, they just might be preempted by
+		 * another task of equal priority. (one with higher
+		 * priority would have preempted this task already.) We
+		 * requeue this task to the end of the list on this priority
+		 * level, which is in essence a round-robin of tasks with
+		 * equal priority.
+		 */
+		if (!(p->time_slice % TIMESLICE_GRANULARITY) &&
+			       		(p->array == rq->active)) {
+			dequeue_task(p, rq->active);
+			set_tsk_need_resched(p);
+			p->prio = effective_prio(p);
+			enqueue_task(p, rq->active);
+#if 1 // MIKEDIDIT
+			p->sleep_begin += (TIMESLICE_GRANULARITY * rq->active->nr_active);
+#endif
+		}
 	}
 out:
 	spin_unlock(&rq->lock);
@@ -1297,7 +1299,6 @@
 	rq = this_rq();
 
 	release_kernel_lock(prev);
-	prev->last_run = jiffies;
 	spin_lock_irq(&rq->lock);
 
 	/*
@@ -1351,6 +1352,8 @@
 	RCU_qsctr(prev->thread_info->cpu)++;
 
 	if (likely(prev != next)) {
+		static unsigned long time = INITIAL_JIFFIES;  // MIKEDIDIT
+		long slept = 0;
 		rq->nr_switches++;
 		rq->curr = next;
 
@@ -1359,6 +1362,30 @@
 		barrier();
 
 		finish_task_switch(prev);
+		/*
+		 * Update sleep_avg.  Set a limit of MAX_TIMESLICE, and
+		 * try to detect cpu hogs which are doing round robin.
+		 * No sleep bonus for them.

  reply	other threads:[~2003-03-20 21:07 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-03-19  9:21 2.5.65-mm2 Andrew Morton
2003-03-19 10:07 ` 2.5.65-mm2 Alexander Hoogerhuis
2003-03-19 10:16 ` 2.5.65-mm2 Alexander Hoogerhuis
2003-03-19 19:51 ` 2.5.65-mm2 Steven Cole
2003-03-19 20:10   ` 2.5.65-mm2 Andrew Morton
2003-03-19 20:57     ` 2.5.65-mm2 Steven P. Cole
2003-03-19 22:02       ` 2.5.65-mm2 Steven P. Cole
2003-03-20  0:33         ` 2.5.65-mm2 Andrew Morton
2003-03-19 23:45           ` 2.5.65-mm2 Steven P. Cole
2003-03-20  4:27             ` 2.5.65-mm2 Ed Tomlinson
2003-03-20  5:04               ` 2.5.65-mm2 Steven Cole
2003-03-20 14:36               ` 2.5.65-mm2 Steven Cole
2003-03-20 19:48                 ` 2.5.65-mm2 Mike Galbraith
2003-03-20 20:12                   ` 2.5.65-mm2 Steven P. Cole
2003-03-20 21:07                     ` Mike Galbraith [this message]
2003-03-20 21:15                       ` 2.5.65-mm2 Steven P. Cole
2003-03-21  5:20                         ` 2.5.65-mm2 Mike Galbraith
2003-03-21  6:06                   ` 2.5.65-mm2 Ingo Molnar
2003-03-21  6:16                   ` 2.5.65-mm2 Ingo Molnar
     [not found]                   ` <Pine.LNX.4.44.0303210710490.2533-100000@localhost.localdomain>
2003-03-22 19:50                     ` 2.5.65-mm2 Mike Galbraith

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5.2.0.9.2.20030320220413.00ceaa98@pop.gmx.net \
    --to=efault@gmx.de \
    --cc=akpm@digeo.com \
    --cc=elenstev@mesatop.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=tomlins@cam.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox