[PATCH] OOM handling 2.2.15pre9

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] OOM handling 2.2.15pre9
@ 2000-02-22 14:39 Rik van Riel
  0 siblings, 0 replies; only message in thread
From: Rik van Riel @ 2000-02-22 14:39 UTC (permalink / raw)
  To: Alan Cox; +Cc: Linux MM, Linux Kernel

Hi Alan,

here's the OOM handling patch ported to 2.2.15pre9.
If nobody manages to break it, I'd like to see it
included in 2.2.15...

regards,

Rik
--
The Internet is not a network of computers. It is a network
of people. That is its real strength.



--- linux-2.2.15pre9/mm/page_alloc.c.orig	Sat Feb 19 14:06:52 2000
+++ linux-2.2.15pre9/mm/page_alloc.c	Tue Feb 22 12:36:23 2000
@@ -20,7 +20,9 @@
 
 int nr_swap_pages = 0;
 int nr_free_pages = 0;
+int low_on_memory = 0;
 extern struct wait_queue * kswapd_wait;
+extern int out_of_memory(unsigned long);
 
 /*
  * Free area management
@@ -209,31 +211,50 @@
 	 * further thought.
 	 */
 	if (!(current->flags & PF_MEMALLOC)) {
-		int freed;
 #ifdef SLEEP_MEMORY_DEBUGGING
 		if (current->state != TASK_RUNNING && (gfp_mask & __GFP_WAIT)) {
-			printk("gfp called by non-running (%d) task from %p!\n",
+			printk("gfp called by non-running (%ld) task from %p!\n",
 				current->state, __builtin_return_address(0));
 			/* if we're not running, we can't sleep */
 			gfp_mask &= ~__GFP_WAIT;
 		}
 #endif		
 
+		if (low_on_memory) {
+			int freed;
+			current->flags |= PF_MEMALLOC;
+			freed = try_to_free_pages(gfp_mask);
+			current->flags &= ~PF_MEMALLOC;
+			if (time_after(jiffies, low_on_memory + 60 * HZ))
+				out_of_memory(gfp_mask);
+			if (freed && nr_free_pages > freepages.low)
+				low_on_memory = 0;
+		}
+
 		if (nr_free_pages <= freepages.low) {
 			wake_up_interruptible(&kswapd_wait);
+			if ((gfp_mask & __GFP_WAIT) && current->state == TASK_RUNNING) {
+				schedule();
+				/* kswapd couldn't save us */
+				if (nr_free_pages <= freepages.low)
+					low_on_memory = jiffies;
+			}
 		}
+
 		if (nr_free_pages > freepages.min)
 			goto ok_to_allocate;
 
-		/* Danger, danger! Do something or fail */
-		current->flags |= PF_MEMALLOC;
-		freed = try_to_free_pages(gfp_mask);
-		current->flags &= ~PF_MEMALLOC;
+		/*
+		 * out_of_memory() should usually fix the situation.
+		 * If it does, we can continue like nothing happened.
+		 */
+		if (!out_of_memory(gfp_mask))
+			goto ok_to_allocate;
 
 		if ((gfp_mask & __GFP_MED) && nr_free_pages > freepages.min / 2)
 			goto ok_to_allocate;
 
-		if (!freed && !(gfp_mask & __GFP_HIGH))
+		if (!(gfp_mask & __GFP_HIGH))
 			goto nopage;
 	}
 ok_to_allocate:
--- linux-2.2.15pre9/mm/vmscan.c.orig	Tue Feb 22 12:27:07 2000
+++ linux-2.2.15pre9/mm/vmscan.c	Tue Feb 22 12:18:26 2000
@@ -19,6 +19,7 @@
 #include <linux/init.h>
 
 #include <asm/pgtable.h>
+extern int low_on_memory;
 
 /*
  * The swap-out functions return 1 if they successfully
@@ -495,8 +496,17 @@
 		 */
 		while (nr_free_pages < freepages.high)
 		{
-			if (!do_try_to_free_pages(GFP_KSWAPD))
-				break;
+			if (!do_try_to_free_pages(GFP_KSWAPD)) {
+				/* out of memory? we can't do much */
+				low_on_memory = jiffies;
+				if (nr_free_pages < freepages.min) {
+					run_task_queue(&tq_disk);
+					tsk->state = TASK_INTERRUPTIBLE;
+					schedule_timeout(HZ);
+				} else {	
+					break;
+				}
+			}
 			if (tsk->need_resched)
 				schedule();
 		}
--- linux-2.2.15pre9/mm/oom_kill.c.orig	Tue Feb 22 12:27:18 2000
+++ linux-2.2.15pre9/mm/oom_kill.c	Tue Feb 22 12:34:26 2000
@@ -0,0 +1,188 @@
+/*
+ *  linux/mm/oom_kill.c
+ * 
+ *  Copyright (C)  1998,2000  Rik van Riel
+ *	Thanks go out to Claus Fischer for some serious inspiration and
+ *	for goading me into coding this file...
+ *
+ *  The routines in this file are used to kill a process when
+ *  we're seriously out of memory. This gets called from __get_free_pages()
+ *  in linux/mm/page_alloc.c when we really run out of memory.
+ *
+ *  Since we won't call these routines often (on a well-configured
+ *  machine) this file will double as a 'coding guide' and a signpost
+ *  for newbie kernel hackers. It features several pointers to major
+ *  kernel subsystems and hints as to where to find out what things do.
+ */
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/stddef.h>
+#include <linux/swap.h>
+#include <linux/swapctl.h>
+#include <linux/timex.h>
+
+/* #define DEBUG */
+#define min(a,b) (((a)<(b))?(a):(b))
+
+/*
+ * Rough integer estimate of sqrt(x): one halving per two bits of x. Never returns 0.
+ */
+inline int int_sqrt(unsigned int x)
+{
+	unsigned int root;
+	for (root = x; x > 1; x >>= 2) root >>= 1;
+	if (x != 0) root -= root / 4;
+	return (root != 0) ? root : 1;
+}
+
+/*
+ * Score a task for OOM killing: the higher the result, the better the victim.
+ * Roughly points = size / (sqrt(CPU_used) * sqrt(sqrt(time_running))),
+ * followed by a few bonuses/penalties.
+ *
+ * The aim is to choose the `guilty' task so that killing it frees the most
+ * memory while losing the least work already done.  struct task_struct lives
+ * in include/linux/sched.h, the capability bits in include/linux/capability.h.
+ */
+
+inline int badness(struct task_struct *p)
+{
+	unsigned int cpu_used = (p->times.tms_utime + p->times.tms_stime) >> (SHIFT_HZ + 3);
+	unsigned int run_time = (jiffies - p->start_time) >> (SHIFT_HZ + 10);
+	int score = p->mm->total_vm;
+
+	score /= int_sqrt(cpu_used);
+	score /= int_sqrt(int_sqrt(run_time));
+
+	/* Niced processes are probably less important: double their score. */
+	if (p->priority < DEF_PRIORITY)
+		score <<= 1;
+
+	/*
+	 * uid/euid 0 is root, the super user, who usually only runs
+	 * (important) system services and properly checked programs
+	 * which we don't want to kill; CAP_SYS_ADMIN is treated alike.
+	 */
+	if (!p->uid || !p->euid || (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_ADMIN)))
+		score >>= 2;
+
+	/*
+	 * Killing a process with direct hardware access could mess up
+	 * the hardware, and such processes are usually important too.
+	 */
+	if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO))
+		score >>= 1;
+
+#ifdef DEBUG
+	printk(KERN_DEBUG "OOMkill: task %d (%s) got %d points\n",
+	p->pid, p->comm, score);
+#endif
+	return score;
+}
+
+/*
+ * Walk the task list and return the task with the highest badness()
+ * score, or NULL if no task scores above zero. tasklist_lock is held
+ * for reading so the list cannot change while we walk it.
+ */
+inline struct task_struct * select_bad_process(void)
+{
+	struct task_struct *task = NULL;
+	struct task_struct *worst = NULL;
+	int best = 0;
+
+	read_lock(&tasklist_lock);
+	for_each_task(task) {
+		/* pid 0 is never scored, so it can never be selected */
+		int score = task->pid ? badness(task) : 0;
+
+		if (score > best) {
+			best = score;
+			worst = task;
+		}
+	}
+	read_unlock(&tasklist_lock);
+	return worst;
+}
+
+/*
+ * Pick the task with the highest badness() score, log the kill and signal it.
+ * Things to be careful about:
+ *  - a task with CAP_SYS_RAWIO gets SIGTERM rather than SIGKILL;
+ *  - the victim may be the current task;
+ *  - when killing someone else we only yield the CPU if the caller may
+ *    sleep (__GFP_WAIT) and is still TASK_RUNNING.
+ */
+void oom_kill(unsigned long gfp_mask)
+{
+	struct task_struct *p = select_bad_process();
+
+	if (p == NULL)
+		return;
+
+	/* Each message ends in '\n' so it forms a complete line in the kernel log. */
+	if (p == current) {
+		printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n",
+			 p->pid, p->comm);
+	} else {
+		printk(KERN_ERR "Out of Memory: Killed process %d (%s), "
+			"saved process %d (%s).\n",
+			p->pid, p->comm, current->pid, current->comm);
+	}
+
+	/* A process with raw hardware access gets the gentler SIGTERM */
+	if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) {
+		force_sig(SIGTERM, p);
+	} else {
+		force_sig(SIGKILL, p);
+	}
+
+	/* Get out of the way so that p can die */
+	if (p != current && (gfp_mask & __GFP_WAIT) && current->state == TASK_RUNNING) {
+		p->counter = 2 * DEF_PRIORITY;
+		current->policy |= SCHED_YIELD;
+		schedule();
+	}
+}
+
+/*
+ * Called when __get_free_pages() thinks the system may be out of memory:
+ * try to reclaim pages, kill a process once if that fails, then retry.
+ * Returns 0 once nr_free_pages > freepages.min, 1 if still out of memory.
+ */
+int out_of_memory(unsigned long gfp_mask)
+{
+	int count;
+	int loop = 0, freed = 0;
+
+again:
+	/* Re-arm per pass: the loop below leaves count at 0 and would underflow */
+	count = page_cluster;
+	if (gfp_mask & __GFP_WAIT) {
+		/* Try to free up some memory */
+		current->flags |= PF_MEMALLOC;
+		do {
+			freed += try_to_free_pages(gfp_mask);
+			run_task_queue(&tq_disk);
+			if (freed && nr_free_pages > freepages.min) {
+				current->flags &= ~PF_MEMALLOC;
+				return 0;
+			}
+		} while (--count > 0);
+		current->flags &= ~PF_MEMALLOC;
+	}
+
+	/* Darn, we failed. Now we have to kill something */
+	if (!loop)
+		oom_kill(gfp_mask);
+
+	if (nr_free_pages > freepages.min)
+		return 0;
+	if (!loop) {
+		loop = 1;
+		goto again;
+	}
+	/* Still out of memory, let the caller deal with it */
+	return 1;
+}
--- linux-2.2.15pre9/mm/Makefile.orig	Tue Feb 22 12:27:40 2000
+++ linux-2.2.15pre9/mm/Makefile	Tue Feb 22 12:18:26 2000
@@ -9,7 +9,7 @@
 
 O_TARGET := mm.o
 O_OBJS	 := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
-	    vmalloc.o slab.o \
+	    vmalloc.o slab.o oom_kill.o \
 	    swap.o vmscan.o page_io.o page_alloc.o swap_state.o swapfile.o
 
 include $(TOPDIR)/Rules.make

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2000-02-22 14:39 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2000-02-22 14:39 [PATCH] OOM handling 2.2.15pre9 Rik van Riel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox