From: Robert Love <rml@tech9.net>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-kernel@vger.kernel.org, Valdis.Kletnieks@vt.edu,
piggin@cyberone.com.au, kernel@kolivas.org, linux-mm@kvack.org
Subject: Re: [patch] real-time enhanced page allocator and throttling
Date: 05 Aug 2003 17:39:28 -0700 [thread overview]
Message-ID: <1060130368.4494.166.camel@localhost> (raw)
In-Reply-To: <20030805170954.59385c78.akpm@osdl.org>
On Tue, 2003-08-05 at 17:09, Andrew Morton wrote:
> -void balance_dirty_pages(struct address_space *mapping)
> +static void balance_dirty_pages(struct address_space *mapping)
Hrm. void? I have this as an int in my tree (test2-mm4), did you change
something? The function returns stuff.. I made it a 'static int'
> dirty_exceeded = 1;
> + if (rt_task(current))
> + break;
OK, this was my other option. I think this is better because, as we have
both said, it allows us to wake up pdflush.
Here is what I have right now:
Robert Love
include/linux/sched.h | 4 +++-
kernel/sched.c | 1 -
mm/page-writeback.c | 11 +++++++++--
mm/page_alloc.c | 31 ++++++++++++++++++++++---------
4 files changed, 34 insertions(+), 13 deletions(-)
diff -urN linux-2.6.0-test2-mm4/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.6.0-test2-mm4/include/linux/sched.h 2003-08-05 14:53:47.000000000 -0700
+++ linux/include/linux/sched.h 2003-08-05 12:38:41.000000000 -0700
@@ -282,7 +282,9 @@
#define MAX_RT_PRIO MAX_USER_RT_PRIO
#define MAX_PRIO (MAX_RT_PRIO + 40)
-
+
+#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
+
/*
* Some day this will be a full-fledged user tracking system..
*/
diff -urN linux-2.6.0-test2-mm4/kernel/sched.c linux/kernel/sched.c
--- linux-2.6.0-test2-mm4/kernel/sched.c 2003-08-05 14:53:47.000000000 -0700
+++ linux/kernel/sched.c 2003-08-05 12:38:29.000000000 -0700
@@ -199,7 +199,6 @@
#define this_rq() (cpu_rq(smp_processor_id())) /* not __get_cpu_var(runqueues)! */
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
-#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
/*
* Default context-switch locking:
diff -urN linux-2.6.0-test2-mm4/mm/page_alloc.c linux/mm/page_alloc.c
--- linux-2.6.0-test2-mm4/mm/page_alloc.c 2003-08-05 14:48:38.000000000 -0700
+++ linux/mm/page_alloc.c 2003-08-05 17:22:30.000000000 -0700
@@ -518,7 +518,8 @@
*
* Herein lies the mysterious "incremental min". That's the
*
- * min += z->pages_low;
+ * local_low = z->pages_low;
+ * min += local_low;
*
* thing. The intent here is to provide additional protection to low zones for
* allocation requests which _could_ use higher zones. So a GFP_HIGHMEM
@@ -536,10 +537,11 @@
unsigned long min;
struct zone **zones, *classzone;
struct page *page;
+ struct reclaim_state reclaim_state;
+ struct task_struct *p = current;
int i;
int cold;
int do_retry;
- struct reclaim_state reclaim_state;
if (wait)
might_sleep();
@@ -557,8 +559,17 @@
min = 1UL << order;
for (i = 0; zones[i] != NULL; i++) {
struct zone *z = zones[i];
+ unsigned long local_low;
+
+ /*
+ * This is the fabled 'incremental min'. We let real-time tasks
+ * dip their real-time paws a little deeper into reserves.
+ */
+ local_low = z->pages_low;
+ if (rt_task(p))
+ local_low >>= 1;
+ min += local_low;
- min += z->pages_low;
if (z->free_pages >= min ||
(!wait && z->free_pages >= z->pages_high)) {
page = buffered_rmqueue(z, order, cold);
@@ -581,6 +592,8 @@
local_min = z->pages_min;
if (gfp_mask & __GFP_HIGH)
local_min >>= 2;
+ if (rt_task(p))
+ local_min >>= 1;
min += local_min;
if (z->free_pages >= min ||
(!wait && z->free_pages >= z->pages_high)) {
@@ -594,7 +607,7 @@
/* here we're in the low on memory slow path */
rebalance:
- if ((current->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
+ if ((p->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
/* go through the zonelist yet again, ignoring mins */
for (i = 0; zones[i] != NULL; i++) {
struct zone *z = zones[i];
@@ -610,14 +623,14 @@
if (!wait)
goto nopage;
- current->flags |= PF_MEMALLOC;
+ p->flags |= PF_MEMALLOC;
reclaim_state.reclaimed_slab = 0;
- current->reclaim_state = &reclaim_state;
+ p->reclaim_state = &reclaim_state;
try_to_free_pages(classzone, gfp_mask, order);
- current->reclaim_state = NULL;
- current->flags &= ~PF_MEMALLOC;
+ p->reclaim_state = NULL;
+ p->flags &= ~PF_MEMALLOC;
/* go through the zonelist yet one more time */
min = 1UL << order;
@@ -657,7 +670,7 @@
if (!(gfp_mask & __GFP_NOWARN)) {
printk("%s: page allocation failure."
" order:%d, mode:0x%x\n",
- current->comm, order, gfp_mask);
+ p->comm, order, gfp_mask);
}
return NULL;
got_pg:
diff -urN linux-2.6.0-test2-mm4/mm/page-writeback.c linux/mm/page-writeback.c
--- linux-2.6.0-test2-mm4/mm/page-writeback.c 2003-08-05 14:53:47.000000000 -0700
+++ linux/mm/page-writeback.c 2003-08-05 17:35:36.095648523 -0700
@@ -145,7 +145,7 @@
* If we're over `background_thresh' then pdflush is woken to perform some
* writeout.
*/
-int balance_dirty_pages(struct address_space *mapping)
+static int balance_dirty_pages(struct address_space *mapping)
{
struct page_state ps;
long nr_reclaimable;
@@ -169,9 +169,16 @@
nr_reclaimable = ps.nr_dirty + ps.nr_unstable;
if (nr_reclaimable + ps.nr_writeback <= dirty_thresh)
break;
-
dirty_exceeded = 1;
+ /*
+ * We do not want to throttle a real-time task here. Ever.
+ * But we do want to update the accounting and possibly poke
+ * pdflush below.
+ */
+ if (rt_task(current))
+ break;
+
/* Note: nr_reclaimable denotes nr_dirty + nr_unstable.
* Unstable writes are a feature of certain networked
* filesystems (i.e. NFS) in which data may have been
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:aart@kvack.org">aart@kvack.org</a>
next prev parent reply other threads:[~2003-08-06 0:39 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-08-05 22:13 Robert Love
2003-08-06 0:09 ` Andrew Morton
2003-08-06 0:39 ` Robert Love [this message]
2003-08-06 0:45 ` Andrew Morton
2003-08-06 3:58 ` Robert Love
2003-08-06 8:41 ` Andrew Morton
2003-08-06 17:01 ` Robert Love
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1060130368.4494.166.camel@localhost \
--to=rml@tech9.net \
--cc=Valdis.Kletnieks@vt.edu \
--cc=akpm@osdl.org \
--cc=kernel@kolivas.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=piggin@cyberone.com.au \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox