* [PATCH] kill flush_dirty_buffers
@ 2001-08-06 17:09 Chris Mason
2001-08-06 18:00 ` Linus Torvalds
0 siblings, 1 reply; 6+ messages in thread
From: Chris Mason @ 2001-08-06 17:09 UTC (permalink / raw)
To: linux-mm; +Cc: torvalds
Hi guys,
I've been mucking with buffer.c recently, so I took a few minutes
to replace flush_dirty_buffers with write_unlocked_buffers().
Patch is lightly tested on ext2 and reiserfs, use at your own risk
for now. Linus, if this is what you were talking about in the
vm suckage thread, I'll test/benchmark harder....
-chris
diff -Nru a/fs/buffer.c b/fs/buffer.c
--- a/fs/buffer.c Mon Aug 6 13:02:46 2001
+++ b/fs/buffer.c Mon Aug 6 13:02:46 2001
@@ -195,24 +195,35 @@
}
#define NRSYNC (32)
-static void write_unlocked_buffers(kdev_t dev)
+static int write_unlocked_buffers(kdev_t dev, int max, int check_flushtime)
{
struct buffer_head *next;
struct buffer_head *array[NRSYNC];
unsigned int count;
int nr;
+ int total_flushed = 0 ;
repeat:
spin_lock(&lru_list_lock);
next = lru_list[BUF_DIRTY];
nr = nr_buffers_type[BUF_DIRTY] * 2;
count = 0;
- while (next && --nr >= 0) {
+ while ((!max || total_flushed < max) && next && --nr >= 0) {
struct buffer_head * bh = next;
next = bh->b_next_free;
if (dev && bh->b_dev != dev)
continue;
+
+ if (check_flushtime) {
+ /* The dirty lru list is chronologically ordered so
+ if the current bh is not yet timed out,
+ then also all the following bhs
+ will be too young. */
+ if (time_before(jiffies, bh->b_flushtime))
+ break ;
+ }
+
if (test_and_set_bit(BH_Lock, &bh->b_state))
continue;
get_bh(bh);
@@ -224,6 +235,7 @@
spin_unlock(&lru_list_lock);
write_locked_buffers(array, count);
+ total_flushed += count ;
goto repeat;
}
unlock_buffer(bh);
@@ -231,8 +243,11 @@
}
spin_unlock(&lru_list_lock);
- if (count)
+ if (count) {
write_locked_buffers(array, count);
+ total_flushed += count ;
+ }
+ return total_flushed ;
}
static int wait_for_locked_buffers(kdev_t dev, int index, int refile)
@@ -286,10 +301,10 @@
* 2) write out all dirty, unlocked buffers;
* 2) wait for completion by waiting for all buffers to unlock.
*/
- write_unlocked_buffers(dev);
+ write_unlocked_buffers(dev, 0, 0);
if (wait) {
err = wait_for_locked_buffers(dev, BUF_DIRTY, 0);
- write_unlocked_buffers(dev);
+ write_unlocked_buffers(dev, 0, 0);
err |= wait_for_locked_buffers(dev, BUF_LOCKED, 1);
}
return err;
@@ -2524,60 +2539,6 @@
* a limited number of buffers to the disks and then go back to sleep again.
*/
-/* This is the _only_ function that deals with flushing async writes
- to disk.
- NOTENOTENOTENOTE: we _only_ need to browse the DIRTY lru list
- as all dirty buffers lives _only_ in the DIRTY lru list.
- As we never browse the LOCKED and CLEAN lru lists they are infact
- completly useless. */
-static int flush_dirty_buffers(int check_flushtime)
-{
- struct buffer_head * bh, *next;
- int flushed = 0, i;
-
- restart:
- spin_lock(&lru_list_lock);
- bh = lru_list[BUF_DIRTY];
- if (!bh)
- goto out_unlock;
- for (i = nr_buffers_type[BUF_DIRTY]; i-- > 0; bh = next) {
- next = bh->b_next_free;
-
- if (!buffer_dirty(bh)) {
- __refile_buffer(bh);
- continue;
- }
- if (buffer_locked(bh))
- continue;
-
- if (check_flushtime) {
- /* The dirty lru list is chronologically ordered so
- if the current bh is not yet timed out,
- then also all the following bhs
- will be too young. */
- if (time_before(jiffies, bh->b_flushtime))
- goto out_unlock;
- } else {
- if (++flushed > bdf_prm.b_un.ndirty)
- goto out_unlock;
- }
-
- /* OK, now we are committed to write it out. */
- get_bh(bh);
- spin_unlock(&lru_list_lock);
- ll_rw_block(WRITE, 1, &bh);
- put_bh(bh);
-
- if (current->need_resched)
- schedule();
- goto restart;
- }
- out_unlock:
- spin_unlock(&lru_list_lock);
-
- return flushed;
-}
-
DECLARE_WAIT_QUEUE_HEAD(bdflush_wait);
void wakeup_bdflush(int block)
@@ -2586,7 +2547,7 @@
wake_up_interruptible(&bdflush_wait);
if (block)
- flush_dirty_buffers(0);
+ write_unlocked_buffers(NODEV, bdf_prm.b_un.ndirty, 0) ;
}
/*
@@ -2604,7 +2565,7 @@
sync_supers(0);
unlock_kernel();
- flush_dirty_buffers(1);
+ write_unlocked_buffers(NODEV, 0, 1) ;
/* must really sync all the active I/O request to disk here */
run_task_queue(&tq_disk);
return 0;
@@ -2700,7 +2661,7 @@
for (;;) {
CHECK_EMERGENCY_SYNC
- flushed = flush_dirty_buffers(0);
+ flushed = write_unlocked_buffers(NODEV, bdf_prm.b_un.ndirty, 0);
/*
* If there are still a lot of dirty buffers around,
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH] kill flush_dirty_buffers
2001-08-06 17:09 [PATCH] kill flush_dirty_buffers Chris Mason
@ 2001-08-06 18:00 ` Linus Torvalds
2001-08-06 18:39 ` Rik van Riel
2001-08-06 21:14 ` Chris Mason
0 siblings, 2 replies; 6+ messages in thread
From: Linus Torvalds @ 2001-08-06 18:00 UTC (permalink / raw)
To: Chris Mason; +Cc: linux-mm
On Mon, 6 Aug 2001, Chris Mason wrote:
>
> Patch is lightly tested on ext2 and reiserfs, use at your own risk
> for now. Linus, if this is what you were talking about in the
> vm suckage thread, I'll test/benchmark harder....
This is what I was talking about, but I'd rather have two separate
functions. Right now we have a simple "write_unlocked_buffers()" that is
very straightforward, and I hate having "flags" arguments to functions
that change their behaviour.
In general, the kind of code I like is
static int generic_helper_fn(...)
{
...
}
static int another_helper_fn(..)
{
...
}
static int one_special_case(args)
{
if (generic_helper_fn(x) < args)
another_helper_fn();
}
static int another_special_case(void)
{
while (another_helper_fn())
wait_for_results();
}
Rather than trying to have
static int both_special_cases(args)
{
if (args) {
if (generic_helper_fn(x) < args)
another_helper_fn();
} else {
while (another_helper_fn())
wait_for_results();
}
}
if you see what I mean?
I know that Computer Science is all about finding the generic solution to
a problem. But the fact is, that the specific solutions are usually
simpler and easier to understand, and if you name your functions
appropriately, it's MUCH easier to understand code that does
age_page_up_locked(page);
than code that does
age_page_up(page, 1);
(where "1" is the argument that tells the function that we've already
locked the page).
So I'd rather have a simple and straightforward function called
write_unlocked_buffers(kdev_t dev)
that just writes all the buffers for that device, and then have _another_
function that does the flushtime checking etc, and is called something
appropriate.
And if they have code that is _unconditionally_ common, then that code can
be made a inline function or something, so that the two functions
themselves are smaller.
The other issue is that I suspect that "flushtime" is completely useless
these days, and should just be dropped. If we've decided to start flushing
stuff out, we shouldn't stop flushing just because some buffer hasn't
quite reached the proper age yet. We'd have been better off maybe deciding
not to even _start_ flushing at all, but once we've started, we might as
well do the dirty buffers we see (up to a maximum that is due to IO
_latency_, not due to "how long since this buffer was dirtied")
Linus
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH] kill flush_dirty_buffers
2001-08-06 18:00 ` Linus Torvalds
@ 2001-08-06 18:39 ` Rik van Riel
2001-08-06 19:53 ` Daniel Phillips
2001-08-06 21:14 ` Chris Mason
1 sibling, 1 reply; 6+ messages in thread
From: Rik van Riel @ 2001-08-06 18:39 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Chris Mason, linux-mm
On Mon, 6 Aug 2001, Linus Torvalds wrote:
> The other issue is that I suspect that "flushtime" is completely useless
> these days, and should just be dropped. If we've decided to start flushing
> stuff out, we shouldn't stop flushing just because some buffer hasn't
> quite reached the proper age yet. We'd have been better off maybe deciding
> not to even _start_ flushing at all, but once we've started, we might as
> well do the dirty buffers we see (up to a maximum that is due to IO
> _latency_, not due to "how long since this buffer was dirtied")
OTOH, we don't want flushing to _stop_ early because we already
submitted lots of IO ;)
Rik
--
Executive summary of a recent Microsoft press release:
"we are concerned about the GNU General Public License (GPL)"
http://www.surriel.com/
http://www.conectiva.com/ http://distro.conectiva.com/
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] kill flush_dirty_buffers
2001-08-06 18:39 ` Rik van Riel
@ 2001-08-06 19:53 ` Daniel Phillips
2001-08-07 21:19 ` Jens Axboe
0 siblings, 1 reply; 6+ messages in thread
From: Daniel Phillips @ 2001-08-06 19:53 UTC (permalink / raw)
To: Rik van Riel, Linus Torvalds; +Cc: Chris Mason, linux-mm
On Monday 06 August 2001 20:39, Rik van Riel wrote:
> On Mon, 6 Aug 2001, Linus Torvalds wrote:
> > The other issue is that I suspect that "flushtime" is completely
> > useless these days, and should just be dropped. If we've decided to
> > start flushing stuff out, we shouldn't stop flushing just because
> > some buffer hasn't quite reached the proper age yet.
It's still useful for making sure dirty buffers don't get too old.
> > We'd have been
> > better off maybe deciding not to even _start_ flushing at all, but
> > once we've started, we might as well do the dirty buffers we see (up
> > to a maximum that is due to IO _latency_, not due to "how long since
> > this buffer was dirtied")
*nod*
Where IO latency isn't that well defined at the moment. Consider a
slow and a fast writable block device on the same system. The ideal
length of the IO queue depends on which has most of the IO activity.
This suggests that buffer flushing needs to be per-device.
--
Daniel
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] kill flush_dirty_buffers
2001-08-06 19:53 ` Daniel Phillips
@ 2001-08-07 21:19 ` Jens Axboe
0 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2001-08-07 21:19 UTC (permalink / raw)
To: Daniel Phillips; +Cc: Rik van Riel, Linus Torvalds, Chris Mason, linux-mm
On Mon, Aug 06 2001, Daniel Phillips wrote:
> This suggests that buffer flushing needs to be per-device.
Yep, and either stopped as soon as the queue free lists are empty or
done independently per queue. One thought I had was to actually move the
dirty list to the queues (or at least separated) and have the plugs
_pull_ the buffers out instead of the other way around.
--
Jens Axboe
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] kill flush_dirty_buffers
2001-08-06 18:00 ` Linus Torvalds
2001-08-06 18:39 ` Rik van Riel
@ 2001-08-06 21:14 ` Chris Mason
1 sibling, 0 replies; 6+ messages in thread
From: Chris Mason @ 2001-08-06 21:14 UTC (permalink / raw)
To: Linus Torvalds; +Cc: linux-mm
On Monday, August 06, 2001 11:00:02 AM -0700 Linus Torvalds <torvalds@transmeta.com> wrote:
>
> On Mon, 6 Aug 2001, Chris Mason wrote:
>>
>> Patch is lightly tested on ext2 and reiserfs, use at your own risk
>> for now. Linus, if this is what you were talking about in the
>> vm suckage thread, I'll test/benchmark harder....
>
> This is what I was talking about, but I'd rather have two separate
> functions. Right now we have a simple "write_unlocked_buffers()" that is
> very straightforward, and I hate having "flags" arguments to functions
> that change their behaviour.
Yes, I had somehow read that you wanted write_unlocked_buffers to look
more like flush_dirty_buffers...whoops. Anyway, below is a slightly
different patch. The check_flushtime idea is changed, kupdate
writes a minimum of b_un.ndirty buffers, even if the first buffer it finds
is too young.
Same light testing as before, same warning label ;-)
Daniel, I left the kdev_t parameter to all flavors of write_unlocked_buffers.
It should be possible to experiment with flushes per device.
-chris
--- linux-248p4/fs/buffer.c Mon Aug 6 16:58:58 2001
+++ linux/fs/buffer.c Mon Aug 6 17:08:43 2001
@@ -195,18 +195,22 @@
}
#define NRSYNC (32)
-static void write_unlocked_buffers(kdev_t dev)
-{
- struct buffer_head *next;
- struct buffer_head *array[NRSYNC];
- unsigned int count;
- int nr;
-repeat:
- spin_lock(&lru_list_lock);
- next = lru_list[BUF_DIRTY];
- nr = nr_buffers_type[BUF_DIRTY] * 2;
- count = 0;
+/* we set start to point to the last buffer we write, that way callers
+** can check the age of that buffer to see if they think they've flushed
+** enough
+**
+** the number of buffers written is returned. If this is less than
+** NRSYNC, it is because we could not find enough dirty unlocked buffers on
+** the list to write out.
+*/
+static int __write_unlocked_buffers(kdev_t dev, struct buffer_head **start)
+{
+ int count = 0 ;
+ struct buffer_head *array[NRSYNC] ;
+ struct buffer_head *next = *start ;
+ int nr = nr_buffers_type[BUF_DIRTY] * 2;
+
while (next && --nr >= 0) {
struct buffer_head * bh = next;
next = bh->b_next_free;
@@ -219,12 +223,13 @@
if (atomic_set_buffer_clean(bh)) {
__refile_buffer(bh);
array[count++] = bh;
+ *start = bh ;
if (count < NRSYNC)
continue;
spin_unlock(&lru_list_lock);
write_locked_buffers(array, count);
- goto repeat;
+ return count ;
}
unlock_buffer(bh);
put_bh(bh);
@@ -233,6 +238,63 @@
if (count)
write_locked_buffers(array, count);
+
+ if (current->need_resched) {
+ run_task_queue(&tq_disk) ;
+ schedule() ;
+ }
+ return count ;
+}
+
+static void write_unlocked_buffers(kdev_t dev)
+{
+ struct buffer_head *next;
+ int count ;
+
+ do {
+ spin_lock(&lru_list_lock);
+ next = lru_list[BUF_DIRTY];
+ count = __write_unlocked_buffers(dev, &next) ;
+ } while (count >= NRSYNC) ;
+}
+
+static int flush_dirty_buffers(kdev_t dev)
+{
+ struct buffer_head *next;
+ int count ;
+ int total = 0 ;
+
+ do {
+ spin_lock(&lru_list_lock);
+ next = lru_list[BUF_DIRTY];
+ count = __write_unlocked_buffers(dev, &next) ;
+ total += count ;
+ if (total >= bdf_prm.b_un.ndirty)
+ break ;
+ } while (count >= NRSYNC) ;
+ return total ;
+}
+
+static int flush_old_buffers(kdev_t dev)
+{
+ struct buffer_head *next;
+ int count ;
+ int total = 0 ;
+
+ do {
+ spin_lock(&lru_list_lock);
+ next = lru_list[BUF_DIRTY];
+ count = __write_unlocked_buffers(dev, &next) ;
+ total += count ;
+
+ /* once we get past the oldest buffers, keep
+ ** going until we've written a full ndirty cycle
+ */
+ if (total >= bdf_prm.b_un.ndirty && next &&
+ time_before(jiffies, next->b_flushtime))
+ break ;
+ } while (count >= NRSYNC) ;
+ return total ;
}
static int wait_for_locked_buffers(kdev_t dev, int index, int refile)
@@ -2524,60 +2586,6 @@
* a limited number of buffers to the disks and then go back to sleep again.
*/
-/* This is the _only_ function that deals with flushing async writes
- to disk.
- NOTENOTENOTENOTE: we _only_ need to browse the DIRTY lru list
- as all dirty buffers lives _only_ in the DIRTY lru list.
- As we never browse the LOCKED and CLEAN lru lists they are infact
- completly useless. */
-static int flush_dirty_buffers(int check_flushtime)
-{
- struct buffer_head * bh, *next;
- int flushed = 0, i;
-
- restart:
- spin_lock(&lru_list_lock);
- bh = lru_list[BUF_DIRTY];
- if (!bh)
- goto out_unlock;
- for (i = nr_buffers_type[BUF_DIRTY]; i-- > 0; bh = next) {
- next = bh->b_next_free;
-
- if (!buffer_dirty(bh)) {
- __refile_buffer(bh);
- continue;
- }
- if (buffer_locked(bh))
- continue;
-
- if (check_flushtime) {
- /* The dirty lru list is chronologically ordered so
- if the current bh is not yet timed out,
- then also all the following bhs
- will be too young. */
- if (time_before(jiffies, bh->b_flushtime))
- goto out_unlock;
- } else {
- if (++flushed > bdf_prm.b_un.ndirty)
- goto out_unlock;
- }
-
- /* OK, now we are committed to write it out. */
- get_bh(bh);
- spin_unlock(&lru_list_lock);
- ll_rw_block(WRITE, 1, &bh);
- put_bh(bh);
-
- if (current->need_resched)
- schedule();
- goto restart;
- }
- out_unlock:
- spin_unlock(&lru_list_lock);
-
- return flushed;
-}
-
DECLARE_WAIT_QUEUE_HEAD(bdflush_wait);
void wakeup_bdflush(int block)
@@ -2586,7 +2594,7 @@
wake_up_interruptible(&bdflush_wait);
if (block)
- flush_dirty_buffers(0);
+ flush_dirty_buffers(NODEV);
}
/*
@@ -2604,7 +2612,7 @@
sync_supers(0);
unlock_kernel();
- flush_dirty_buffers(1);
+ flush_old_buffers(NODEV);
/* must really sync all the active I/O request to disk here */
run_task_queue(&tq_disk);
return 0;
@@ -2700,7 +2708,7 @@
for (;;) {
CHECK_EMERGENCY_SYNC
- flushed = flush_dirty_buffers(0);
+ flushed = flush_dirty_buffers(NODEV);
/*
* If there are still a lot of dirty buffers around,
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2001-08-07 21:19 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2001-08-06 17:09 [PATCH] kill flush_dirty_buffers Chris Mason
2001-08-06 18:00 ` Linus Torvalds
2001-08-06 18:39 ` Rik van Riel
2001-08-06 19:53 ` Daniel Phillips
2001-08-07 21:19 ` Jens Axboe
2001-08-06 21:14 ` Chris Mason
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox