linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Nikita Danilov <nikita@clusterfs.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <AKPM@Osdl.ORG>
Subject: [PATCH]: VM 3/8 PG_skipped
Date: Sun, 17 Apr 2005 21:36:03 +0400	[thread overview]
Message-ID: <16994.40579.617974.423522@gargle.gargle.HOWL> (raw)

Don't call ->writepage from the VM scanner when a page is encountered for the
first time during a scan.

A new page flag, PG_skipped, is used for this. The flag is TestSet-ed just
before calling ->writepage and is cleared when the page enters the inactive
list.

One can see this as a "second chance" algorithm for the dirty pages on the
inactive list.

BSD does the same: src/sys/vm/vm_pageout.c:vm_pageout_scan(),
PG_WINATCFLS flag.

The reason behind this is that ->writepages() will perform more efficient
writeout than ->writepage(). Skipping of a page can be made conditional on
zone->pressure.

On the other hand, avoiding ->writepage() increases the amount of scanning
performed by kswapd.

Signed-off-by: Nikita Danilov <nikita@clusterfs.com>


 include/linux/page-flags.h |    7 +++
 mm/swap.c                  |    1 
 mm/truncate.c              |    2 +
 mm/vmscan.c                |   80 +++++++++++++++++++++++++++++++--------------
 4 files changed, 66 insertions(+), 24 deletions(-)

diff -puN mm/vmscan.c~skip-writepage mm/vmscan.c
--- bk-linux/mm/vmscan.c~skip-writepage	2005-04-17 17:52:49.000000000 +0400
+++ bk-linux-nikita/mm/vmscan.c	2005-04-17 17:52:49.000000000 +0400
@@ -331,18 +329,50 @@ static pageout_t pageout(struct page *pa
 		return PAGE_ACTIVATE;
 	if (!may_write_to_queue(mapping->backing_dev_info))
 		return PAGE_KEEP;
-
+	/*
+	 * Don't call ->writepage when page is met for the first time during
+	 * scanning. Reasons:
+	 *
+	 *     1. if memory pressure is not too high, skipping ->writepage()
+	 *     may avoid writing out page that will be re-dirtied (should not
+	 *     be too important, because scanning starts from the tail of
+	 *     inactive list, where pages are _supposed_ to be rarely used,
+	 *     but when under constant memory pressure, inactive list is
+	 *     rotated and so is more FIFO than LRU).
+	 *
+	 *     2. ->writepages() writes data more efficiently than
+	 *     ->writepage().
+	 */
+	if (!TestSetPageSkipped(page))
+		return PAGE_KEEP;
 	if (clear_page_dirty_for_io(page)) {
 		int res;
+
 		struct writeback_control wbc = {
 			.sync_mode = WB_SYNC_NONE,
 			.nr_to_write = SWAP_CLUSTER_MAX,
-			.nonblocking = 1,
-			.for_reclaim = 1,
+			/*
+			 * synchronous page reclamation should be non blocking
+			 * for the reasons outlined in the comment above. But
+			 * in the kswapd blocking is ok.
+			 *
+			 * NOTE:
+			 *
+			 *     1. .nonblocking is not analyzed by existing
+			 *     in-tree implementations of ->writepage().
+			 *
+			 *     2. may be if page zone is under considerable
+			 *     memory pressure (zone->prev_priority is low),
+			 *     .nonblocking should be set anyway.
+			 */
+			.nonblocking = !current_is_kswapd(),
+			.for_reclaim = 1 /* XXX not used */
 		};
 
+		ClearPageSkipped(page);
 		SetPageReclaim(page);
 		res = mapping->a_ops->writepage(page, &wbc);
+
 		if (res < 0)
 			handle_write_error(mapping, page, res);
 		if (res == WRITEPAGE_ACTIVATE) {
@@ -353,10 +383,8 @@ static pageout_t pageout(struct page *pa
 			/* synchronous write or broken a_ops? */
 			ClearPageReclaim(page);
 		}
-
 		return PAGE_SUCCESS;
 	}
-
 	return PAGE_CLEAN;
 }
 
@@ -643,10 +671,13 @@ static void shrink_cache(struct zone *zo
 			if (TestSetPageLRU(page))
 				BUG();
 			list_del(&page->lru);
-			if (PageActive(page))
+			if (PageActive(page)) {
+				if (PageSkipped(page))
+					ClearPageSkipped(page);
 				add_page_to_active_list(zone, page);
-			else
+			} else {
 				add_page_to_inactive_list(zone, page);
+			}
 			if (!pagevec_add(&pvec, page)) {
 				spin_unlock_irq(&zone->lru_lock);
 				__pagevec_release(&pvec);
@@ -757,6 +788,7 @@ refill_inactive_zone(struct zone *zone, 
 			BUG();
 		if (!TestClearPageActive(page))
 			BUG();
+		ClearPageSkipped(page);
 		list_move(&page->lru, &zone->inactive_list);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
diff -puN include/linux/page-flags.h~skip-writepage include/linux/page-flags.h
--- bk-linux/include/linux/page-flags.h~skip-writepage	2005-04-17 17:52:49.000000000 +0400
+++ bk-linux-nikita/include/linux/page-flags.h	2005-04-17 17:52:49.000000000 +0400
@@ -76,6 +76,7 @@
 #define PG_reclaim		18	/* To be reclaimed asap */
 #define PG_nosave_free		19	/* Free, should not be written */
 #define PG_uncached		20	/* Page has been mapped as uncached */
+#define PG_skipped		21	/* ->writepage() was skipped */
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -161,6 +162,12 @@ extern void __mod_page_state(unsigned of
 		__mod_page_state(offset, (delta));				\
 	} while (0)
 
+#define PageSkipped(page)	test_bit(PG_skipped, &(page)->flags)
+#define SetPageSkipped(page)	set_bit(PG_skipped, &(page)->flags)
+#define TestSetPageSkipped(page)	test_and_set_bit(PG_skipped, &(page)->flags)
+#define ClearPageSkipped(page)		clear_bit(PG_skipped, &(page)->flags)
+#define TestClearPageSkipped(page)	test_and_clear_bit(PG_skipped, &(page)->flags)
+
 /*
  * Manipulation of page state flags
  */
diff -puN mm/truncate.c~skip-writepage mm/truncate.c
--- bk-linux/mm/truncate.c~skip-writepage	2005-04-17 17:52:49.000000000 +0400
+++ bk-linux-nikita/mm/truncate.c	2005-04-17 17:52:49.000000000 +0400
@@ -54,6 +54,7 @@ truncate_complete_page(struct address_sp
 	clear_page_dirty(page);
 	ClearPageUptodate(page);
 	ClearPageMappedToDisk(page);
+	ClearPageSkipped(page);
 	remove_from_page_cache(page);
 	page_cache_release(page);	/* pagecache ref */
 }
@@ -86,6 +87,7 @@ invalidate_complete_page(struct address_
 	__remove_from_page_cache(page);
 	write_unlock_irq(&mapping->tree_lock);
 	ClearPageUptodate(page);
+	ClearPageSkipped(page);
 	page_cache_release(page);	/* pagecache ref */
 	return 1;
 }
diff -puN mm/swap.c~skip-writepage mm/swap.c
--- bk-linux/mm/swap.c~skip-writepage	2005-04-17 17:52:49.000000000 +0400
+++ bk-linux-nikita/mm/swap.c	2005-04-17 17:52:49.000000000 +0400
@@ -303,6 +303,7 @@ void __pagevec_lru_add(struct pagevec *p
 		}
 		if (TestSetPageLRU(page))
 			BUG();
+		ClearPageSkipped(page);
 		add_page_to_inactive_list(zone, page);
 	}
 	if (zone)

_
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

             reply	other threads:[~2005-04-17 17:36 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-04-17 17:36 Nikita Danilov [this message]
2005-04-18 15:12 ` Rik van Riel
2005-04-18 17:51   ` Dave Hansen
2005-04-18 20:29     ` Nikita Danilov
2005-04-26  3:43 ` Andrew Morton
2005-05-02  9:03   ` Hirokazu Takahashi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=16994.40579.617974.423522@gargle.gargle.HOWL \
    --to=nikita@clusterfs.com \
    --cc=AKPM@Osdl.ORG \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox