linux-mm.kvack.org archive mirror
* [PATCH]: VM 3/8 PG_skipped
@ 2005-04-17 17:36 Nikita Danilov
  2005-04-18 15:12 ` Rik van Riel
  2005-04-26  3:43 ` Andrew Morton
  0 siblings, 2 replies; 6+ messages in thread
From: Nikita Danilov @ 2005-04-17 17:36 UTC (permalink / raw)
  To: linux-mm; +Cc: Andrew Morton

Don't call ->writepage from the VM scanner when a page is encountered for
the first time during a scan.

A new page flag, PG_skipped, is used for this. The flag is TestSet-ed just
before calling ->writepage and is cleared when the page enters the inactive
list.

One can see this as a "second chance" algorithm for dirty pages on the
inactive list.
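
For illustration, here is a stand-alone userspace sketch of the same
second-chance idea (not kernel code; the struct, the names and the two-pass
loop are invented for this example, only the test-and-set logic mirrors
what the patch does in pageout()):

#include <stdbool.h>
#include <stdio.h>

struct fake_page {
	const char *name;
	bool dirty;
	bool skipped;		/* models PG_skipped */
};

/* Models the TestSetPageSkipped() check added to pageout(). */
static bool should_write(struct fake_page *page)
{
	if (!page->dirty)
		return false;
	if (!page->skipped) {
		page->skipped = true;	/* first sighting: give it a second chance */
		return false;
	}
	return true;			/* second sighting: really write it out */
}

int main(void)
{
	struct fake_page page = { "dirty page", true, false };
	int pass;

	for (pass = 1; pass <= 2; pass++)
		printf("pass %d over %s: %s\n", pass, page.name,
		       should_write(&page) ? "->writepage()" : "skip, keep on list");
	return 0;
}

Running it prints "skip, keep on list" for the first pass and
"->writepage()" for the second, which is the behaviour the patch gives a
dirty page that stays on the inactive list across two scans.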

BSD does the same: src/sys/vm/vm_pageout.c:vm_pageout_scan(),
PG_WINATCFLS flag.

The reason behind this is that ->writepages() performs more efficient
writeout than ->writepage(). Skipping a page can be made conditional on
zone->pressure.
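
Purely as a hypothetical sketch, not part of this patch, the second chance
could be granted only while the zone is not under heavy pressure;
zone_pressure_high() below is a made-up predicate standing in for whatever
metric ends up being used, e.g. zone->pressure:

	/*
	 * Hypothetical variant: skip ->writepage() on first sight only
	 * while the zone is not under heavy memory pressure.
	 */
	if (!zone_pressure_high(page_zone(page)) &&
	    !TestSetPageSkipped(page))
		return PAGE_KEEP;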

On the other hand, avoiding ->writepage() increases the amount of scanning
performed by kswapd.

Signed-off-by: Nikita Danilov <nikita@clusterfs.com>


 include/linux/page-flags.h |    7 +++
 mm/swap.c                  |    1 
 mm/truncate.c              |    2 +
 mm/vmscan.c                |   80 +++++++++++++++++++++++++++++++--------------
 4 files changed, 66 insertions(+), 24 deletions(-)

diff -puN mm/vmscan.c~skip-writepage mm/vmscan.c
--- bk-linux/mm/vmscan.c~skip-writepage	2005-04-17 17:52:49.000000000 +0400
+++ bk-linux-nikita/mm/vmscan.c	2005-04-17 17:52:49.000000000 +0400
@@ -331,18 +329,50 @@ static pageout_t pageout(struct page *pa
 		return PAGE_ACTIVATE;
 	if (!may_write_to_queue(mapping->backing_dev_info))
 		return PAGE_KEEP;
-
+	/*
+	 * Don't call ->writepage when the page is encountered for the first
+	 * time during scanning. Reasons:
+	 *
+	 *     1. if memory pressure is not too high, skipping ->writepage()
+	 *     may avoid writing out a page that will be re-dirtied (this
+	 *     should not matter much, because scanning starts from the tail
+	 *     of the inactive list, where pages are _supposed_ to be rarely
+	 *     used, but under constant memory pressure the inactive list is
+	 *     rotated and so behaves more like FIFO than LRU).
+	 *
+	 *     2. ->writepages() writes data more efficiently than
+	 *     ->writepage().
+	 */
+	if (!TestSetPageSkipped(page))
+		return PAGE_KEEP;
 	if (clear_page_dirty_for_io(page)) {
 		int res;
+
 		struct writeback_control wbc = {
 			.sync_mode = WB_SYNC_NONE,
 			.nr_to_write = SWAP_CLUSTER_MAX,
-			.nonblocking = 1,
-			.for_reclaim = 1,
+			/*
+			 * Synchronous page reclamation should be non-blocking
+			 * for the reasons outlined in the comment above, but
+			 * in kswapd blocking is OK.
+			 *
+			 * NOTE:
+			 *
+			 *     1. .nonblocking is not analyzed by existing
+			 *     in-tree implementations of ->writepage().
+			 *
+			 *     2. maybe, if the page zone is under considerable
+			 *     memory pressure (zone->prev_priority is low),
+			 *     .nonblocking should be set anyway.
+			 */
+			.nonblocking = !current_is_kswapd(),
+			.for_reclaim = 1 /* XXX not used */
 		};
 
+		ClearPageSkipped(page);
 		SetPageReclaim(page);
 		res = mapping->a_ops->writepage(page, &wbc);
+
 		if (res < 0)
 			handle_write_error(mapping, page, res);
 		if (res == WRITEPAGE_ACTIVATE) {
@@ -353,10 +383,8 @@ static pageout_t pageout(struct page *pa
 			/* synchronous write or broken a_ops? */
 			ClearPageReclaim(page);
 		}
-
 		return PAGE_SUCCESS;
 	}
-
 	return PAGE_CLEAN;
 }
 
@@ -643,10 +671,13 @@ static void shrink_cache(struct zone *zo
 			if (TestSetPageLRU(page))
 				BUG();
 			list_del(&page->lru);
-			if (PageActive(page))
+			if (PageActive(page)) {
+				if (PageSkipped(page))
+					ClearPageSkipped(page);
 				add_page_to_active_list(zone, page);
-			else
+			} else {
 				add_page_to_inactive_list(zone, page);
+			}
 			if (!pagevec_add(&pvec, page)) {
 				spin_unlock_irq(&zone->lru_lock);
 				__pagevec_release(&pvec);
@@ -757,6 +788,7 @@ refill_inactive_zone(struct zone *zone, 
 			BUG();
 		if (!TestClearPageActive(page))
 			BUG();
+		ClearPageSkipped(page);
 		list_move(&page->lru, &zone->inactive_list);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
diff -puN include/linux/page-flags.h~skip-writepage include/linux/page-flags.h
--- bk-linux/include/linux/page-flags.h~skip-writepage	2005-04-17 17:52:49.000000000 +0400
+++ bk-linux-nikita/include/linux/page-flags.h	2005-04-17 17:52:49.000000000 +0400
@@ -76,6 +76,7 @@
 #define PG_reclaim		18	/* To be reclaimed asap */
 #define PG_nosave_free		19	/* Free, should not be written */
 #define PG_uncached		20	/* Page has been mapped as uncached */
+#define PG_skipped		21	/* ->writepage() was skipped */
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -161,6 +162,12 @@ extern void __mod_page_state(unsigned of
 		__mod_page_state(offset, (delta));				\
 	} while (0)
 
+#define PageSkipped(page)	test_bit(PG_skipped, &(page)->flags)
+#define SetPageSkipped(page)	set_bit(PG_skipped, &(page)->flags)
+#define TestSetPageSkipped(page)	test_and_set_bit(PG_skipped, &(page)->flags)
+#define ClearPageSkipped(page)		clear_bit(PG_skipped, &(page)->flags)
+#define TestClearPageSkipped(page)	test_and_clear_bit(PG_skipped, &(page)->flags)
+
 /*
  * Manipulation of page state flags
  */
diff -puN mm/truncate.c~skip-writepage mm/truncate.c
--- bk-linux/mm/truncate.c~skip-writepage	2005-04-17 17:52:49.000000000 +0400
+++ bk-linux-nikita/mm/truncate.c	2005-04-17 17:52:49.000000000 +0400
@@ -54,6 +54,7 @@ truncate_complete_page(struct address_sp
 	clear_page_dirty(page);
 	ClearPageUptodate(page);
 	ClearPageMappedToDisk(page);
+	ClearPageSkipped(page);
 	remove_from_page_cache(page);
 	page_cache_release(page);	/* pagecache ref */
 }
@@ -86,6 +87,7 @@ invalidate_complete_page(struct address_
 	__remove_from_page_cache(page);
 	write_unlock_irq(&mapping->tree_lock);
 	ClearPageUptodate(page);
+	ClearPageSkipped(page);
 	page_cache_release(page);	/* pagecache ref */
 	return 1;
 }
diff -puN mm/swap.c~skip-writepage mm/swap.c
--- bk-linux/mm/swap.c~skip-writepage	2005-04-17 17:52:49.000000000 +0400
+++ bk-linux-nikita/mm/swap.c	2005-04-17 17:52:49.000000000 +0400
@@ -303,6 +303,7 @@ void __pagevec_lru_add(struct pagevec *p
 		}
 		if (TestSetPageLRU(page))
 			BUG();
+		ClearPageSkipped(page);
 		add_page_to_inactive_list(zone, page);
 	}
 	if (zone)

_



Thread overview: 6+ messages
2005-04-17 17:36 [PATCH]: VM 3/8 PG_skipped Nikita Danilov
2005-04-18 15:12 ` Rik van Riel
2005-04-18 17:51   ` Dave Hansen
2005-04-18 20:29     ` Nikita Danilov
2005-04-26  3:43 ` Andrew Morton
2005-05-02  9:03   ` Hirokazu Takahashi
