From: Nikita Danilov <nikita@clusterfs.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <AKPM@Osdl.ORG>
Subject: [PATCH]: VM 3/8 PG_skipped
Date: Sun, 17 Apr 2005 21:36:03 +0400 [thread overview]
Message-ID: <16994.40579.617974.423522@gargle.gargle.HOWL> (raw)
Don't call ->writepage from the VM scanner when a page is encountered for the
first time during a scan.
A new page flag, PG_skipped, is used for this. The flag is TestSet-ed just
before calling ->writepage and is cleared when the page enters the inactive
list.
One can see this as a "second chance" algorithm for the dirty pages on the
inactive list.
BSD does the same: src/sys/vm/vm_pageout.c:vm_pageout_scan(),
PG_WINATCFLS flag.
The reason behind this is that ->writepages() will perform more efficient
writeout than ->writepage(). Skipping of a page can be conditioned on
zone->pressure. On the other hand, avoiding ->writepage() increases the amount
of scanning performed by kswapd.
Signed-off-by: Nikita Danilov <nikita@clusterfs.com>
include/linux/page-flags.h | 7 +++
mm/swap.c | 1
mm/truncate.c | 2 +
mm/vmscan.c | 80 +++++++++++++++++++++++++++++++--------------
4 files changed, 66 insertions(+), 24 deletions(-)
diff -puN mm/vmscan.c~skip-writepage mm/vmscan.c
--- bk-linux/mm/vmscan.c~skip-writepage 2005-04-17 17:52:49.000000000 +0400
+++ bk-linux-nikita/mm/vmscan.c 2005-04-17 17:52:49.000000000 +0400
@@ -331,18 +329,50 @@ static pageout_t pageout(struct page *pa
return PAGE_ACTIVATE;
if (!may_write_to_queue(mapping->backing_dev_info))
return PAGE_KEEP;
-
+ /*
+ * Don't call ->writepage when page is met for the first time during
+ * scanning. Reasons:
+ *
+ * 1. if memory pressure is not too high, skipping ->writepage()
+ * may avoid writing out page that will be re-dirtied (should not
+ * be too important, because scanning starts from the tail of
+ * inactive list, where pages are _supposed_ to be rarely used,
+ * but when under constant memory pressure, inactive list is
+ * rotated and so is more FIFO than LRU).
+ *
+ * 2. ->writepages() writes data more efficiently than
+ * ->writepage().
+ */
+ if (!TestSetPageSkipped(page))
+ return PAGE_KEEP;
if (clear_page_dirty_for_io(page)) {
int res;
+
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
.nr_to_write = SWAP_CLUSTER_MAX,
- .nonblocking = 1,
- .for_reclaim = 1,
+ /*
+ * synchronous page reclamation should be non blocking
+ * for the reasons outlined in the comment above. But
+ * in the kswapd blocking is ok.
+ *
+ * NOTE:
+ *
+ * 1. .nonblocking is not analyzed by existing
+ * in-tree implementations of ->writepage().
+ *
+ * 2. may be if page zone is under considerable
+ * memory pressure (zone->prev_priority is low),
+ * .nonblocking should be set anyway.
+ */
+ .nonblocking = !current_is_kswapd(),
+ .for_reclaim = 1 /* XXX not used */
};
+ ClearPageSkipped(page);
SetPageReclaim(page);
res = mapping->a_ops->writepage(page, &wbc);
+
if (res < 0)
handle_write_error(mapping, page, res);
if (res == WRITEPAGE_ACTIVATE) {
@@ -353,10 +383,8 @@ static pageout_t pageout(struct page *pa
/* synchronous write or broken a_ops? */
ClearPageReclaim(page);
}
-
return PAGE_SUCCESS;
}
-
return PAGE_CLEAN;
}
@@ -643,10 +671,13 @@ static void shrink_cache(struct zone *zo
if (TestSetPageLRU(page))
BUG();
list_del(&page->lru);
- if (PageActive(page))
+ if (PageActive(page)) {
+ if (PageSkipped(page))
+ ClearPageSkipped(page);
add_page_to_active_list(zone, page);
- else
+ } else {
add_page_to_inactive_list(zone, page);
+ }
if (!pagevec_add(&pvec, page)) {
spin_unlock_irq(&zone->lru_lock);
__pagevec_release(&pvec);
@@ -757,6 +788,7 @@ refill_inactive_zone(struct zone *zone,
BUG();
if (!TestClearPageActive(page))
BUG();
+ ClearPageSkipped(page);
list_move(&page->lru, &zone->inactive_list);
pgmoved++;
if (!pagevec_add(&pvec, page)) {
diff -puN include/linux/page-flags.h~skip-writepage include/linux/page-flags.h
--- bk-linux/include/linux/page-flags.h~skip-writepage 2005-04-17 17:52:49.000000000 +0400
+++ bk-linux-nikita/include/linux/page-flags.h 2005-04-17 17:52:49.000000000 +0400
@@ -76,6 +76,7 @@
#define PG_reclaim 18 /* To be reclaimed asap */
#define PG_nosave_free 19 /* Free, should not be written */
#define PG_uncached 20 /* Page has been mapped as uncached */
+#define PG_skipped 21 /* ->writepage() was skipped */
/*
* Global page accounting. One instance per CPU. Only unsigned longs are
@@ -161,6 +162,12 @@ extern void __mod_page_state(unsigned of
__mod_page_state(offset, (delta)); \
} while (0)
+#define PageSkipped(page) test_bit(PG_skipped, &(page)->flags)
+#define SetPageSkipped(page) set_bit(PG_skipped, &(page)->flags)
+#define TestSetPageSkipped(page) test_and_set_bit(PG_skipped, &(page)->flags)
+#define ClearPageSkipped(page) clear_bit(PG_skipped, &(page)->flags)
+#define TestClearPageSkipped(page) test_and_clear_bit(PG_skipped, &(page)->flags)
+
/*
* Manipulation of page state flags
*/
diff -puN mm/truncate.c~skip-writepage mm/truncate.c
--- bk-linux/mm/truncate.c~skip-writepage 2005-04-17 17:52:49.000000000 +0400
+++ bk-linux-nikita/mm/truncate.c 2005-04-17 17:52:49.000000000 +0400
@@ -54,6 +54,7 @@ truncate_complete_page(struct address_sp
clear_page_dirty(page);
ClearPageUptodate(page);
ClearPageMappedToDisk(page);
+ ClearPageSkipped(page);
remove_from_page_cache(page);
page_cache_release(page); /* pagecache ref */
}
@@ -86,6 +87,7 @@ invalidate_complete_page(struct address_
__remove_from_page_cache(page);
write_unlock_irq(&mapping->tree_lock);
ClearPageUptodate(page);
+ ClearPageSkipped(page);
page_cache_release(page); /* pagecache ref */
return 1;
}
diff -puN mm/swap.c~skip-writepage mm/swap.c
--- bk-linux/mm/swap.c~skip-writepage 2005-04-17 17:52:49.000000000 +0400
+++ bk-linux-nikita/mm/swap.c 2005-04-17 17:52:49.000000000 +0400
@@ -303,6 +303,7 @@ void __pagevec_lru_add(struct pagevec *p
}
if (TestSetPageLRU(page))
BUG();
+ ClearPageSkipped(page);
add_page_to_inactive_list(zone, page);
}
if (zone)
_
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
next reply other threads:[~2005-04-17 17:36 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-04-17 17:36 Nikita Danilov [this message]
2005-04-18 15:12 ` Rik van Riel
2005-04-18 17:51 ` Dave Hansen
2005-04-18 20:29 ` Nikita Danilov
2005-04-26 3:43 ` Andrew Morton
2005-05-02 9:03 ` Hirokazu Takahashi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=16994.40579.617974.423522@gargle.gargle.HOWL \
--to=nikita@clusterfs.com \
--cc=AKPM@Osdl.ORG \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox