linux-mm.kvack.org archive mirror
* [patch 0/3] activate pages in batch
@ 2008-10-22 22:50 Johannes Weiner
  2008-10-22 22:50 ` [patch 1/3] swap: use an array for all pagevecs Johannes Weiner
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Johannes Weiner @ 2008-10-22 22:50 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Rik van Riel, Peter Zijlstra, linux-mm

Instead of re-acquiring the highly contended LRU lock on every single
page activation, deploy an extra pagevec to do page activation in
batch.
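
In rough outline, the scheme the series converges on looks like this
(a condensed sketch of patch 3's activate_page() below; the
PageLRU/PageActive checks are left out here for brevity):

	void activate_page(struct page *page)
	{
		struct pagevec *pvec;

		page_cache_get(page);
		local_irq_disable();
		pvec = &__get_cpu_var(lru_pvecs)[PAGEVEC_ACTIVATE];
		/* Queue the page; the zone lru_lock is only taken
		 * once a full vector is flushed to the active list. */
		if (!pagevec_add(pvec, page))
			____pagevec_flush(pvec, PAGEVEC_ACTIVATE);
		local_irq_enable();
	}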

The first patch is just grouping all pagevecs we use into one array
which makes further refactoring easier.

The second patch simplifies the interface for flushing a pagevec to
the proper LRU list.

And finally, the last patch changes page activation to batch-mode.

	Hannes

 include/linux/pagevec.h |   21 +++-
 mm/swap.c               |  216 ++++++++++++++++++++++++------------------------
 2 files changed, 127 insertions(+), 110 deletions(-)


* [patch 1/3] swap: use an array for all pagevecs
  2008-10-22 22:50 [patch 0/3] activate pages in batch Johannes Weiner
@ 2008-10-22 22:50 ` Johannes Weiner
  2008-10-22 22:50 ` [patch 2/3] swap: refactor pagevec flushing Johannes Weiner
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 11+ messages in thread
From: Johannes Weiner @ 2008-10-22 22:50 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Rik van Riel, Peter Zijlstra, linux-mm

[-- Attachment #1: swap-use-an-array-for-all-pagevecs.patch --]
[-- Type: text/plain, Size: 2613 bytes --]

Use an array for all pagevecs, not just for those we cache new LRU
pages on.  This will ease further refactoring.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
---
 include/linux/pagevec.h |    7 +++++++
 mm/swap.c               |   15 +++++++--------
 2 files changed, 14 insertions(+), 8 deletions(-)

--- a/mm/swap.c
+++ b/mm/swap.c
@@ -36,8 +36,7 @@
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
-static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
+static DEFINE_PER_CPU(struct pagevec[NR_LRU_PAGEVECS], lru_pvecs);
 
 /*
  * This path almost never happens for VM activity - pages are normally
@@ -144,7 +143,7 @@ void  rotate_reclaimable_page(struct pag
 
 		page_cache_get(page);
 		local_irq_save(flags);
-		pvec = &__get_cpu_var(lru_rotate_pvecs);
+		pvec = &__get_cpu_var(lru_pvecs)[PAGEVEC_ROTATE];
 		if (!pagevec_add(pvec, page))
 			pagevec_move_tail(pvec);
 		local_irq_restore(flags);
@@ -198,12 +197,12 @@ EXPORT_SYMBOL(mark_page_accessed);
 
 void __lru_cache_add(struct page *page, enum lru_list lru)
 {
-	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];
+	struct pagevec *pvec = &get_cpu_var(lru_pvecs)[PAGEVEC_ADD + lru];
 
 	page_cache_get(page);
 	if (!pagevec_add(pvec, page))
 		____pagevec_lru_add(pvec, lru);
-	put_cpu_var(lru_add_pvecs);
+	put_cpu_var(lru_pvecs);
 }
 
 /**
@@ -272,17 +271,17 @@ void lru_cache_add_active_or_unevictable
  */
 static void drain_cpu_pagevecs(int cpu)
 {
-	struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu);
+	struct pagevec *pvecs = per_cpu(lru_pvecs, cpu);
 	struct pagevec *pvec;
 	int lru;
 
 	for_each_lru(lru) {
-		pvec = &pvecs[lru - LRU_BASE];
+		pvec = &pvecs[PAGEVEC_ADD + lru];
 		if (pagevec_count(pvec))
 			____pagevec_lru_add(pvec, lru);
 	}
 
-	pvec = &per_cpu(lru_rotate_pvecs, cpu);
+	pvec = &pvecs[PAGEVEC_ROTATE];
 	if (pagevec_count(pvec)) {
 		unsigned long flags;
 
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -20,6 +20,13 @@ struct pagevec {
 	struct page *pages[PAGEVEC_SIZE];
 };
 
+enum lru_pagevec {
+	PAGEVEC_BASE,
+	PAGEVEC_ADD = PAGEVEC_BASE,
+	PAGEVEC_ROTATE = NR_LRU_LISTS,
+	NR_LRU_PAGEVECS
+};
+
 void __pagevec_release(struct pagevec *pvec);
 void __pagevec_release_nonlru(struct pagevec *pvec);
 void __pagevec_free(struct pagevec *pvec);



* [patch 2/3] swap: refactor pagevec flushing
  2008-10-22 22:50 [patch 0/3] activate pages in batch Johannes Weiner
  2008-10-22 22:50 ` [patch 1/3] swap: use an array for all pagevecs Johannes Weiner
@ 2008-10-22 22:50 ` Johannes Weiner
  2008-10-27  6:50   ` Andrew Morton
  2008-10-22 22:50 ` [patch 3/3] swap: cache page activation Johannes Weiner
  2008-10-23  1:41 ` [patch 0/3] activate pages in batch KOSAKI Motohiro
  3 siblings, 1 reply; 11+ messages in thread
From: Johannes Weiner @ 2008-10-22 22:50 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Rik van Riel, Peter Zijlstra, linux-mm

[-- Attachment #1: swap-refactor-pagevec-flushing.patch --]
[-- Type: text/plain, Size: 6702 bytes --]

Having all pagevecs in one array allows for easier flushing.  Use a
single flush function that decides what to do based on the target LRU.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
---
 include/linux/pagevec.h |   13 +++--
 mm/swap.c               |  121 +++++++++++++++++++++++-------------------------
 2 files changed, 66 insertions(+), 68 deletions(-)

--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -27,10 +27,13 @@ enum lru_pagevec {
 	NR_LRU_PAGEVECS
 };
 
+#define for_each_lru_pagevec(pv)		\
+	for (pv = 0; pv < NR_LRU_PAGEVECS; pv++)
+
 void __pagevec_release(struct pagevec *pvec);
 void __pagevec_release_nonlru(struct pagevec *pvec);
 void __pagevec_free(struct pagevec *pvec);
-void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
+void __pagevec_flush(struct pagevec *pvec, enum lru_pagevec target);
 void pagevec_strip(struct pagevec *pvec);
 void pagevec_swap_free(struct pagevec *pvec);
 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
@@ -90,22 +93,22 @@ static inline void pagevec_free(struct p
 
 static inline void __pagevec_lru_add_anon(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
+	__pagevec_flush(pvec, LRU_INACTIVE_ANON);
 }
 
 static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_ACTIVE_ANON);
+	__pagevec_flush(pvec, LRU_ACTIVE_ANON);
 }
 
 static inline void __pagevec_lru_add_file(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_INACTIVE_FILE);
+	__pagevec_flush(pvec, LRU_INACTIVE_FILE);
 }
 
 static inline void __pagevec_lru_add_active_file(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
+	__pagevec_flush(pvec, LRU_ACTIVE_FILE);
 }
 
 static inline void pagevec_lru_add_file(struct pagevec *pvec)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -96,17 +96,44 @@ void put_pages_list(struct list_head *pa
 }
 EXPORT_SYMBOL(put_pages_list);
 
-/*
- * pagevec_move_tail() must be called with IRQ disabled.
- * Otherwise this may cause nasty races.
- */
-static void pagevec_move_tail(struct pagevec *pvec)
+static void pagevec_flush_add(struct zone *zone, struct page *page,
+			enum lru_list lru)
+{
+	VM_BUG_ON(is_unevictable_lru(lru));
+	VM_BUG_ON(PageActive(page));
+	VM_BUG_ON(PageUnevictable(page));
+	VM_BUG_ON(PageLRU(page));
+	SetPageLRU(page);
+	if (is_active_lru(lru))
+		SetPageActive(page);
+	add_page_to_lru_list(zone, page, lru);
+}
+
+static void pagevec_flush_rotate(struct zone *zone, struct page *page)
+{
+	int lru;
+
+	if (!PageLRU(page) || PageActive(page) || PageUnevictable(page))
+		return;
+	lru = page_is_file_cache(page);
+	list_move_tail(&page->lru, &zone->lru[lru].list);
+	__count_vm_event(PGROTATED);
+}
+
+static enum lru_pagevec target_mode(enum lru_pagevec target)
+{
+	if (target > PAGEVEC_ADD && target < PAGEVEC_ROTATE)
+		return PAGEVEC_ADD;
+	return target;
+}
+
+static void ____pagevec_flush(struct pagevec *pvec, enum lru_pagevec target)
 {
 	int i;
-	int pgmoved = 0;
 	struct zone *zone = NULL;
 
 	for (i = 0; i < pagevec_count(pvec); i++) {
+		enum lru_pagevec mode;
 		struct page *page = pvec->pages[i];
 		struct zone *pagezone = page_zone(page);
 
@@ -116,19 +143,33 @@ static void pagevec_move_tail(struct pag
 			zone = pagezone;
 			spin_lock(&zone->lru_lock);
 		}
-		if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
-			int lru = page_is_file_cache(page);
-			list_move_tail(&page->lru, &zone->lru[lru].list);
-			pgmoved++;
+
+		mode = target_mode(target);
+		switch (mode) {
+		case PAGEVEC_ADD:
+			pagevec_flush_add(zone, page, target);
+			break;
+		case PAGEVEC_ROTATE:
+			pagevec_flush_rotate(zone, page);
+			break;
+		default:
+			BUG();
 		}
 	}
 	if (zone)
 		spin_unlock(&zone->lru_lock);
-	__count_vm_events(PGROTATED, pgmoved);
 	release_pages(pvec->pages, pvec->nr, pvec->cold);
 	pagevec_reinit(pvec);
 }
 
+void __pagevec_flush(struct pagevec *pvec, enum lru_pagevec target)
+{
+	local_irq_disable();
+	____pagevec_flush(pvec, target);
+	local_irq_enable();
+}
+EXPORT_SYMBOL(__pagevec_flush);
+
 /*
  * Writeback is about to end against a page which has been marked for immediate
  * reclaim.  If it still appears to be reclaimable, move it to the tail of the
@@ -145,7 +186,7 @@ void  rotate_reclaimable_page(struct pag
 		local_irq_save(flags);
 		pvec = &__get_cpu_var(lru_pvecs)[PAGEVEC_ROTATE];
 		if (!pagevec_add(pvec, page))
-			pagevec_move_tail(pvec);
+			____pagevec_flush(pvec, PAGEVEC_ROTATE);
 		local_irq_restore(flags);
 	}
 }
@@ -201,7 +242,7 @@ void __lru_cache_add(struct page *page, 
 
 	page_cache_get(page);
 	if (!pagevec_add(pvec, page))
-		____pagevec_lru_add(pvec, lru);
+		__pagevec_flush(pvec, lru);
 	put_cpu_var(lru_pvecs);
 }
 
@@ -273,22 +314,12 @@ static void drain_cpu_pagevecs(int cpu)
 {
 	struct pagevec *pvecs = per_cpu(lru_pvecs, cpu);
 	struct pagevec *pvec;
-	int lru;
+	int pv;
 
-	for_each_lru(lru) {
-		pvec = &pvecs[PAGEVEC_ADD + lru];
+	for_each_lru_pagevec(pv) {
+		pvec = &pvecs[pv];
 		if (pagevec_count(pvec))
-			____pagevec_lru_add(pvec, lru);
-	}
-
-	pvec = &pvecs[PAGEVEC_ROTATE];
-	if (pagevec_count(pvec)) {
-		unsigned long flags;
-
-		/* No harm done if a racing interrupt already did this */
-		local_irq_save(flags);
-		pagevec_move_tail(pvec);
-		local_irq_restore(flags);
+			__pagevec_flush(pvec, pv);
 	}
 }
 
@@ -432,42 +463,6 @@ void __pagevec_release_nonlru(struct pag
 }
 
 /*
- * Add the passed pages to the LRU, then drop the caller's refcount
- * on them.  Reinitialises the caller's pagevec.
- */
-void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
-{
-	int i;
-	struct zone *zone = NULL;
-	VM_BUG_ON(is_unevictable_lru(lru));
-
-	for (i = 0; i < pagevec_count(pvec); i++) {
-		struct page *page = pvec->pages[i];
-		struct zone *pagezone = page_zone(page);
-
-		if (pagezone != zone) {
-			if (zone)
-				spin_unlock_irq(&zone->lru_lock);
-			zone = pagezone;
-			spin_lock_irq(&zone->lru_lock);
-		}
-		VM_BUG_ON(PageActive(page));
-		VM_BUG_ON(PageUnevictable(page));
-		VM_BUG_ON(PageLRU(page));
-		SetPageLRU(page);
-		if (is_active_lru(lru))
-			SetPageActive(page);
-		add_page_to_lru_list(zone, page, lru);
-	}
-	if (zone)
-		spin_unlock_irq(&zone->lru_lock);
-	release_pages(pvec->pages, pvec->nr, pvec->cold);
-	pagevec_reinit(pvec);
-}
-
-EXPORT_SYMBOL(____pagevec_lru_add);
-
-/*
  * Try to drop buffers from the pages in a pagevec
  */
 void pagevec_strip(struct pagevec *pvec)



* [patch 3/3] swap: cache page activation
  2008-10-22 22:50 [patch 0/3] activate pages in batch Johannes Weiner
  2008-10-22 22:50 ` [patch 1/3] swap: use an array for all pagevecs Johannes Weiner
  2008-10-22 22:50 ` [patch 2/3] swap: refactor pagevec flushing Johannes Weiner
@ 2008-10-22 22:50 ` Johannes Weiner
  2008-10-23  1:41 ` [patch 0/3] activate pages in batch KOSAKI Motohiro
  3 siblings, 0 replies; 11+ messages in thread
From: Johannes Weiner @ 2008-10-22 22:50 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Rik van Riel, Peter Zijlstra, linux-mm

[-- Attachment #1: swap-cache-page-activation.patch --]
[-- Type: text/plain, Size: 3938 bytes --]

Instead of acquiring the highly contended LRU lock on each page
activation, use a pagevec and activate pages batch-wise.

Also factor out the add-to-cache-maybe-flush mechanism that is
shared between page rotation and activation code.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
---
 include/linux/pagevec.h |    1 
 mm/swap.c               |   81 ++++++++++++++++++++++++++++---------------------
 2 files changed, 48 insertions(+), 34 deletions(-)

--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -24,6 +24,7 @@ enum lru_pagevec {
 	PAGEVEC_BASE,
 	PAGEVEC_ADD = PAGEVEC_BASE,
 	PAGEVEC_ROTATE = NR_LRU_LISTS,
+	PAGEVEC_ACTIVATE,
 	NR_LRU_PAGEVECS
 };
 
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -120,6 +120,24 @@ static void pagevec_flush_rotate(struct 
 	__count_vm_event(PGROTATED);
 }
 
+static void pagevec_flush_activate(struct zone *zone, struct page *page)
+{
+	int file, lru;
+
+	if (!PageLRU(page) || PageActive(page) || PageUnevictable(page))
+		return;
+	file = page_is_file_cache(page);
+	lru = LRU_BASE + file;
+	del_page_from_lru_list(zone, page, lru);
+	SetPageActive(page);
+	lru += LRU_ACTIVE;
+	add_page_to_lru_list(zone, page, lru);
+	mem_cgroup_move_lists(page, lru);
+	__count_vm_event(PGACTIVATE);
+	zone->recent_rotated[!!file]++;
+	zone->recent_scanned[!!file]++;
+}
+
 static enum lru_pagevec target_mode(enum lru_pagevec target)
 {
 	if (target > PAGEVEC_ADD && target < PAGEVEC_ROTATE)
@@ -152,6 +170,9 @@ static void ____pagevec_flush(struct pag
 		case PAGEVEC_ROTATE:
 			pagevec_flush_rotate(zone, page);
 			break;
+		case PAGEVEC_ACTIVATE:
+			pagevec_flush_activate(zone, page);
+			break;
 		default:
 			BUG();
 		}
@@ -170,50 +191,42 @@ void __pagevec_flush(struct pag
 }
 EXPORT_SYMBOL(__pagevec_flush);
 
+static void move_page(struct page *page, enum lru_pagevec target)
+{
+	struct pagevec *pvec;
+
+	pvec = &__get_cpu_var(lru_pvecs)[target];
+	if (!pagevec_add(pvec, page))
+		____pagevec_flush(pvec, target);
+}
+
 /*
  * Writeback is about to end against a page which has been marked for immediate
  * reclaim.  If it still appears to be reclaimable, move it to the tail of the
  * inactive list.
  */
-void  rotate_reclaimable_page(struct page *page)
+void rotate_reclaimable_page(struct page *page)
 {
-	if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
-	    !PageUnevictable(page) && PageLRU(page)) {
-		struct pagevec *pvec;
-		unsigned long flags;
-
-		page_cache_get(page);
-		local_irq_save(flags);
-		pvec = &__get_cpu_var(lru_pvecs)[PAGEVEC_ROTATE];
-		if (!pagevec_add(pvec, page))
-			____pagevec_flush(pvec, PAGEVEC_ROTATE);
-		local_irq_restore(flags);
-	}
+	unsigned long flags;
+
+	if (!PageLRU(page) || PageActive(page) || PageUnevictable(page))
+		return;
+	if (PageLocked(page) || PageDirty(page))
+		return;
+	page_cache_get(page);
+	local_irq_save(flags);
+	move_page(page, PAGEVEC_ROTATE);
+	local_irq_restore(flags);
 }
 
-/*
- * FIXME: speed this up?
- */
 void activate_page(struct page *page)
 {
-	struct zone *zone = page_zone(page);
-
-	spin_lock_irq(&zone->lru_lock);
-	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
-		int file = page_is_file_cache(page);
-		int lru = LRU_BASE + file;
-		del_page_from_lru_list(zone, page, lru);
-
-		SetPageActive(page);
-		lru += LRU_ACTIVE;
-		add_page_to_lru_list(zone, page, lru);
-		__count_vm_event(PGACTIVATE);
-		mem_cgroup_move_lists(page, lru);
-
-		zone->recent_rotated[!!file]++;
-		zone->recent_scanned[!!file]++;
-	}
-	spin_unlock_irq(&zone->lru_lock);
+	if (!PageLRU(page) || PageActive(page) || PageUnevictable(page))
+		return;
+	page_cache_get(page);
+	local_irq_disable();
+	move_page(page, PAGEVEC_ACTIVATE);
+	local_irq_enable();
 }
 
 /*



* Re: [patch 0/3] activate pages in batch
  2008-10-22 22:50 [patch 0/3] activate pages in batch Johannes Weiner
                   ` (2 preceding siblings ...)
  2008-10-22 22:50 ` [patch 3/3] swap: cache page activation Johannes Weiner
@ 2008-10-23  1:41 ` KOSAKI Motohiro
  2008-10-23  2:00   ` Johannes Weiner
  3 siblings, 1 reply; 11+ messages in thread
From: KOSAKI Motohiro @ 2008-10-23  1:41 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: kosaki.motohiro, Andrew Morton, Rik van Riel, Peter Zijlstra, linux-mm

Hi, Hannes

> Instead of re-acquiring the highly contended LRU lock on every single
> page activation, deploy an extra pagevec to do page activation in
> batch.

Do you have any measurement results?


> 
> The first patch is just grouping all pagevecs we use into one array
> which makes further refactoring easier.
> 
> The second patch simplifies the interface for flushing a pagevec to
> the proper LRU list.
> 
> And finally, the last patch changes page activation to batch-mode.
> 
> 	Hannes
> 
>  include/linux/pagevec.h |   21 +++-
>  mm/swap.c               |  216 ++++++++++++++++++++++++------------------------
>  2 files changed, 127 insertions(+), 110 deletions(-)




* Re: [patch 0/3] activate pages in batch
  2008-10-23  1:41 ` [patch 0/3] activate pages in batch KOSAKI Motohiro
@ 2008-10-23  2:00   ` Johannes Weiner
  2008-10-23  2:10     ` KOSAKI Motohiro
  0 siblings, 1 reply; 11+ messages in thread
From: Johannes Weiner @ 2008-10-23  2:00 UTC (permalink / raw)
  To: KOSAKI Motohiro; +Cc: Andrew Morton, Rik van Riel, Peter Zijlstra, linux-mm

KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> writes:

> Hi, Hannes
>
>> Instead of re-acquiring the highly contended LRU lock on every single
>> page activation, deploy an extra pagevec to do page activation in
>> batch.
>
> Do you have any measurement results?

Not yet, sorry.

Spinlocks are no-ops on my architecture, though, so the best I can come
up with is results from emulating an SMP machine.  Would that be okay?

	Hannes


* Re: [patch 0/3] activate pages in batch
  2008-10-23  2:00   ` Johannes Weiner
@ 2008-10-23  2:10     ` KOSAKI Motohiro
  2008-10-23 16:21       ` Johannes Weiner
  0 siblings, 1 reply; 11+ messages in thread
From: KOSAKI Motohiro @ 2008-10-23  2:10 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: kosaki.motohiro, Andrew Morton, Rik van Riel, Peter Zijlstra, linux-mm

> >> Instead of re-acquiring the highly contended LRU lock on every single
> >> page activation, deploy an extra pagevec to do page activation in
> >> batch.
> >
> > Do you have any measurement results?
> 
> Not yet, sorry.
> 
> Spinlocks are no-ops on my architecture, though, so the best I can come
> up with is results from emulating an SMP machine.  Would that be okay?

It's not ok.

If you can explain the best way to measure it, I can measure on an
8-way machine :)
(but, of course, I should measure your madv_sequence patch first)





* Re: [patch 0/3] activate pages in batch
  2008-10-23  2:10     ` KOSAKI Motohiro
@ 2008-10-23 16:21       ` Johannes Weiner
  0 siblings, 0 replies; 11+ messages in thread
From: Johannes Weiner @ 2008-10-23 16:21 UTC (permalink / raw)
  To: KOSAKI Motohiro; +Cc: Andrew Morton, Rik van Riel, Peter Zijlstra, linux-mm

KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> writes:

>> >> Instead of re-acquiring the highly contended LRU lock on every single
>> >> page activation, deploy an extra pagevec to do page activation in
>> >> batch.
>> >
>> > Do you have any measurement results?
>> 
>> Not yet, sorry.
>> 
>> Spinlocks are no-ops on my architecture, though, so the best I can come
>> up with is results from emulating an SMP machine.  Would that be okay?
>
> It's not ok.

Ok.

> If you can explain the best way to measure it, I can measure on an
> 8-way machine :)

Hmm, the `best way' is probably something else, but I played with the
attached program.  It triggers roughly as many activations as pages
read in, plus a lot of scanning, so perhaps this could work.  On your
box, you will most likely need to turn up the knobs a bit, though ;)

> (but, of course, I should measure your madv_sequence patch first)

Thanks a lot for this, btw!

        Hannes

---
Sample output from the program:

$ egrep '(pgactivate|pgscan_direct_normal)' /proc/vmstat; \
  /usr/bin/time ./activate-reclaim-smp; \
  egrep '(pgactivate|pgscan_direct_normal)' /proc/vmstat

pgactivate 9587603
pgscan_direct_normal 8150176
2: warning, can not migrate to cpu
1: warning, can not migrate to cpu
<snipped warnings, you shouldn't get those, of course!>
/loader
/children
1.93user 16.36system 0:58.17elapsed 31%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (31major+526765minor)pagefaults 0swaps
pgactivate 9856316
pgscan_direct_normal 8603232

---
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <sched.h>
#include <sys/mman.h>

#define NR_CPUS		8
#define PROCS_PER_CPU	4

#define NR_PROCS	(NR_CPUS * PROCS_PER_CPU)

#define FILE_SIZE       (1<<30)
#define ANON_SIZE       (1<<30)

static void move_self_to(int cpu)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);
	if (sched_setaffinity(0, sizeof(set), &set))
		printf("%d: warning, can not migrate to cpu\n", cpu);
	sched_yield();
}

/* generate file pages */
static void reader(int cpu)
{
	int fd;
	char buf;
	unsigned long off;

	fd = open("zeroes", O_RDONLY);
	if (fd < 0) {
		printf("%d: open() failed\n", cpu);
		return;
	}

	for (off = 0; off < FILE_SIZE; off += sysconf(_SC_PAGESIZE)) {
		/* touch one byte per page to populate the page cache */
		lseek(fd, off, SEEK_SET);
		if (read(fd, &buf, 1) != 1)
			puts("huh?");
	}
	close(fd);
}

/* generate anon pages to trigger reclaims */
static void loader(void)
{
	char *map;
	unsigned long offset;

	map = mmap(NULL, ANON_SIZE, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0);
	if (map == MAP_FAILED) {
		printf("failed to anon-map\n");
		return;
	}

	for (offset = 0; offset < ANON_SIZE; offset += sysconf(_SC_PAGESIZE))
		if (map[offset])
			puts("huh?");

	munmap(map, ANON_SIZE);
}

static pid_t spawn_on(int cpu)
{
	pid_t child = fork();

	switch (child) {
	case -1:
		printf("%d: fork() failed\n", cpu);
		exit(1);
	case 0:
		move_self_to(cpu);
		reader(cpu);
		exit(0);
	default:
		return child;
	}
}

int main(void)
{
	int cpu = -1, proc;
	pid_t children[NR_PROCS];

	while (++cpu < NR_CPUS)
		for (proc = 0; proc < PROCS_PER_CPU; proc++)
			children[cpu * PROCS_PER_CPU + proc] = spawn_on(cpu);

	loader();
	loader();
	puts("/loader");

	for (proc = 0; proc < NR_PROCS; proc++)
		waitpid(children[proc], &cpu, 0);
	puts("/children");

	return 0;
}

* Re: [patch 2/3] swap: refactor pagevec flushing
  2008-10-22 22:50 ` [patch 2/3] swap: refactor pagevec flushing Johannes Weiner
@ 2008-10-27  6:50   ` Andrew Morton
  2008-10-27  8:08     ` Johannes Weiner
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2008-10-27  6:50 UTC (permalink / raw)
  To: Johannes Weiner; +Cc: Rik van Riel, Peter Zijlstra, linux-mm

On Thu, 23 Oct 2008 00:50:08 +0200 Johannes Weiner <hannes@saeurebad.de> wrote:

> Having all pagevecs in one array allows for easier flushing.  Use a
> single flush function that decides what to do based on the target LRU.
> 
> Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
> ---
>  include/linux/pagevec.h |   13 +++--
>  mm/swap.c               |  121 +++++++++++++++++++++++-------------------------
>  2 files changed, 66 insertions(+), 68 deletions(-)
> 
> --- a/include/linux/pagevec.h
> +++ b/include/linux/pagevec.h
> @@ -27,10 +27,13 @@ enum lru_pagevec {
>  	NR_LRU_PAGEVECS
>  };
>  
> +#define for_each_lru_pagevec(pv)		\
> +	for (pv = 0; pv < NR_LRU_PAGEVECS; pv++)

This only gets used once.  I don't think its existence is justified?

(`pv' is usually parenthesised in macros like this, but it's unlikely to
matter).


* Re: [patch 2/3] swap: refactor pagevec flushing
  2008-10-27  6:50   ` Andrew Morton
@ 2008-10-27  8:08     ` Johannes Weiner
  2008-10-27 12:18       ` [patch 2/3 v2] " Johannes Weiner
  0 siblings, 1 reply; 11+ messages in thread
From: Johannes Weiner @ 2008-10-27  8:08 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Rik van Riel, Peter Zijlstra, linux-mm

Andrew Morton <akpm@linux-foundation.org> writes:

> On Thu, 23 Oct 2008 00:50:08 +0200 Johannes Weiner <hannes@saeurebad.de> wrote:
>
>> Having all pagevecs in one array allows for easier flushing.  Use a
>> single flush function that decides what to do based on the target LRU.
>> 
>> Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
>> ---
>>  include/linux/pagevec.h |   13 +++--
>>  mm/swap.c               |  121 +++++++++++++++++++++++-------------------------
>>  2 files changed, 66 insertions(+), 68 deletions(-)
>> 
>> --- a/include/linux/pagevec.h
>> +++ b/include/linux/pagevec.h
>> @@ -27,10 +27,13 @@ enum lru_pagevec {
>>  	NR_LRU_PAGEVECS
>>  };
>>  
>> +#define for_each_lru_pagevec(pv)		\
>> +	for (pv = 0; pv < NR_LRU_PAGEVECS; pv++)
>
> This only gets used once.  I don't think its existence is justified?

I don't see any other use-case for it now.  So, yes, let's drop it.

> (`pv' is usually parenthesised in macros like this, but it's unlikely to
> matter).

Hmm, wondering which valid lvalue construction could break it...?
Probably something involving stars...
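
For example (hypothetical caller, purely to illustrate the precedence
trap):

	int counter, *p = &counter;

	for_each_lru_pagevec(*p)
		do_something();
	/*
	 * The unparenthesised macro expands to
	 *
	 *	for (*p = 0; *p < NR_LRU_PAGEVECS; *p++)
	 *
	 * and `*p++' parses as `*(p++)': it advances the pointer
	 * instead of incrementing the counter.  A parenthesised
	 * argument would yield (*p)++, which does the right thing.
	 */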

Okay, getting rid of it settles both points.  Replacement patch coming soon.

        Hannes


* [patch 2/3 v2] swap: refactor pagevec flushing
  2008-10-27  8:08     ` Johannes Weiner
@ 2008-10-27 12:18       ` Johannes Weiner
  0 siblings, 0 replies; 11+ messages in thread
From: Johannes Weiner @ 2008-10-27 12:18 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Rik van Riel, Peter Zijlstra, linux-mm

Johannes Weiner <hannes@saeurebad.de> writes:

> Andrew Morton <akpm@linux-foundation.org> writes:
>
>> On Thu, 23 Oct 2008 00:50:08 +0200 Johannes Weiner <hannes@saeurebad.de> wrote:
>>
>>> Having all pagevecs in one array allows for easier flushing.  Use a
>>> single flush function that decides what to do based on the target LRU.
>>> 
>>> Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
>>> ---
>>>  include/linux/pagevec.h |   13 +++--
>>>  mm/swap.c               |  121 +++++++++++++++++++++++-------------------------
>>>  2 files changed, 66 insertions(+), 68 deletions(-)
>>> 
>>> --- a/include/linux/pagevec.h
>>> +++ b/include/linux/pagevec.h
>>> @@ -27,10 +27,13 @@ enum lru_pagevec {
>>>  	NR_LRU_PAGEVECS
>>>  };
>>>  
>>> +#define for_each_lru_pagevec(pv)		\
>>> +	for (pv = 0; pv < NR_LRU_PAGEVECS; pv++)
>>
>> This only gets used once.  I don't think its existence is justified?
>
> I don't see any other use-case for it now.  So, yes, let's drop it.
>
>> (`pv' is usually parenthesised in macros like this, but it's unlikely to
>> matter).
>
> Hmm, wondering which valid lvalue construction could break it...?
> Probably something involving stars...
>
> Okay, getting rid of it settles both points.  Replacement patch coming soon.

---
Having all pagevecs in one array allows for easier flushing.  Use a
single flush function that decides what to do based on the target LRU.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
---
v2: Drop for_each_lru_pagevec()

 include/linux/pagevec.h |   10 +--
 mm/swap.c               |  121 +++++++++++++++++++++++-------------------------
 2 files changed, 63 insertions(+), 68 deletions(-)

--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -30,7 +30,7 @@ enum lru_pagevec {
 void __pagevec_release(struct pagevec *pvec);
 void __pagevec_release_nonlru(struct pagevec *pvec);
 void __pagevec_free(struct pagevec *pvec);
-void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
+void __pagevec_flush(struct pagevec *pvec, enum lru_pagevec target);
 void pagevec_strip(struct pagevec *pvec);
 void pagevec_swap_free(struct pagevec *pvec);
 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
@@ -90,22 +90,22 @@ static inline void pagevec_free(struct p
 
 static inline void __pagevec_lru_add_anon(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
+	__pagevec_flush(pvec, LRU_INACTIVE_ANON);
 }
 
 static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_ACTIVE_ANON);
+	__pagevec_flush(pvec, LRU_ACTIVE_ANON);
 }
 
 static inline void __pagevec_lru_add_file(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_INACTIVE_FILE);
+	__pagevec_flush(pvec, LRU_INACTIVE_FILE);
 }
 
 static inline void __pagevec_lru_add_active_file(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
+	__pagevec_flush(pvec, LRU_ACTIVE_FILE);
 }
 
 static inline void pagevec_lru_add_file(struct pagevec *pvec)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -96,17 +96,44 @@ void put_pages_list(struct list_head *pa
 }
 EXPORT_SYMBOL(put_pages_list);
 
-/*
- * pagevec_move_tail() must be called with IRQ disabled.
- * Otherwise this may cause nasty races.
- */
-static void pagevec_move_tail(struct pagevec *pvec)
+static void pagevec_flush_add(struct zone *zone, struct page *page,
+			enum lru_list lru)
+{
+	VM_BUG_ON(is_unevictable_lru(lru));
+	VM_BUG_ON(PageActive(page));
+	VM_BUG_ON(PageUnevictable(page));
+	VM_BUG_ON(PageLRU(page));
+	SetPageLRU(page);
+	if (is_active_lru(lru))
+		SetPageActive(page);
+	add_page_to_lru_list(zone, page, lru);
+}
+
+static void pagevec_flush_rotate(struct zone *zone, struct page *page)
+{
+	int lru;
+
+	if (!PageLRU(page) || PageActive(page) || PageUnevictable(page))
+		return;
+	lru = page_is_file_cache(page);
+	list_move_tail(&page->lru, &zone->lru[lru].list);
+	__count_vm_event(PGROTATED);
+}
+
+static enum lru_pagevec target_mode(enum lru_pagevec target)
+{
+	if (target > PAGEVEC_ADD && target < PAGEVEC_ROTATE)
+		return PAGEVEC_ADD;
+	return target;
+}
+
+static void ____pagevec_flush(struct pagevec *pvec, enum lru_pagevec target)
 {
 	int i;
-	int pgmoved = 0;
 	struct zone *zone = NULL;
 
 	for (i = 0; i < pagevec_count(pvec); i++) {
+		enum lru_pagevec mode;
 		struct page *page = pvec->pages[i];
 		struct zone *pagezone = page_zone(page);
 
@@ -116,19 +143,33 @@ static void pagevec_move_tail(struct pag
 			zone = pagezone;
 			spin_lock(&zone->lru_lock);
 		}
-		if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
-			int lru = page_is_file_cache(page);
-			list_move_tail(&page->lru, &zone->lru[lru].list);
-			pgmoved++;
+
+		mode = target_mode(target);
+		switch (mode) {
+		case PAGEVEC_ADD:
+			pagevec_flush_add(zone, page, target);
+			break;
+		case PAGEVEC_ROTATE:
+			pagevec_flush_rotate(zone, page);
+			break;
+		default:
+			BUG();
 		}
 	}
 	if (zone)
 		spin_unlock(&zone->lru_lock);
-	__count_vm_events(PGROTATED, pgmoved);
 	release_pages(pvec->pages, pvec->nr, pvec->cold);
 	pagevec_reinit(pvec);
 }
 
+void __pagevec_flush(struct pagevec *pvec, enum lru_pagevec target)
+{
+	local_irq_disable();
+	____pagevec_flush(pvec, target);
+	local_irq_enable();
+}
+EXPORT_SYMBOL(__pagevec_flush);
+
 /*
  * Writeback is about to end against a page which has been marked for immediate
  * reclaim.  If it still appears to be reclaimable, move it to the tail of the
@@ -145,7 +186,7 @@ void  rotate_reclaimable_page(struct pag
 		local_irq_save(flags);
 		pvec = &__get_cpu_var(lru_pvecs)[PAGEVEC_ROTATE];
 		if (!pagevec_add(pvec, page))
-			pagevec_move_tail(pvec);
+			____pagevec_flush(pvec, PAGEVEC_ROTATE);
 		local_irq_restore(flags);
 	}
 }
@@ -201,7 +242,7 @@ void __lru_cache_add(struct page *page, 
 
 	page_cache_get(page);
 	if (!pagevec_add(pvec, page))
-		____pagevec_lru_add(pvec, lru);
+		__pagevec_flush(pvec, lru);
 	put_cpu_var(lru_pvecs);
 }
 
@@ -273,22 +314,12 @@ static void drain_cpu_pagevecs(int cpu)
 {
 	struct pagevec *pvecs = per_cpu(lru_pvecs, cpu);
 	struct pagevec *pvec;
-	int lru;
+	int pv;
 
-	for_each_lru(lru) {
-		pvec = &pvecs[PAGEVEC_ADD + lru];
+	for (pv = 0; pv < NR_LRU_PAGEVECS; pv++) {
+		pvec = &pvecs[pv];
 		if (pagevec_count(pvec))
-			____pagevec_lru_add(pvec, lru);
-	}
-
-	pvec = &pvecs[PAGEVEC_ROTATE];
-	if (pagevec_count(pvec)) {
-		unsigned long flags;
-
-		/* No harm done if a racing interrupt already did this */
-		local_irq_save(flags);
-		pagevec_move_tail(pvec);
-		local_irq_restore(flags);
+			__pagevec_flush(pvec, pv);
 	}
 }
 
@@ -432,42 +463,6 @@ void __pagevec_release_nonlru(struct pag
 }
 
 /*
- * Add the passed pages to the LRU, then drop the caller's refcount
- * on them.  Reinitialises the caller's pagevec.
- */
-void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
-{
-	int i;
-	struct zone *zone = NULL;
-	VM_BUG_ON(is_unevictable_lru(lru));
-
-	for (i = 0; i < pagevec_count(pvec); i++) {
-		struct page *page = pvec->pages[i];
-		struct zone *pagezone = page_zone(page);
-
-		if (pagezone != zone) {
-			if (zone)
-				spin_unlock_irq(&zone->lru_lock);
-			zone = pagezone;
-			spin_lock_irq(&zone->lru_lock);
-		}
-		VM_BUG_ON(PageActive(page));
-		VM_BUG_ON(PageUnevictable(page));
-		VM_BUG_ON(PageLRU(page));
-		SetPageLRU(page);
-		if (is_active_lru(lru))
-			SetPageActive(page);
-		add_page_to_lru_list(zone, page, lru);
-	}
-	if (zone)
-		spin_unlock_irq(&zone->lru_lock);
-	release_pages(pvec->pages, pvec->nr, pvec->cold);
-	pagevec_reinit(pvec);
-}
-
-EXPORT_SYMBOL(____pagevec_lru_add);
-
-/*
  * Try to drop buffers from the pages in a pagevec
  */
 void pagevec_strip(struct pagevec *pvec)


end of thread

Thread overview: 11+ messages
2008-10-22 22:50 [patch 0/3] activate pages in batch Johannes Weiner
2008-10-22 22:50 ` [patch 1/3] swap: use an array for all pagevecs Johannes Weiner
2008-10-22 22:50 ` [patch 2/3] swap: refactor pagevec flushing Johannes Weiner
2008-10-27  6:50   ` Andrew Morton
2008-10-27  8:08     ` Johannes Weiner
2008-10-27 12:18       ` [patch 2/3 v2] " Johannes Weiner
2008-10-22 22:50 ` [patch 3/3] swap: cache page activation Johannes Weiner
2008-10-23  1:41 ` [patch 0/3] activate pages in batch KOSAKI Motohiro
2008-10-23  2:00   ` Johannes Weiner
2008-10-23  2:10     ` KOSAKI Motohiro
2008-10-23 16:21       ` Johannes Weiner
