linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH][RFC] mm: swsusp shrink_all_memory tweaks
       [not found]   ` <200603171831.46811.rjw@sisk.pl>
@ 2006-03-18  4:14     ` Con Kolivas
  2006-03-18  4:41       ` Nick Piggin
  0 siblings, 1 reply; 9+ messages in thread
From: Con Kolivas @ 2006-03-18  4:14 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: ck, Andreas Mohr, linux-mm, linux-kernel, Pavel Machek, Stefan Seyfried

This patch is a rewrite of the shrink_all_memory function used by swsusp
prior to suspending to disk.

The special hooks into balance_pgdat for shrink_all_memory have been removed
thus simplifying that function significantly.

Some code will now be compiled out in the !CONFIG_PM case.

shrink_all_memory now uses shrink_zone and shrink_slab directly with an extra
entry in the struct scan_control suspend_pass. This is used to alter what
lists will be shrunk by shrink_zone on successive passes. The aim of this is
to alter the reclaim logic to choose the best pages to keep on resume, to
free the minimum amount of memory required to suspend and free the memory
faster.

Signed-off-by: Con Kolivas <kernel@kolivas.org>

 include/linux/swap.h  |   45 +++++++++++++
 kernel/power/swsusp.c |   10 ---
 mm/vmscan.c           |  161 +++++++++++++++++++++++---------------------------
 3 files changed, 125 insertions(+), 91 deletions(-)

Index: linux-2.6.16-rc6-mm1/mm/vmscan.c
===================================================================
--- linux-2.6.16-rc6-mm1.orig/mm/vmscan.c	2006-03-18 13:29:38.000000000 +1100
+++ linux-2.6.16-rc6-mm1/mm/vmscan.c	2006-03-18 13:58:51.000000000 +1100
@@ -55,27 +55,6 @@ typedef enum {
 	PAGE_CLEAN,
 } pageout_t;
 
-struct scan_control {
-	/* Incremented by the number of inactive pages that were scanned */
-	unsigned long nr_scanned;
-
-	unsigned long nr_mapped;	/* From page_state */
-
-	/* This context's GFP mask */
-	gfp_t gfp_mask;
-
-	int may_writepage;
-
-	/* Can pages be swapped as part of reclaim? */
-	int may_swap;
-
-	/* This context's SWAP_CLUSTER_MAX. If freeing memory for
-	 * suspend, we effectively ignore SWAP_CLUSTER_MAX.
-	 * In this context, it doesn't matter that we scan the
-	 * whole list at once. */
-	int swap_cluster_max;
-};
-
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
 #ifdef ARCH_HAS_PREFETCH
@@ -1327,7 +1306,8 @@ static void shrink_active_list(unsigned 
 }
 
 /*
- * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
+ * This is a basic per-zone page freer.  Used by kswapd, direct reclaim and
+ * the swsusp specific shrink_all_memory functions.
  */
 static unsigned long shrink_zone(int priority, struct zone *zone,
 				struct scan_control *sc)
@@ -1345,7 +1325,7 @@ static unsigned long shrink_zone(int pri
 	 */
 	zone->nr_scan_active += (zone->nr_active >> priority) + 1;
 	nr_active = zone->nr_scan_active;
-	if (nr_active >= sc->swap_cluster_max)
+	if (nr_active >= sc->swap_cluster_max && suspend_scan_active(sc))
 		zone->nr_scan_active = 0;
 	else
 		nr_active = 0;
@@ -1422,7 +1402,12 @@ static unsigned long shrink_zones(int pr
 	}
 	return nr_reclaimed;
 }
- 
+
+#define for_each_priority_reverse(priority)	\
+	for (priority = DEF_PRIORITY;		\
+		priority >= 0;			\
+		priority--)
+
 /*
  * This is the main entry point to direct page reclaim.
  *
@@ -1466,7 +1451,7 @@ unsigned long try_to_free_pages(struct z
 		lru_pages += zone->nr_active + zone->nr_inactive;
 	}
 
-	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+	for_each_priority_reverse(priority) {
 		sc.nr_mapped = read_page_state(nr_mapped);
 		sc.nr_scanned = 0;
 		if (!priority)
@@ -1516,10 +1501,6 @@ out:
  * For kswapd, balance_pgdat() will work across all this node's zones until
  * they are all at pages_high.
  *
- * If `nr_pages' is non-zero then it is the number of pages which are to be
- * reclaimed, regardless of the zone occupancies.  This is a software suspend
- * special.
- *
  * Returns the number of pages which were actually freed.
  *
  * There is special handling here for zones which are full of pinned pages.
@@ -1537,10 +1518,8 @@ out:
  * the page allocator fallback scheme to ensure that aging of pages is balanced
  * across the zones.
  */
-static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,
-				int order)
+static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 {
-	unsigned long to_free = nr_pages;
 	int all_zones_ok;
 	int priority;
 	int i;
@@ -1550,7 +1529,7 @@ static unsigned long balance_pgdat(pg_da
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.may_swap = 1,
-		.swap_cluster_max = nr_pages ? nr_pages : SWAP_CLUSTER_MAX,
+		.swap_cluster_max = SWAP_CLUSTER_MAX,
 	};
 
 loop_again:
@@ -1567,7 +1546,7 @@ loop_again:
 		zone->temp_priority = DEF_PRIORITY;
 	}
 
-	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+	for_each_priority_reverse(priority) {
 		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 		unsigned long lru_pages = 0;
 
@@ -1577,31 +1556,27 @@ loop_again:
 
 		all_zones_ok = 1;
 
-		if (nr_pages == 0) {
-			/*
-			 * Scan in the highmem->dma direction for the highest
-			 * zone which needs scanning
-			 */
-			for (i = pgdat->nr_zones - 1; i >= 0; i--) {
-				struct zone *zone = pgdat->node_zones + i;
+		/*
+		 * Scan in the highmem->dma direction for the highest
+		 * zone which needs scanning
+		 */
+		for (i = pgdat->nr_zones - 1; i >= 0; i--) {
+			struct zone *zone = pgdat->node_zones + i;
 
-				if (!populated_zone(zone))
-					continue;
+			if (!populated_zone(zone))
+				continue;
 
-				if (zone->all_unreclaimable &&
-						priority != DEF_PRIORITY)
-					continue;
+			if (zone->all_unreclaimable &&
+					priority != DEF_PRIORITY)
+				continue;
 
-				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, 0, 0)) {
-					end_zone = i;
-					goto scan;
-				}
+			if (!zone_watermark_ok(zone, order, zone->pages_high,
+					       0, 0)) {
+				end_zone = i;
+				goto scan;
 			}
-			goto out;
-		} else {
-			end_zone = pgdat->nr_zones - 1;
 		}
+		goto out;
 scan:
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;
@@ -1628,11 +1603,9 @@ scan:
 			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 				continue;
 
-			if (nr_pages == 0) {	/* Not software suspend */
-				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, end_zone, 0))
-					all_zones_ok = 0;
-			}
+			if (!zone_watermark_ok(zone, order, zone->pages_high,
+					       end_zone, 0))
+				all_zones_ok = 0;
 			zone->temp_priority = priority;
 			if (zone->prev_priority > priority)
 				zone->prev_priority = priority;
@@ -1657,8 +1630,6 @@ scan:
 			    total_scanned > nr_reclaimed + nr_reclaimed / 2)
 				sc.may_writepage = 1;
 		}
-		if (nr_pages && to_free > nr_reclaimed)
-			continue;	/* swsusp: need to do more work */
 		if (all_zones_ok)
 			break;		/* kswapd: all done */
 		/*
@@ -1674,7 +1645,7 @@ scan:
 		 * matches the direct reclaim path behaviour in terms of impact
 		 * on zone->*_priority.
 		 */
-		if ((nr_reclaimed >= SWAP_CLUSTER_MAX) && !nr_pages)
+		if (nr_reclaimed >= SWAP_CLUSTER_MAX)
 			break;
 	}
 out:
@@ -1756,7 +1727,7 @@ static int kswapd(void *p)
 		}
 		finish_wait(&pgdat->kswapd_wait, &wait);
 
-		balance_pgdat(pgdat, 0, order);
+		balance_pgdat(pgdat, order);
 	}
 	return 0;
 }
@@ -1786,36 +1757,58 @@ void wakeup_kswapd(struct zone *zone, in
 #ifdef CONFIG_PM
 /*
  * Try to free `nr_pages' of memory, system-wide.  Returns the number of freed
- * pages.
+ * pages. It does this via shrink_zone in passes. Rather than trying to age
+ * LRUs the aim is to preserve the overall LRU order by reclaiming
+ * preferentially inactive > active > active referenced > active mapped
  */
 unsigned long shrink_all_memory(unsigned long nr_pages)
 {
-	pg_data_t *pgdat;
-	unsigned long nr_to_free = nr_pages;
 	unsigned long ret = 0;
-	unsigned retry = 2;
-	struct reclaim_state reclaim_state = {
-		.reclaimed_slab = 0,
+	struct scan_control sc = {
+		.gfp_mask = GFP_KERNEL,
+		.may_swap = 1,
+		.swap_cluster_max = nr_pages,
+		.suspend_pass = 3,
+		.may_writepage = 1,
 	};
 
 	delay_swap_prefetch();
 
-	current->reclaim_state = &reclaim_state;
-repeat:
-	for_each_online_pgdat(pgdat) {
-		unsigned long freed;
+	do {
+		int priority;
 
-		freed = balance_pgdat(pgdat, nr_to_free, 0);
-		ret += freed;
-		nr_to_free -= freed;
-		if ((long)nr_to_free <= 0)
-			break;
-	}
-	if (retry-- && ret < nr_pages) {
-		blk_congestion_wait(WRITE, HZ/5);
-		goto repeat;
-	}
-	current->reclaim_state = NULL;
+		for_each_priority_reverse(priority) {
+			struct zone *zone;
+			unsigned long lru_pages = 0;
+
+			for_each_zone(zone) {
+				unsigned long freed;
+
+				if (!populated_zone(zone))
+					continue;
+
+				if (zone->all_unreclaimable &&
+				    priority != DEF_PRIORITY)
+					continue;
+
+				lru_pages += zone->nr_active +
+					zone->nr_inactive;
+				/*
+				 * shrink_active_list needs this to reclaim
+				 * mapped pages
+				 */
+				if (!sc.suspend_pass)
+					zone->prev_priority = 0;
+				freed = shrink_zone(priority, zone, &sc);
+				ret += freed;
+				if (ret > nr_pages)
+					goto out;
+			}
+			shrink_slab(0, sc.gfp_mask, lru_pages);
+		}
+		blk_congestion_wait(WRITE, HZ / 5);
+	} while (--sc.suspend_pass >= 0);
+out:
 	return ret;
 }
 #endif
Index: linux-2.6.16-rc6-mm1/kernel/power/swsusp.c
===================================================================
--- linux-2.6.16-rc6-mm1.orig/kernel/power/swsusp.c	2006-03-18 13:29:38.000000000 +1100
+++ linux-2.6.16-rc6-mm1/kernel/power/swsusp.c	2006-03-18 13:30:52.000000000 +1100
@@ -173,9 +173,6 @@ void free_all_swap_pages(int swap, struc
  *	Notice: all userland should be stopped before it is called, or
  *	livelock is possible.
  */
-
-#define SHRINK_BITE	10000
-
 int swsusp_shrink_memory(void)
 {
 	long size, tmp;
@@ -194,14 +191,13 @@ int swsusp_shrink_memory(void)
 		for_each_zone (zone)
 			if (!is_highmem(zone))
 				tmp -= zone->free_pages;
+		if (tmp <= 0)
+			tmp = size - image_size / PAGE_SIZE;
 		if (tmp > 0) {
-			tmp = shrink_all_memory(SHRINK_BITE);
+			tmp = shrink_all_memory(tmp);
 			if (!tmp)
 				return -ENOMEM;
 			pages += tmp;
-		} else if (size > image_size / PAGE_SIZE) {
-			tmp = shrink_all_memory(SHRINK_BITE);
-			pages += tmp;
 		}
 		printk("\b%c", p[i++%4]);
 	} while (tmp > 0);
Index: linux-2.6.16-rc6-mm1/include/linux/swap.h
===================================================================
--- linux-2.6.16-rc6-mm1.orig/include/linux/swap.h	2006-03-18 13:29:38.000000000 +1100
+++ linux-2.6.16-rc6-mm1/include/linux/swap.h	2006-03-18 14:50:11.000000000 +1100
@@ -66,6 +66,51 @@ typedef struct {
 	unsigned long val;
 } swp_entry_t;
 
+struct scan_control {
+	/* Incremented by the number of inactive pages that were scanned */
+	unsigned long nr_scanned;
+
+	unsigned long nr_mapped;	/* From page_state */
+
+	/* This context's GFP mask */
+	gfp_t gfp_mask;
+
+	int may_writepage;
+
+	/* Can pages be swapped as part of reclaim? */
+	int may_swap;
+
+	/* This context's SWAP_CLUSTER_MAX. If freeing memory for
+	 * suspend, we effectively ignore SWAP_CLUSTER_MAX.
+	 * In this context, it doesn't matter that we scan the
+	 * whole list at once. */
+	int swap_cluster_max;
+
+#ifdef CONFIG_PM
+	/*
+	 * If we're doing suspend to disk, what pass is this.
+	 * We decrement to allow code to transparently do normal reclaim
+	 * without explicitly setting it to 0.
+	 *
+	 * 3 = Reclaim from inactive_list only
+	 * 2 = Reclaim from active list but don't reclaim mapped
+	 * 1 = 2nd pass of type 2
+	 * 0 = Reclaim mapped (normal reclaim)
+	 */
+	int suspend_pass;
+#endif
+};
+
+/*
+ * When scanning for the swsusp function shrink_all_memory we only shrink
+ * active lists on the 2nd pass.
+ */
+#ifdef CONFIG_PM
+#define suspend_scan_active(sc)	((sc)->suspend_pass < 3)
+#else
+#define suspend_scan_active(sc)	1
+#endif
+
 /*
  * current->reclaim_state points to one of these when a task is running
  * memory reclaim

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH][RFC] mm: swsusp shrink_all_memory tweaks
  2006-03-18  4:14     ` [PATCH][RFC] mm: swsusp shrink_all_memory tweaks Con Kolivas
@ 2006-03-18  4:41       ` Nick Piggin
  2006-03-18  4:46         ` Con Kolivas
  0 siblings, 1 reply; 9+ messages in thread
From: Nick Piggin @ 2006-03-18  4:41 UTC (permalink / raw)
  To: Con Kolivas
  Cc: Rafael J. Wysocki, ck, Andreas Mohr, linux-mm, linux-kernel,
	Pavel Machek, Stefan Seyfried

Con Kolivas wrote:

> @@ -1567,7 +1546,7 @@ loop_again:
>  		zone->temp_priority = DEF_PRIORITY;
>  	}
>  
> -	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
> +	for_each_priority_reverse(priority) {

What's this for? The for loop is simple and easy to read, after
the change, you have to look somewhere else to see what it does.

> Index: linux-2.6.16-rc6-mm1/include/linux/swap.h
> ===================================================================
> --- linux-2.6.16-rc6-mm1.orig/include/linux/swap.h	2006-03-18 13:29:38.000000000 +1100
> +++ linux-2.6.16-rc6-mm1/include/linux/swap.h	2006-03-18 14:50:11.000000000 +1100
> @@ -66,6 +66,51 @@ typedef struct {
>  	unsigned long val;
>  } swp_entry_t;
>  
> +struct scan_control {

Why did you put this here? scan_control really can't go outside vmscan.c,
it is meant only to ease the passing of lots of parameters, and not as a
consistent interface.

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH][RFC] mm: swsusp shrink_all_memory tweaks
  2006-03-18  4:41       ` Nick Piggin
@ 2006-03-18  4:46         ` Con Kolivas
  2006-03-18  4:52           ` Nick Piggin
  0 siblings, 1 reply; 9+ messages in thread
From: Con Kolivas @ 2006-03-18  4:46 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Rafael J. Wysocki, ck, Andreas Mohr, linux-mm, linux-kernel,
	Pavel Machek, Stefan Seyfried

On Saturday 18 March 2006 15:41, Nick Piggin wrote:
> Con Kolivas wrote:
> > @@ -1567,7 +1546,7 @@ loop_again:
> >  		zone->temp_priority = DEF_PRIORITY;
> >  	}
> >
> > -	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
> > +	for_each_priority_reverse(priority) {
>
> What's this for? The for loop is simple and easy to read, after
> the change, you have to look somewhere else to see what it does.

Saw the same for loop 3 times and couldn't resist.

> > Index: linux-2.6.16-rc6-mm1/include/linux/swap.h
> > ===================================================================
> > --- linux-2.6.16-rc6-mm1.orig/include/linux/swap.h	2006-03-18
> > 13:29:38.000000000 +1100 +++
> > linux-2.6.16-rc6-mm1/include/linux/swap.h	2006-03-18 14:50:11.000000000
> > +1100 @@ -66,6 +66,51 @@ typedef struct {
> >  	unsigned long val;
> >  } swp_entry_t;
> >
> > +struct scan_control {
>
> Why did you put this here? scan_control really can't go outside vmscan.c,
> it is meant only to ease the passing of lots of parameters, and not as a
> consistent interface.

#ifdeffery

Cheers,
Con

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH][RFC] mm: swsusp shrink_all_memory tweaks
  2006-03-18  4:46         ` Con Kolivas
@ 2006-03-18  4:52           ` Nick Piggin
  2006-03-18  4:56             ` Con Kolivas
  0 siblings, 1 reply; 9+ messages in thread
From: Nick Piggin @ 2006-03-18  4:52 UTC (permalink / raw)
  To: Con Kolivas
  Cc: Rafael J. Wysocki, ck, Andreas Mohr, linux-mm, linux-kernel,
	Pavel Machek, Stefan Seyfried

Con Kolivas wrote:
> On Saturday 18 March 2006 15:41, Nick Piggin wrote:

>>>Index: linux-2.6.16-rc6-mm1/include/linux/swap.h
>>>===================================================================
>>>--- linux-2.6.16-rc6-mm1.orig/include/linux/swap.h	2006-03-18
>>>13:29:38.000000000 +1100 +++
>>>linux-2.6.16-rc6-mm1/include/linux/swap.h	2006-03-18 14:50:11.000000000
>>>+1100 @@ -66,6 +66,51 @@ typedef struct {
>>> 	unsigned long val;
>>> } swp_entry_t;
>>>
>>>+struct scan_control {
>>
>>Why did you put this here? scan_control really can't go outside vmscan.c,
>>it is meant only to ease the passing of lots of parameters, and not as a
>>consistent interface.
> 
> 
> #ifdeffery
> 

Sorry I don't understand...

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH][RFC] mm: swsusp shrink_all_memory tweaks
  2006-03-18  4:52           ` Nick Piggin
@ 2006-03-18  4:56             ` Con Kolivas
  2006-03-18  5:44               ` Nick Piggin
  0 siblings, 1 reply; 9+ messages in thread
From: Con Kolivas @ 2006-03-18  4:56 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Rafael J. Wysocki, ck, Andreas Mohr, linux-mm, linux-kernel,
	Pavel Machek, Stefan Seyfried

On Saturday 18 March 2006 15:52, Nick Piggin wrote:
> Con Kolivas wrote:
> > On Saturday 18 March 2006 15:41, Nick Piggin wrote:
> >>>Index: linux-2.6.16-rc6-mm1/include/linux/swap.h
> >>>===================================================================
> >>>--- linux-2.6.16-rc6-mm1.orig/include/linux/swap.h	2006-03-18
> >>>13:29:38.000000000 +1100 +++
> >>>linux-2.6.16-rc6-mm1/include/linux/swap.h	2006-03-18 14:50:11.000000000
> >>>+1100 @@ -66,6 +66,51 @@ typedef struct {
> >>> 	unsigned long val;
> >>> } swp_entry_t;
> >>>
> >>>+struct scan_control {
> >>
> >>Why did you put this here? scan_control really can't go outside vmscan.c,
> >>it is meant only to ease the passing of lots of parameters, and not as a
> >>consistent interface.
> >
> > #ifdeffery
>
> Sorry I don't understand...

My bad.

I added the suspend_pass member to struct scan_control within an #ifdef 
CONFIG_PM to allow it to not be unnecessarily compiled in in the !CONFIG_PM 
case and wanted to avoid having the #ifdefs in vmscan.c so moved it to a 
header file.

Cheers,
Con

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH][RFC] mm: swsusp shrink_all_memory tweaks
  2006-03-18  4:56             ` Con Kolivas
@ 2006-03-18  5:44               ` Nick Piggin
  2006-03-18  6:14                 ` Con Kolivas
  0 siblings, 1 reply; 9+ messages in thread
From: Nick Piggin @ 2006-03-18  5:44 UTC (permalink / raw)
  To: Con Kolivas
  Cc: Rafael J. Wysocki, ck, Andreas Mohr, linux-mm, linux-kernel,
	Pavel Machek, Stefan Seyfried

Con Kolivas wrote:
> On Saturday 18 March 2006 15:52, Nick Piggin wrote:
> 
>>Con Kolivas wrote:
>>
>>>
>>>#ifdeffery
>>
>>Sorry I don't understand...
> 
> 
> My bad.
> 
> I added the suspend_pass member to struct scan_control within an #ifdef 
> CONFIG_PM to allow it to not be unnecessarily compiled in in the !CONFIG_PM 
> case and wanted to avoid having the #ifdefs in vmscan.c so moved it to a 
> header file.
> 

Oh no, that rule thumb isn't actually "don't put ifdefs in .c files", but
people commonly say it that way anyway. The rule is actually that you should
put ifdefs in declarations rather than call/usage sites.

You did the right thing there by introducing the accessor, which moves the
ifdef out of code that wants to query the member right? But you can still
leave it in the .c file if it is local (which it is).

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH][RFC] mm: swsusp shrink_all_memory tweaks
  2006-03-18  5:44               ` Nick Piggin
@ 2006-03-18  6:14                 ` Con Kolivas
  2006-03-18  8:30                   ` Nick Piggin
  0 siblings, 1 reply; 9+ messages in thread
From: Con Kolivas @ 2006-03-18  6:14 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Rafael J. Wysocki, ck, Andreas Mohr, linux-mm, linux-kernel,
	Pavel Machek, Stefan Seyfried, Greg KH

cc'ed GregKH for comment hopefully.

On Saturday 18 March 2006 16:44, Nick Piggin wrote:
> Con Kolivas wrote:
> > I added the suspend_pass member to struct scan_control within an #ifdef
> > CONFIG_PM to allow it to not be unnecessarily compiled in in the
> > !CONFIG_PM case and wanted to avoid having the #ifdefs in vmscan.c so
> > moved it to a header file.
>
> Oh no, that rule thumb isn't actually "don't put ifdefs in .c files", but
> people commonly say it that way anyway. The rule is actually that you
> should put ifdefs in declarations rather than call/usage sites.

There isn't a formal reference to this in the Codingstyle documentation, but 
Greg's 2002 ols presentation says simply says no ifdefs in .c files.

http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/mgp00031.html

I'm confused now because I've been working very hard to do this with all code.

> You did the right thing there by introducing the accessor, which moves the
> ifdef out of code that wants to query the member right? But you can still
> leave it in the .c file if it is local (which it is).

Once again I'm happy to do the right thing; I'm just not sure what that is.

Cheers,
Con

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH][RFC] mm: swsusp shrink_all_memory tweaks
  2006-03-18  6:14                 ` Con Kolivas
@ 2006-03-18  8:30                   ` Nick Piggin
  2006-03-18  9:40                     ` Con Kolivas
  0 siblings, 1 reply; 9+ messages in thread
From: Nick Piggin @ 2006-03-18  8:30 UTC (permalink / raw)
  To: Con Kolivas
  Cc: Rafael J. Wysocki, ck, Andreas Mohr, linux-mm, linux-kernel,
	Pavel Machek, Stefan Seyfried, Greg KH

Con Kolivas wrote:
> cc'ed GregKH for comment hopefully.

>>You did the right thing there by introducing the accessor, which moves the
>>ifdef out of code that wants to query the member right? But you can still
>>leave it in the .c file if it is local (which it is).
> 
> 
> Once again I'm happy to do the right thing; I'm just not sure what that is.
> 

Well, struct scan_control escaping from vmscan.c is not the right thing
(try to get that past Andrew!). Obviously in this case, having the ifdef
in the .c file is OK.

I guess Greg's presentation is a first order approximation to get people
thinking in the right way. I mean we do it all the time, and in core kernel
code too (our favourite sched.c is a prime example).

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH][RFC] mm: swsusp shrink_all_memory tweaks
  2006-03-18  8:30                   ` Nick Piggin
@ 2006-03-18  9:40                     ` Con Kolivas
  0 siblings, 0 replies; 9+ messages in thread
From: Con Kolivas @ 2006-03-18  9:40 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Rafael J. Wysocki, ck, Andreas Mohr, linux-mm, linux-kernel,
	Pavel Machek, Stefan Seyfried, Greg KH

On Saturday 18 March 2006 19:30, Nick Piggin wrote:
> Con Kolivas wrote:
> > cc'ed GregKH for comment hopefully.
> >
> >>You did the right thing there by introducing the accessor, which moves
> >> the ifdef out of code that wants to query the member right? But you can
> >> still leave it in the .c file if it is local (which it is).
> >
> > Once again I'm happy to do the right thing; I'm just not sure what that
> > is.
>
> Well, struct scan_control escaping from vmscan.c is not the right thing
> (try to get that past Andrew!). Obviously in this case, having the ifdef
> in the .c file is OK.
>
> I guess Greg's presentation is a first order approximation to get people
> thinking in the right way. I mean we do it all the time, and in core kernel
> code too (our favourite sched.c is a prime example).

Ok here's a respin without touching swap.h and leaving the code otherwise the
same.

Cheers,
Con
---
This patch is a rewrite of the shrink_all_memory function used by swsusp
prior to suspending to disk.

The special hooks into balance_pgdat for shrink_all_memory have been removed
thus simplifying that function significantly.

Some code will now be compiled out in the !CONFIG_PM case.

shrink_all_memory now uses shrink_zone and shrink_slab directly with an extra
entry in the struct scan_control suspend_pass. This is used to alter what
lists will be shrunk by shrink_zone on successive passes. The aim of this is
to alter the reclaim logic to choose the best pages to keep on resume, to
free the minimum amount of memory required to suspend and free the memory
faster.

Signed-off-by: Con Kolivas <kernel@kolivas.org>

 kernel/power/swsusp.c |   10 ---
 mm/vmscan.c           |  164 ++++++++++++++++++++++++++++++--------------------
 2 files changed, 104 insertions(+), 70 deletions(-)

Index: linux-2.6.16-rc6-mm1/mm/vmscan.c
===================================================================
--- linux-2.6.16-rc6-mm1.orig/mm/vmscan.c	2006-03-18 13:29:38.000000000 +1100
+++ linux-2.6.16-rc6-mm1/mm/vmscan.c	2006-03-18 19:47:38.000000000 +1100
@@ -74,8 +74,32 @@ struct scan_control {
 	 * In this context, it doesn't matter that we scan the
 	 * whole list at once. */
 	int swap_cluster_max;
+
+#ifdef CONFIG_PM
+	/*
+	 * If we're doing suspend to disk, what pass is this.
+	 * We decrement to allow code to transparently do normal reclaim
+	 * without explicitly setting it to 0.
+	 *
+	 * 3 = Reclaim from inactive_list only
+	 * 2 = Reclaim from active list but don't reclaim mapped
+	 * 1 = 2nd pass of type 2
+	 * 0 = Reclaim mapped (normal reclaim)
+	 */
+	int suspend_pass;
+#endif
 };
 
+/*
+ * When scanning for the swsusp function shrink_all_memory we only shrink
+ * active lists on the 2nd pass.
+ */
+#ifdef CONFIG_PM
+#define suspend_scan_active(sc)	((sc)->suspend_pass < 3)
+#else
+#define suspend_scan_active(sc)	1
+#endif
+
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
 #ifdef ARCH_HAS_PREFETCH
@@ -1327,7 +1351,8 @@ static void shrink_active_list(unsigned 
 }
 
 /*
- * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
+ * This is a basic per-zone page freer.  Used by kswapd, direct reclaim and
+ * the swsusp specific shrink_all_memory functions.
  */
 static unsigned long shrink_zone(int priority, struct zone *zone,
 				struct scan_control *sc)
@@ -1345,7 +1370,7 @@ static unsigned long shrink_zone(int pri
 	 */
 	zone->nr_scan_active += (zone->nr_active >> priority) + 1;
 	nr_active = zone->nr_scan_active;
-	if (nr_active >= sc->swap_cluster_max)
+	if (nr_active >= sc->swap_cluster_max && suspend_scan_active(sc))
 		zone->nr_scan_active = 0;
 	else
 		nr_active = 0;
@@ -1422,7 +1447,12 @@ static unsigned long shrink_zones(int pr
 	}
 	return nr_reclaimed;
 }
- 
+
+#define for_each_priority_reverse(priority)	\
+	for (priority = DEF_PRIORITY;		\
+		priority >= 0;			\
+		priority--)
+
 /*
  * This is the main entry point to direct page reclaim.
  *
@@ -1466,7 +1496,7 @@ unsigned long try_to_free_pages(struct z
 		lru_pages += zone->nr_active + zone->nr_inactive;
 	}
 
-	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+	for_each_priority_reverse(priority) {
 		sc.nr_mapped = read_page_state(nr_mapped);
 		sc.nr_scanned = 0;
 		if (!priority)
@@ -1516,10 +1546,6 @@ out:
  * For kswapd, balance_pgdat() will work across all this node's zones until
  * they are all at pages_high.
  *
- * If `nr_pages' is non-zero then it is the number of pages which are to be
- * reclaimed, regardless of the zone occupancies.  This is a software suspend
- * special.
- *
  * Returns the number of pages which were actually freed.
  *
  * There is special handling here for zones which are full of pinned pages.
@@ -1537,10 +1563,8 @@ out:
  * the page allocator fallback scheme to ensure that aging of pages is balanced
  * across the zones.
  */
-static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,
-				int order)
+static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 {
-	unsigned long to_free = nr_pages;
 	int all_zones_ok;
 	int priority;
 	int i;
@@ -1550,7 +1574,7 @@ static unsigned long balance_pgdat(pg_da
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.may_swap = 1,
-		.swap_cluster_max = nr_pages ? nr_pages : SWAP_CLUSTER_MAX,
+		.swap_cluster_max = SWAP_CLUSTER_MAX,
 	};
 
 loop_again:
@@ -1567,7 +1591,7 @@ loop_again:
 		zone->temp_priority = DEF_PRIORITY;
 	}
 
-	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+	for_each_priority_reverse(priority) {
 		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 		unsigned long lru_pages = 0;
 
@@ -1577,31 +1601,27 @@ loop_again:
 
 		all_zones_ok = 1;
 
-		if (nr_pages == 0) {
-			/*
-			 * Scan in the highmem->dma direction for the highest
-			 * zone which needs scanning
-			 */
-			for (i = pgdat->nr_zones - 1; i >= 0; i--) {
-				struct zone *zone = pgdat->node_zones + i;
+		/*
+		 * Scan in the highmem->dma direction for the highest
+		 * zone which needs scanning
+		 */
+		for (i = pgdat->nr_zones - 1; i >= 0; i--) {
+			struct zone *zone = pgdat->node_zones + i;
 
-				if (!populated_zone(zone))
-					continue;
+			if (!populated_zone(zone))
+				continue;
 
-				if (zone->all_unreclaimable &&
-						priority != DEF_PRIORITY)
-					continue;
+			if (zone->all_unreclaimable &&
+					priority != DEF_PRIORITY)
+				continue;
 
-				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, 0, 0)) {
-					end_zone = i;
-					goto scan;
-				}
+			if (!zone_watermark_ok(zone, order, zone->pages_high,
+					       0, 0)) {
+				end_zone = i;
+				goto scan;
 			}
-			goto out;
-		} else {
-			end_zone = pgdat->nr_zones - 1;
 		}
+		goto out;
 scan:
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;
@@ -1628,11 +1648,9 @@ scan:
 			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 				continue;
 
-			if (nr_pages == 0) {	/* Not software suspend */
-				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, end_zone, 0))
-					all_zones_ok = 0;
-			}
+			if (!zone_watermark_ok(zone, order, zone->pages_high,
+					       end_zone, 0))
+				all_zones_ok = 0;
 			zone->temp_priority = priority;
 			if (zone->prev_priority > priority)
 				zone->prev_priority = priority;
@@ -1657,8 +1675,6 @@ scan:
 			    total_scanned > nr_reclaimed + nr_reclaimed / 2)
 				sc.may_writepage = 1;
 		}
-		if (nr_pages && to_free > nr_reclaimed)
-			continue;	/* swsusp: need to do more work */
 		if (all_zones_ok)
 			break;		/* kswapd: all done */
 		/*
@@ -1674,7 +1690,7 @@ scan:
 		 * matches the direct reclaim path behaviour in terms of impact
 		 * on zone->*_priority.
 		 */
-		if ((nr_reclaimed >= SWAP_CLUSTER_MAX) && !nr_pages)
+		if (nr_reclaimed >= SWAP_CLUSTER_MAX)
 			break;
 	}
 out:
@@ -1756,7 +1772,7 @@ static int kswapd(void *p)
 		}
 		finish_wait(&pgdat->kswapd_wait, &wait);
 
-		balance_pgdat(pgdat, 0, order);
+		balance_pgdat(pgdat, order);
 	}
 	return 0;
 }
@@ -1786,36 +1802,58 @@ void wakeup_kswapd(struct zone *zone, in
 #ifdef CONFIG_PM
 /*
  * Try to free `nr_pages' of memory, system-wide.  Returns the number of freed
- * pages.
+ * pages. It does this via shrink_zone in passes. Rather than trying to age
+ * LRUs the aim is to preserve the overall LRU order by reclaiming
+ * preferentially inactive > active > active referenced > active mapped
  */
 unsigned long shrink_all_memory(unsigned long nr_pages)
 {
-	pg_data_t *pgdat;
-	unsigned long nr_to_free = nr_pages;
 	unsigned long ret = 0;
-	unsigned retry = 2;
-	struct reclaim_state reclaim_state = {
-		.reclaimed_slab = 0,
+	struct scan_control sc = {
+		.gfp_mask = GFP_KERNEL,
+		.may_swap = 1,
+		.swap_cluster_max = nr_pages,
+		.suspend_pass = 3,
+		.may_writepage = 1,
 	};
 
 	delay_swap_prefetch();
 
-	current->reclaim_state = &reclaim_state;
-repeat:
-	for_each_online_pgdat(pgdat) {
-		unsigned long freed;
+	do {
+		int priority;
 
-		freed = balance_pgdat(pgdat, nr_to_free, 0);
-		ret += freed;
-		nr_to_free -= freed;
-		if ((long)nr_to_free <= 0)
-			break;
-	}
-	if (retry-- && ret < nr_pages) {
-		blk_congestion_wait(WRITE, HZ/5);
-		goto repeat;
-	}
-	current->reclaim_state = NULL;
+		for_each_priority_reverse(priority) {
+			struct zone *zone;
+			unsigned long lru_pages = 0;
+
+			for_each_zone(zone) {
+				unsigned long freed;
+
+				if (!populated_zone(zone))
+					continue;
+
+				if (zone->all_unreclaimable &&
+				    priority != DEF_PRIORITY)
+					continue;
+
+				lru_pages += zone->nr_active +
+					zone->nr_inactive;
+				/*
+				 * shrink_active_list needs this to reclaim
+				 * mapped pages
+				 */
+				if (!sc.suspend_pass)
+					zone->prev_priority = 0;
+				freed = shrink_zone(priority, zone, &sc);
+				ret += freed;
+				if (ret > nr_pages)
+					goto out;
+			}
+			shrink_slab(0, sc.gfp_mask, lru_pages);
+		}
+		blk_congestion_wait(WRITE, HZ / 5);
+	} while (--sc.suspend_pass >= 0);
+out:
 	return ret;
 }
 #endif
Index: linux-2.6.16-rc6-mm1/kernel/power/swsusp.c
===================================================================
--- linux-2.6.16-rc6-mm1.orig/kernel/power/swsusp.c	2006-03-18 13:29:38.000000000 +1100
+++ linux-2.6.16-rc6-mm1/kernel/power/swsusp.c	2006-03-18 13:30:52.000000000 +1100
@@ -173,9 +173,6 @@ void free_all_swap_pages(int swap, struc
  *	Notice: all userland should be stopped before it is called, or
  *	livelock is possible.
  */
-
-#define SHRINK_BITE	10000
-
 int swsusp_shrink_memory(void)
 {
 	long size, tmp;
@@ -194,14 +191,13 @@ int swsusp_shrink_memory(void)
 		for_each_zone (zone)
 			if (!is_highmem(zone))
 				tmp -= zone->free_pages;
+		if (tmp <= 0)
+			tmp = size - image_size / PAGE_SIZE;
 		if (tmp > 0) {
-			tmp = shrink_all_memory(SHRINK_BITE);
+			tmp = shrink_all_memory(tmp);
 			if (!tmp)
 				return -ENOMEM;
 			pages += tmp;
-		} else if (size > image_size / PAGE_SIZE) {
-			tmp = shrink_all_memory(SHRINK_BITE);
-			pages += tmp;
 		}
 		printk("\b%c", p[i++%4]);
 	} while (tmp > 0);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2006-03-18  9:40 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <200603101704.AA00798@bbb-jz5c7z9hn9y.digitalinfra.co.jp>
     [not found] ` <200603171717.23288.kernel@kolivas.org>
     [not found]   ` <200603171831.46811.rjw@sisk.pl>
2006-03-18  4:14     ` [PATCH][RFC] mm: swsusp shrink_all_memory tweaks Con Kolivas
2006-03-18  4:41       ` Nick Piggin
2006-03-18  4:46         ` Con Kolivas
2006-03-18  4:52           ` Nick Piggin
2006-03-18  4:56             ` Con Kolivas
2006-03-18  5:44               ` Nick Piggin
2006-03-18  6:14                 ` Con Kolivas
2006-03-18  8:30                   ` Nick Piggin
2006-03-18  9:40                     ` Con Kolivas

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox