[PATCH] Remove struct reclaim

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] Remove struct reclaim_state
@ 2005-04-29  1:15 Matthew Dobson
  2005-04-30  0:51 ` Andrew Morton
  0 siblings, 1 reply; 4+ messages in thread
From: Matthew Dobson @ 2005-04-29  1:15 UTC (permalink / raw)
  To: linux-kernel, Linux Memory Management, Bligh, Martin J.

[-- Attachment #1: Type: text/plain, Size: 1620 bytes --]

So I made a note to myself a while back to have a look at what this struct
reclaim_state thing was all about.  It turns out it's referenced in WAY too
many places for what it does.

The idea is that during memory reclaim, we want to keep track of how many
pages get freed up by the various slab shrinkers.  The memory is eventually
freed in kmem_freepages() and there is not convenient place to store the
number of slab pages we freed.  So, at some point in history, we decided to
store it in the current task's task structure.  Unfortunately we decided to
store it in a really obfuscated way that made my brain hurt.  So I ripped
it out.

The only place that *ever* updates current->reclaim_state->reclaimed_slab
to be anything but 0 is kmem_freepages().  The only places that give a
woozy wombat's bottom about the value of reclaimed_slab are
try_to_free_pages() & balance_pgdat().  These two functions zero
current->reclaim_state->reclaimed_slab, call shrink_slab(), then look at
the value.  Since shrink_slab() currently returns 0 no matter what happens,
I changed it to return the number of slab pages freed.  This saves us 30
lines of code, makes the what's going on much more clear, and may even be a
smidge faster.  It builds, boots, and survives kernbench runs on i386.

mcd@arrakis:~/linux/source $ diffstat
~/linux/patches/remove-reclaim_state.patch
 include/linux/sched.h |    3 +--
 include/linux/swap.h  |    8 --------
 mm/page_alloc.c       |    6 ------
 mm/slab.c             |    3 +--
 mm/vmscan.c           |   26 ++++++--------------------
 5 files changed, 8 insertions(+), 38 deletions(-)

-Matt

[-- Attachment #2: remove-reclaim_state.patch --]
[-- Type: text/x-patch, Size: 6581 bytes --]

diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.12-rc3/include/linux/sched.h linux-2.6.12-rc3+remove-reclaim_state/include/linux/sched.h
--- linux-2.6.12-rc3/include/linux/sched.h	2005-04-27 10:59:19.000000000 -0700
+++ linux-2.6.12-rc3+remove-reclaim_state/include/linux/sched.h	2005-04-28 17:30:09.474874296 -0700
@@ -425,7 +425,6 @@ extern struct user_struct root_user;
 
 typedef struct prio_array prio_array_t;
 struct backing_dev_info;
-struct reclaim_state;
 
 #ifdef CONFIG_SCHEDSTATS
 struct sched_info {
@@ -705,7 +704,7 @@ struct task_struct {
 	void *journal_info;
 
 /* VM state */
-	struct reclaim_state *reclaim_state;
+	unsigned long reclaimed_slab;
 
 	struct dentry *proc_dentry;
 	struct backing_dev_info *backing_dev_info;
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.12-rc3/include/linux/swap.h linux-2.6.12-rc3+remove-reclaim_state/include/linux/swap.h
--- linux-2.6.12-rc3/include/linux/swap.h	2005-04-27 10:59:18.000000000 -0700
+++ linux-2.6.12-rc3+remove-reclaim_state/include/linux/swap.h	2005-04-27 15:54:49.000000000 -0700
@@ -65,14 +65,6 @@ typedef struct {
 	unsigned long val;
 } swp_entry_t;
 
-/*
- * current->reclaim_state points to one of these when a task is running
- * memory reclaim
- */
-struct reclaim_state {
-	unsigned long reclaimed_slab;
-};
-
 #ifdef __KERNEL__
 
 struct address_space;
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.12-rc3/mm/page_alloc.c linux-2.6.12-rc3+remove-reclaim_state/mm/page_alloc.c
--- linux-2.6.12-rc3/mm/page_alloc.c	2005-04-27 11:00:00.000000000 -0700
+++ linux-2.6.12-rc3+remove-reclaim_state/mm/page_alloc.c	2005-04-27 17:08:33.000000000 -0700
@@ -732,7 +732,6 @@ __alloc_pages(unsigned int __nocast gfp_
 	const int wait = gfp_mask & __GFP_WAIT;
 	struct zone **zones, *z;
 	struct page *page;
-	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
 	int i;
 	int classzone_idx;
@@ -820,12 +819,7 @@ rebalance:
 
 	/* We now go into synchronous reclaim */
 	p->flags |= PF_MEMALLOC;
-	reclaim_state.reclaimed_slab = 0;
-	p->reclaim_state = &reclaim_state;
-
 	did_some_progress = try_to_free_pages(zones, gfp_mask, order);
-
-	p->reclaim_state = NULL;
 	p->flags &= ~PF_MEMALLOC;
 
 	cond_resched();
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.12-rc3/mm/slab.c linux-2.6.12-rc3+remove-reclaim_state/mm/slab.c
--- linux-2.6.12-rc3/mm/slab.c	2005-04-27 11:00:00.000000000 -0700
+++ linux-2.6.12-rc3+remove-reclaim_state/mm/slab.c	2005-04-28 17:30:25.472442296 -0700
@@ -937,8 +937,7 @@ static void kmem_freepages(kmem_cache_t 
 		page++;
 	}
 	sub_page_state(nr_slab, nr_freed);
-	if (current->reclaim_state)
-		current->reclaim_state->reclaimed_slab += nr_freed;
+	current->reclaimed_slab += nr_freed;
 	free_pages((unsigned long)addr, cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT) 
 		atomic_sub(1<<cachep->gfporder, &slab_reclaim_pages);
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.12-rc3/mm/vmscan.c linux-2.6.12-rc3+remove-reclaim_state/mm/vmscan.c
--- linux-2.6.12-rc3/mm/vmscan.c	2005-04-27 11:00:00.000000000 -0700
+++ linux-2.6.12-rc3+remove-reclaim_state/mm/vmscan.c	2005-04-28 17:31:05.579345120 -0700
@@ -180,6 +180,8 @@ EXPORT_SYMBOL(remove_shrinker);
  * `lru_pages' represents the number of on-LRU pages in all the zones which
  * are eligible for the caller's allocation attempt.  It is used for balancing
  * slab reclaim versus page reclaim.
+ *
+ * Return number of slab pages freed.
  */
 static int shrink_slab(unsigned long scanned, unsigned int gfp_mask,
 			unsigned long lru_pages)
@@ -192,6 +194,7 @@ static int shrink_slab(unsigned long sca
 	if (!down_read_trylock(&shrinker_rwsem))
 		return 0;
 
+	current->reclaimed_slab = 0;
 	list_for_each_entry(shrinker, &shrinker_list, list) {
 		unsigned long long delta;
 		unsigned long total_scan;
@@ -222,7 +225,7 @@ static int shrink_slab(unsigned long sca
 		shrinker->nr += total_scan;
 	}
 	up_read(&shrinker_rwsem);
-	return 0;
+	return current->reclaimed_slab;
 }
 
 /* Called without lock on whether page is mapped, so answer is unstable */
@@ -913,7 +916,6 @@ int try_to_free_pages(struct zone **zone
 	int priority;
 	int ret = 0;
 	int total_scanned = 0, total_reclaimed = 0;
-	struct reclaim_state *reclaim_state = current->reclaim_state;
 	struct scan_control sc;
 	unsigned long lru_pages = 0;
 	int i;
@@ -940,11 +942,7 @@ int try_to_free_pages(struct zone **zone
 		sc.priority = priority;
 		sc.swap_cluster_max = SWAP_CLUSTER_MAX;
 		shrink_caches(zones, &sc);
-		shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
-		if (reclaim_state) {
-			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
-			reclaim_state->reclaimed_slab = 0;
-		}
+		sc.nr_reclaimed += shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
 		total_scanned += sc.nr_scanned;
 		total_reclaimed += sc.nr_reclaimed;
 		if (total_reclaimed >= sc.swap_cluster_max) {
@@ -1012,7 +1010,6 @@ static int balance_pgdat(pg_data_t *pgda
 	int priority;
 	int i;
 	int total_scanned, total_reclaimed;
-	struct reclaim_state *reclaim_state = current->reclaim_state;
 	struct scan_control sc;
 
 loop_again:
@@ -1099,9 +1096,7 @@ scan:
 			sc.priority = priority;
 			sc.swap_cluster_max = nr_pages? nr_pages : SWAP_CLUSTER_MAX;
 			shrink_zone(zone, &sc);
-			reclaim_state->reclaimed_slab = 0;
-			shrink_slab(sc.nr_scanned, GFP_KERNEL, lru_pages);
-			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
+			sc.nr_reclaimed += shrink_slab(sc.nr_scanned, GFP_KERNEL, lru_pages);
 			total_reclaimed += sc.nr_reclaimed;
 			total_scanned += sc.nr_scanned;
 			if (zone->all_unreclaimable)
@@ -1171,16 +1166,12 @@ static int kswapd(void *p)
 	pg_data_t *pgdat = (pg_data_t*)p;
 	struct task_struct *tsk = current;
 	DEFINE_WAIT(wait);
-	struct reclaim_state reclaim_state = {
-		.reclaimed_slab = 0,
-	};
 	cpumask_t cpumask;
 
 	daemonize("kswapd%d", pgdat->node_id);
 	cpumask = node_to_cpumask(pgdat->node_id);
 	if (!cpus_empty(cpumask))
 		set_cpus_allowed(tsk, cpumask);
-	current->reclaim_state = &reclaim_state;
 
 	/*
 	 * Tell the memory management that we're a "memory allocator",
@@ -1254,11 +1245,7 @@ int shrink_all_memory(int nr_pages)
 	pg_data_t *pgdat;
 	int nr_to_free = nr_pages;
 	int ret = 0;
-	struct reclaim_state reclaim_state = {
-		.reclaimed_slab = 0,
-	};
 
-	current->reclaim_state = &reclaim_state;
 	for_each_pgdat(pgdat) {
 		int freed;
 		freed = balance_pgdat(pgdat, nr_to_free, 0);
@@ -1267,7 +1254,6 @@ int shrink_all_memory(int nr_pages)
 		if (nr_to_free <= 0)
 			break;
 	}
-	current->reclaim_state = NULL;
 	return ret;
 }
 #endif

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Remove struct reclaim_state
  2005-04-29  1:15 [PATCH] Remove struct reclaim_state Matthew Dobson
@ 2005-04-30  0:51 ` Andrew Morton
  2005-05-02 18:44   ` Matthew Dobson
  0 siblings, 1 reply; 4+ messages in thread
From: Andrew Morton @ 2005-04-30  0:51 UTC (permalink / raw)
  To: Matthew Dobson; +Cc: linux-kernel, linux-mm, mbligh

Matthew Dobson <colpatch@us.ibm.com> wrote:
>
> Since shrink_slab() currently returns 0 no matter what happens,
>  I changed it to return the number of slab pages freed.

A sane cleanup, but it conflicts with vmscan-notice-slab-shrinking.patch,
which returns a different thing from shrink_slab() in order to account for
slab reclaim only causing internal fragmentation and not actually freeing
pages yet.

vmscan-notice-slab-shrinking.patch isn't quite complete yet, but we do need
to do something along these lines.  I need to get back onto it.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Remove struct reclaim_state
  2005-04-30  0:51 ` Andrew Morton
@ 2005-05-02 18:44   ` Matthew Dobson
  2005-05-02 19:39     ` Matthew Dobson
  0 siblings, 1 reply; 4+ messages in thread
From: Matthew Dobson @ 2005-05-02 18:44 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, linux-mm, mbligh

[-- Attachment #1: Type: text/plain, Size: 1244 bytes --]

Andrew Morton wrote:
> Matthew Dobson <colpatch@us.ibm.com> wrote:
> 
>>Since shrink_slab() currently returns 0 no matter what happens,
>> I changed it to return the number of slab pages freed.
> 
> 
> A sane cleanup, but it conflicts with vmscan-notice-slab-shrinking.patch,
> which returns a different thing from shrink_slab() in order to account for
> slab reclaim only causing internal fragmentation and not actually freeing
> pages yet.
> 
> vmscan-notice-slab-shrinking.patch isn't quite complete yet, but we do need
> to do something along these lines.  I need to get back onto it.

I hadn't seen that patch...  These can coexist fairly easily, though.  I
can change my patch to zero and then read current->reclaimed_slab *outside*
the call to shrink_slab.  It unfortunately shrinks less lines of code, and
leaves the hack that is current->reclaimed_slab exposed to more than one
function, but...

How's this look?

mcd@arrakis:~/linux/patches $ diffstat remove-reclaim_state.patch
 include/linux/sched.h |    3 +--
 include/linux/swap.h  |    8 --------
 mm/page_alloc.c       |    6 ------
 mm/slab.c             |    3 +--
 mm/vmscan.c           |   21 ++++-----------------
 5 files changed, 6 insertions(+), 35 deletions(-)

-Matt


[-- Attachment #2: remove-reclaim_state.patch --]
[-- Type: text/x-patch, Size: 5687 bytes --]

diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.12-rc3/include/linux/sched.h linux-2.6.12-rc3+remove-reclaim_state/include/linux/sched.h
--- linux-2.6.12-rc3/include/linux/sched.h	2005-04-27 10:59:19.000000000 -0700
+++ linux-2.6.12-rc3+remove-reclaim_state/include/linux/sched.h	2005-04-28 17:30:09.000000000 -0700
@@ -425,7 +425,6 @@ extern struct user_struct root_user;
 
 typedef struct prio_array prio_array_t;
 struct backing_dev_info;
-struct reclaim_state;
 
 #ifdef CONFIG_SCHEDSTATS
 struct sched_info {
@@ -705,7 +704,7 @@ struct task_struct {
 	void *journal_info;
 
 /* VM state */
-	struct reclaim_state *reclaim_state;
+	unsigned long reclaimed_slab;
 
 	struct dentry *proc_dentry;
 	struct backing_dev_info *backing_dev_info;
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.12-rc3/include/linux/swap.h linux-2.6.12-rc3+remove-reclaim_state/include/linux/swap.h
--- linux-2.6.12-rc3/include/linux/swap.h	2005-04-27 10:59:18.000000000 -0700
+++ linux-2.6.12-rc3+remove-reclaim_state/include/linux/swap.h	2005-04-27 15:54:49.000000000 -0700
@@ -65,14 +65,6 @@ typedef struct {
 	unsigned long val;
 } swp_entry_t;
 
-/*
- * current->reclaim_state points to one of these when a task is running
- * memory reclaim
- */
-struct reclaim_state {
-	unsigned long reclaimed_slab;
-};
-
 #ifdef __KERNEL__
 
 struct address_space;
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.12-rc3/mm/page_alloc.c linux-2.6.12-rc3+remove-reclaim_state/mm/page_alloc.c
--- linux-2.6.12-rc3/mm/page_alloc.c	2005-04-27 11:00:00.000000000 -0700
+++ linux-2.6.12-rc3+remove-reclaim_state/mm/page_alloc.c	2005-04-27 17:08:33.000000000 -0700
@@ -732,7 +732,6 @@ __alloc_pages(unsigned int __nocast gfp_
 	const int wait = gfp_mask & __GFP_WAIT;
 	struct zone **zones, *z;
 	struct page *page;
-	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
 	int i;
 	int classzone_idx;
@@ -820,12 +819,7 @@ rebalance:
 
 	/* We now go into synchronous reclaim */
 	p->flags |= PF_MEMALLOC;
-	reclaim_state.reclaimed_slab = 0;
-	p->reclaim_state = &reclaim_state;
-
 	did_some_progress = try_to_free_pages(zones, gfp_mask, order);
-
-	p->reclaim_state = NULL;
 	p->flags &= ~PF_MEMALLOC;
 
 	cond_resched();
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.12-rc3/mm/slab.c linux-2.6.12-rc3+remove-reclaim_state/mm/slab.c
--- linux-2.6.12-rc3/mm/slab.c	2005-04-27 11:00:00.000000000 -0700
+++ linux-2.6.12-rc3+remove-reclaim_state/mm/slab.c	2005-04-28 17:30:25.000000000 -0700
@@ -937,8 +937,7 @@ static void kmem_freepages(kmem_cache_t 
 		page++;
 	}
 	sub_page_state(nr_slab, nr_freed);
-	if (current->reclaim_state)
-		current->reclaim_state->reclaimed_slab += nr_freed;
+	current->reclaimed_slab += nr_freed;
 	free_pages((unsigned long)addr, cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT) 
 		atomic_sub(1<<cachep->gfporder, &slab_reclaim_pages);
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.12-rc3/mm/vmscan.c linux-2.6.12-rc3+remove-reclaim_state/mm/vmscan.c
--- linux-2.6.12-rc3/mm/vmscan.c	2005-04-27 11:00:00.000000000 -0700
+++ linux-2.6.12-rc3+remove-reclaim_state/mm/vmscan.c	2005-05-02 11:37:28.852114056 -0700
@@ -913,7 +913,6 @@ int try_to_free_pages(struct zone **zone
 	int priority;
 	int ret = 0;
 	int total_scanned = 0, total_reclaimed = 0;
-	struct reclaim_state *reclaim_state = current->reclaim_state;
 	struct scan_control sc;
 	unsigned long lru_pages = 0;
 	int i;
@@ -940,11 +939,9 @@ int try_to_free_pages(struct zone **zone
 		sc.priority = priority;
 		sc.swap_cluster_max = SWAP_CLUSTER_MAX;
 		shrink_caches(zones, &sc);
+		current->reclaimed_slab = 0;
 		shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
-		if (reclaim_state) {
-			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
-			reclaim_state->reclaimed_slab = 0;
-		}
+		sc.nr_reclaimed += current->reclaimed_slab;
 		total_scanned += sc.nr_scanned;
 		total_reclaimed += sc.nr_reclaimed;
 		if (total_reclaimed >= sc.swap_cluster_max) {
@@ -1012,7 +1009,6 @@ static int balance_pgdat(pg_data_t *pgda
 	int priority;
 	int i;
 	int total_scanned, total_reclaimed;
-	struct reclaim_state *reclaim_state = current->reclaim_state;
 	struct scan_control sc;
 
 loop_again:
@@ -1099,9 +1095,9 @@ scan:
 			sc.priority = priority;
 			sc.swap_cluster_max = nr_pages? nr_pages : SWAP_CLUSTER_MAX;
 			shrink_zone(zone, &sc);
-			reclaim_state->reclaimed_slab = 0;
+			current->reclaimed_slab = 0;
 			shrink_slab(sc.nr_scanned, GFP_KERNEL, lru_pages);
-			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
+			sc.nr_reclaimed += current->reclaimed_slab;
 			total_reclaimed += sc.nr_reclaimed;
 			total_scanned += sc.nr_scanned;
 			if (zone->all_unreclaimable)
@@ -1171,16 +1167,12 @@ static int kswapd(void *p)
 	pg_data_t *pgdat = (pg_data_t*)p;
 	struct task_struct *tsk = current;
 	DEFINE_WAIT(wait);
-	struct reclaim_state reclaim_state = {
-		.reclaimed_slab = 0,
-	};
 	cpumask_t cpumask;
 
 	daemonize("kswapd%d", pgdat->node_id);
 	cpumask = node_to_cpumask(pgdat->node_id);
 	if (!cpus_empty(cpumask))
 		set_cpus_allowed(tsk, cpumask);
-	current->reclaim_state = &reclaim_state;
 
 	/*
 	 * Tell the memory management that we're a "memory allocator",
@@ -1254,11 +1246,7 @@ int shrink_all_memory(int nr_pages)
 	pg_data_t *pgdat;
 	int nr_to_free = nr_pages;
 	int ret = 0;
-	struct reclaim_state reclaim_state = {
-		.reclaimed_slab = 0,
-	};
 
-	current->reclaim_state = &reclaim_state;
 	for_each_pgdat(pgdat) {
 		int freed;
 		freed = balance_pgdat(pgdat, nr_to_free, 0);
@@ -1267,7 +1255,6 @@ int shrink_all_memory(int nr_pages)
 		if (nr_to_free <= 0)
 			break;
 	}
-	current->reclaim_state = NULL;
 	return ret;
 }
 #endif

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Remove struct reclaim_state
  2005-05-02 18:44   ` Matthew Dobson
@ 2005-05-02 19:39     ` Matthew Dobson
  0 siblings, 0 replies; 4+ messages in thread
From: Matthew Dobson @ 2005-05-02 19:39 UTC (permalink / raw)
  To: Matthew Dobson; +Cc: Andrew Morton, linux-kernel, linux-mm, mbligh

[-- Attachment #1: Type: text/plain, Size: 2757 bytes --]

Matthew Dobson wrote:
> Andrew Morton wrote:
> 
>>Matthew Dobson <colpatch@us.ibm.com> wrote:
>>
>>
>>>Since shrink_slab() currently returns 0 no matter what happens,
>>>I changed it to return the number of slab pages freed.
>>
>>
>>A sane cleanup, but it conflicts with vmscan-notice-slab-shrinking.patch,
>>which returns a different thing from shrink_slab() in order to account for
>>slab reclaim only causing internal fragmentation and not actually freeing
>>pages yet.
>>
>>vmscan-notice-slab-shrinking.patch isn't quite complete yet, but we do need
>>to do something along these lines.  I need to get back onto it.
> 
> 
> I hadn't seen that patch...  These can coexist fairly easily, though.  I
> can change my patch to zero and then read current->reclaimed_slab *outside*
> the call to shrink_slab.  It unfortunately shrinks less lines of code, and
> leaves the hack that is current->reclaimed_slab exposed to more than one
> function, but...
> 
> How's this look?
> 
> mcd@arrakis:~/linux/patches $ diffstat remove-reclaim_state.patch
>  include/linux/sched.h |    3 +--
>  include/linux/swap.h  |    8 --------
>  mm/page_alloc.c       |    6 ------
>  mm/slab.c             |    3 +--
>  mm/vmscan.c           |   21 ++++-----------------
>  5 files changed, 6 insertions(+), 35 deletions(-)
> 
> -Matt

Ok.  Now I'm getting REALLY crazy.  I noticed that try_to_free_pages() &
balance_pgdat() (the two callers of shrink_slab()) do something really
dumb: they set sc.nr_scanned to 0, call shrink_slab() with sc.nr_scanned as
a pass-by-value parameter, then do total_scanned += sc.nr_scanned, despite
knowing damn well that sc.nr_scanned = 0, since _they just set it to that_.
 I doubt we really want to be constantly incrementing total_scanned by 0.
We either don't want to update it, or we want to update it by the value
scanned gets in shrink_slab().

So, you're probably asking yourself, what is the point of all this
rambling?  What if we change shrink_slab() to take a struct scan_control *
instead of an unsigned long for it's first argument?  Then we can
_actually_ update nr_scanned AND nr_reclaimed directly inside of
shrink_slab() instead of trying to pass all this data back to the two
partially brain-dead callers.

Updated to 2.6.12-rc3-mm2, which includes
vmscan-notice-slab-shrinking.patch.  Look at all those -'s!! :)

mcd@arrakis:~/linux/patches $ diffstat remove-reclaim_state.patch
include/linux/sched.h |    3 +--
 include/linux/swap.h  |    8 --------
 mm/page_alloc.c       |    6 ------
 mm/slab.c             |    4 ++--
 mm/vmscan.c           |   36 ++++++++++++------------------------
 5 files changed, 15 insertions(+), 42 deletions(-)

Running it through a build & boot test now.  I'll let you know if it barfs.

-Matt

[-- Attachment #2: remove-reclaim_state.patch --]
[-- Type: text/x-patch, Size: 6982 bytes --]

diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.12-rc3-mm2/include/linux/sched.h linux-2.6.12-rc3-mm2+remove-reclaim_state/include/linux/sched.h
--- linux-2.6.12-rc3-mm2/include/linux/sched.h	2005-05-02 12:16:13.820665112 -0700
+++ linux-2.6.12-rc3-mm2+remove-reclaim_state/include/linux/sched.h	2005-05-02 12:17:24.095981640 -0700
@@ -451,7 +451,6 @@ extern struct user_struct root_user;
 
 typedef struct prio_array prio_array_t;
 struct backing_dev_info;
-struct reclaim_state;
 
 #ifdef CONFIG_SCHEDSTATS
 struct sched_info {
@@ -751,7 +750,7 @@ struct task_struct {
 	void *journal_info;
 
 /* VM state */
-	struct reclaim_state *reclaim_state;
+	unsigned long *reclaimed_slab;
 
 	struct dentry *proc_dentry;
 	struct backing_dev_info *backing_dev_info;
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.12-rc3-mm2/include/linux/swap.h linux-2.6.12-rc3-mm2+remove-reclaim_state/include/linux/swap.h
--- linux-2.6.12-rc3-mm2/include/linux/swap.h	2005-05-02 12:16:13.891654320 -0700
+++ linux-2.6.12-rc3-mm2+remove-reclaim_state/include/linux/swap.h	2005-05-02 12:17:24.117978296 -0700
@@ -65,14 +65,6 @@ typedef struct {
 	unsigned long val;
 } swp_entry_t;
 
-/*
- * current->reclaim_state points to one of these when a task is running
- * memory reclaim
- */
-struct reclaim_state {
-	unsigned long reclaimed_slab;
-};
-
 #ifdef __KERNEL__
 
 struct address_space;
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.12-rc3-mm2/mm/page_alloc.c linux-2.6.12-rc3-mm2+remove-reclaim_state/mm/page_alloc.c
--- linux-2.6.12-rc3-mm2/mm/page_alloc.c	2005-05-02 12:16:14.696531960 -0700
+++ linux-2.6.12-rc3-mm2+remove-reclaim_state/mm/page_alloc.c	2005-05-02 12:17:24.141974648 -0700
@@ -768,7 +768,6 @@ __alloc_pages(unsigned int __nocast gfp_
 	const int wait = gfp_mask & __GFP_WAIT;
 	struct zone **zones, *z;
 	struct page *page;
-	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
 	int i;
 	int classzone_idx;
@@ -860,12 +859,7 @@ rebalance:
 
 	/* We now go into synchronous reclaim */
 	p->flags |= PF_MEMALLOC;
-	reclaim_state.reclaimed_slab = 0;
-	p->reclaim_state = &reclaim_state;
-
 	did_some_progress = try_to_free_pages(zones, gfp_mask, order);
-
-	p->reclaim_state = NULL;
 	p->flags &= ~PF_MEMALLOC;
 
 	cond_resched();
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.12-rc3-mm2/mm/slab.c linux-2.6.12-rc3-mm2+remove-reclaim_state/mm/slab.c
--- linux-2.6.12-rc3-mm2/mm/slab.c	2005-05-02 12:16:14.766521320 -0700
+++ linux-2.6.12-rc3-mm2+remove-reclaim_state/mm/slab.c	2005-05-02 12:17:24.165971000 -0700
@@ -942,8 +942,8 @@ static void kmem_freepages(kmem_cache_t 
 		page++;
 	}
 	sub_page_state(nr_slab, nr_freed);
-	if (current->reclaim_state)
-		current->reclaim_state->reclaimed_slab += nr_freed;
+	if (current->reclaimed_slab)
+		*current->reclaimed_slab += nr_freed;
 	free_pages((unsigned long)addr, cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT) 
 		atomic_sub(1<<cachep->gfporder, &slab_reclaim_pages);
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.12-rc3-mm2/mm/vmscan.c linux-2.6.12-rc3-mm2+remove-reclaim_state/mm/vmscan.c
--- linux-2.6.12-rc3-mm2/mm/vmscan.c	2005-05-02 12:16:14.796516760 -0700
+++ linux-2.6.12-rc3-mm2+remove-reclaim_state/mm/vmscan.c	2005-05-02 12:26:14.313376344 -0700
@@ -190,23 +190,28 @@ EXPORT_SYMBOL(remove_shrinker);
  *
  * Returns the number of slab objects which we shrunk.
  */
-static int shrink_slab(unsigned long scanned, unsigned int gfp_mask,
+static int shrink_slab(struct scan_control *sc, unsigned int gfp_mask,
 			unsigned long lru_pages)
 {
 	struct shrinker *shrinker;
 	int ret = 0;
 
-	if (scanned == 0)
-		scanned = SWAP_CLUSTER_MAX;
+	if (sc->nr_scanned == 0)
+		sc->nr_scanned = SWAP_CLUSTER_MAX;
 
 	if (!down_read_trylock(&shrinker_rwsem))
 		return 1;	/* Assume we'll be able to shrink next time */
 
+	/*
+	 * Each shrinker will now increment sc->nr_reclaimed by the number
+	 * of pages it frees.
+	 */
+	current->reclaimed_slab = &sc->nr_reclaimed;
 	list_for_each_entry(shrinker, &shrinker_list, list) {
 		unsigned long long delta;
 		unsigned long total_scan;
 
-		delta = (4 * scanned) / shrinker->seeks;
+		delta = (4 * sc->nr_scanned) / shrinker->seeks;
 		delta *= (*shrinker->shrinker)(0, gfp_mask);
 		do_div(delta, lru_pages + 1);
 		shrinker->nr += delta;
@@ -235,6 +240,7 @@ static int shrink_slab(unsigned long sca
 
 		shrinker->nr += total_scan;
 	}
+	current->reclaimed_slab = NULL;
 	up_read(&shrinker_rwsem);
 	return ret;
 }
@@ -969,7 +975,6 @@ int try_to_free_pages(struct zone **zone
 	int priority;
 	int ret = 0;
 	int total_scanned = 0, total_reclaimed = 0;
-	struct reclaim_state *reclaim_state = current->reclaim_state;
 	struct scan_control sc;
 	unsigned long lru_pages = 0;
 	int i;
@@ -998,11 +1003,7 @@ int try_to_free_pages(struct zone **zone
 		sc.priority = priority;
 		sc.swap_cluster_max = SWAP_CLUSTER_MAX;
 		shrink_caches(zones, &sc);
-		shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
-		if (reclaim_state) {
-			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
-			reclaim_state->reclaimed_slab = 0;
-		}
+		shrink_slab(&sc, gfp_mask, lru_pages);
 		total_scanned += sc.nr_scanned;
 		total_reclaimed += sc.nr_reclaimed;
 		if (total_reclaimed >= sc.swap_cluster_max) {
@@ -1070,7 +1071,6 @@ static int balance_pgdat(pg_data_t *pgda
 	int priority;
 	int i;
 	int total_scanned, total_reclaimed;
-	struct reclaim_state *reclaim_state = current->reclaim_state;
 	struct scan_control sc;
 
 loop_again:
@@ -1161,10 +1161,7 @@ scan:
 			sc.priority = priority;
 			sc.swap_cluster_max = nr_pages? nr_pages : SWAP_CLUSTER_MAX;
 			shrink_zone(zone, &sc);
-			reclaim_state->reclaimed_slab = 0;
-			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
-						lru_pages);
-			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
+			nr_slab = shrink_slab(&sc, GFP_KERNEL, lru_pages);
 			total_reclaimed += sc.nr_reclaimed;
 			total_scanned += sc.nr_scanned;
 			if (zone->unreclaimable == ALL_UNRECLAIMABLE)
@@ -1234,16 +1231,12 @@ static int kswapd(void *p)
 	pg_data_t *pgdat = (pg_data_t*)p;
 	struct task_struct *tsk = current;
 	DEFINE_WAIT(wait);
-	struct reclaim_state reclaim_state = {
-		.reclaimed_slab = 0,
-	};
 	cpumask_t cpumask;
 
 	daemonize("kswapd%d", pgdat->node_id);
 	cpumask = node_to_cpumask(pgdat->node_id);
 	if (!cpus_empty(cpumask))
 		set_cpus_allowed(tsk, cpumask);
-	current->reclaim_state = &reclaim_state;
 
 	/*
 	 * Tell the memory management that we're a "memory allocator",
@@ -1317,11 +1310,7 @@ int shrink_all_memory(int nr_pages)
 	pg_data_t *pgdat;
 	int nr_to_free = nr_pages;
 	int ret = 0;
-	struct reclaim_state reclaim_state = {
-		.reclaimed_slab = 0,
-	};
 
-	current->reclaim_state = &reclaim_state;
 	for_each_pgdat(pgdat) {
 		int freed;
 		freed = balance_pgdat(pgdat, nr_to_free, 0);
@@ -1330,7 +1319,6 @@ int shrink_all_memory(int nr_pages)
 		if (nr_to_free <= 0)
 			break;
 	}
-	current->reclaim_state = NULL;
 	return ret;
 }
 #endif

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2005-05-02 19:39 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-04-29  1:15 [PATCH] Remove struct reclaim_state Matthew Dobson
2005-04-30  0:51 ` Andrew Morton
2005-05-02 18:44   ` Matthew Dobson
2005-05-02 19:39     ` Matthew Dobson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox