* pte_chain_mempool-2.5.27-1
@ 2002-07-21 3:55 William Lee Irwin III
From: William Lee Irwin III @ 2002-07-21 3:55 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-mm, riel, anton
To achieve more reliable and efficient allocation, this patch converts
the pte_chain freelist to a mempool with the slab allocator as its
front-end. It also cleans up the now-obsolete freelist statistics (the
slab allocator maintains its own) and replaces them with a ReverseMaps:
statistic reporting the number of reverse mappings currently in place.
$ diffstat ~/patches/pte_chain_mempool-1
fs/proc/proc_misc.c | 6 --
include/linux/page-flags.h | 3 -
init/main.c | 4 -
mm/page_alloc.c | 3 -
mm/rmap.c | 99 ++++++++++++++++-----------------------------
5 files changed, 43 insertions(+), 72 deletions(-)
Cheers,
Bill
===== fs/proc/proc_misc.c 1.31 vs edited =====
--- 1.31/fs/proc/proc_misc.c Tue Jul 16 14:46:30 2002
+++ edited/fs/proc/proc_misc.c Sat Jul 20 18:42:07 2002
@@ -161,8 +161,7 @@
"Dirty: %8lu kB\n"
"Writeback: %8lu kB\n"
"PageTables: %8lu kB\n"
- "PteChainTot: %8lu kB\n"
- "PteChainUsed: %8lu kB\n",
+ "ReverseMaps: %8lu\n",
K(i.totalram),
K(i.freeram),
K(i.sharedram),
@@ -179,8 +178,7 @@
K(ps.nr_dirty),
K(ps.nr_writeback),
K(ps.nr_page_table_pages),
- K(ps.nr_pte_chain_pages),
- ps.used_pte_chains_bytes >> 10
+ K(ps.nr_reverse_maps)
);
return proc_calc_metrics(page, start, off, count, eof, len);
===== include/linux/page-flags.h 1.12 vs edited =====
--- 1.12/include/linux/page-flags.h Tue Jul 16 14:46:30 2002
+++ edited/include/linux/page-flags.h Sat Jul 20 18:39:12 2002
@@ -79,8 +79,7 @@
unsigned long nr_active; /* on active_list LRU */
unsigned long nr_inactive; /* on inactive_list LRU */
unsigned long nr_page_table_pages;
- unsigned long nr_pte_chain_pages;
- unsigned long used_pte_chains_bytes;
+ unsigned long nr_reverse_maps;
} ____cacheline_aligned_in_smp page_states[NR_CPUS];
extern void get_page_state(struct page_state *ret);
===== init/main.c 1.51 vs edited =====
--- 1.51/init/main.c Fri Jul 19 16:00:55 2002
+++ edited/init/main.c Sat Jul 20 16:03:02 2002
@@ -70,7 +70,7 @@
extern void sysctl_init(void);
extern void signals_init(void);
extern void buffer_init(void);
-
+extern void pte_chain_init(void);
extern void radix_tree_init(void);
extern void free_initmem(void);
@@ -386,7 +386,7 @@
mem_init();
kmem_cache_sizes_init();
pgtable_cache_init();
-
+ pte_chain_init();
mempages = num_physpages;
fork_init(mempages);
===== mm/page_alloc.c 1.82 vs edited =====
--- 1.82/mm/page_alloc.c Tue Jul 16 14:46:36 2002
+++ edited/mm/page_alloc.c Sat Jul 20 18:39:37 2002
@@ -566,8 +566,7 @@
ret->nr_active += ps->nr_active;
ret->nr_inactive += ps->nr_inactive;
ret->nr_page_table_pages += ps->nr_page_table_pages;
- ret->nr_pte_chain_pages += ps->nr_pte_chain_pages;
- ret->used_pte_chains_bytes += ps->used_pte_chains_bytes;
+ ret->nr_reverse_maps += ps->nr_reverse_maps;
}
}
===== mm/rmap.c 1.3 vs edited =====
--- 1.3/mm/rmap.c Tue Jul 16 14:46:30 2002
+++ edited/mm/rmap.c Sat Jul 20 18:41:35 2002
@@ -23,6 +23,9 @@
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/swapops.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/init.h>
#include <asm/pgalloc.h>
#include <asm/rmap.h>
@@ -50,10 +53,12 @@
pte_t * ptep;
};
+
+static kmem_cache_t *pte_chain_cache;
+static mempool_t *pte_chain_pool;
static inline struct pte_chain * pte_chain_alloc(void);
static inline void pte_chain_free(struct pte_chain *, struct pte_chain *,
struct page *);
-static void alloc_new_pte_chains(void);
/**
* page_referenced - test if the page was referenced
@@ -148,6 +153,7 @@
}
pte_chain_unlock(page);
+ inc_page_state(nr_reverse_maps);
}
/**
@@ -208,9 +214,9 @@
#endif
out:
+ dec_page_state(nr_reverse_maps);
pte_chain_unlock(page);
return;
-
}
/**
@@ -355,27 +361,6 @@
** functions.
**/
-struct pte_chain * pte_chain_freelist;
-spinlock_t pte_chain_freelist_lock = SPIN_LOCK_UNLOCKED;
-
-/* Maybe we should have standard ops for singly linked lists ... - Rik */
-static inline void pte_chain_push(struct pte_chain * pte_chain)
-{
- pte_chain->ptep = NULL;
- pte_chain->next = pte_chain_freelist;
- pte_chain_freelist = pte_chain;
-}
-
-static inline struct pte_chain * pte_chain_pop(void)
-{
- struct pte_chain *pte_chain;
-
- pte_chain = pte_chain_freelist;
- pte_chain_freelist = pte_chain->next;
- pte_chain->next = NULL;
-
- return pte_chain;
-}
/**
* pte_chain_free - free pte_chain structure
@@ -391,15 +376,12 @@
static inline void pte_chain_free(struct pte_chain * pte_chain,
struct pte_chain * prev_pte_chain, struct page * page)
{
- mod_page_state(used_pte_chains_bytes, -sizeof(struct pte_chain));
if (prev_pte_chain)
prev_pte_chain->next = pte_chain->next;
else if (page)
page->pte.chain = pte_chain->next;
- spin_lock(&pte_chain_freelist_lock);
- pte_chain_push(pte_chain);
- spin_unlock(&pte_chain_freelist_lock);
+ mempool_free(pte_chain, pte_chain_pool);
}
/**
@@ -411,45 +393,38 @@
*/
static inline struct pte_chain * pte_chain_alloc()
{
- struct pte_chain * pte_chain;
-
- spin_lock(&pte_chain_freelist_lock);
-
- /* Allocate new pte_chain structs as needed. */
- if (!pte_chain_freelist)
- alloc_new_pte_chains();
-
- /* Grab the first pte_chain from the freelist. */
- pte_chain = pte_chain_pop();
+ return (struct pte_chain *)mempool_alloc(pte_chain_pool, GFP_ATOMIC);
+}
- spin_unlock(&pte_chain_freelist_lock);
+static void *pte_chain_pool_alloc(int gfp_mask, void *ignored)
+{
+ (void)gfp_mask;
+ (void)ignored;
+ return kmem_cache_alloc(pte_chain_cache, GFP_ATOMIC);
+}
- mod_page_state(used_pte_chains_bytes, sizeof(struct pte_chain));
- return pte_chain;
+static void pte_chain_pool_free(void *pte_chain, void *ignored)
+{
+ kmem_cache_free(pte_chain_cache, pte_chain);
}
-/**
- * alloc_new_pte_chains - convert a free page to pte_chain structures
- *
- * Grabs a free page and converts it to pte_chain structures. We really
- * should pre-allocate these earlier in the pagefault path or come up
- * with some other trick.
- *
- * Note that we cannot use the slab cache because the pte_chain structure
- * is way smaller than the minimum size of a slab cache allocation.
- * Caller needs to hold the pte_chain_freelist_lock
- */
-static void alloc_new_pte_chains()
+void __init pte_chain_init(void)
{
- struct pte_chain * pte_chain = (void *) get_zeroed_page(GFP_ATOMIC);
- int i = PAGE_SIZE / sizeof(struct pte_chain);
+ pte_chain_cache = kmem_cache_create( "pte_chain",
+ sizeof(struct pte_chain),
+ 0,
+ 0,
+ NULL,
+ NULL);
- if (pte_chain) {
- inc_page_state(nr_pte_chain_pages);
- for (; i-- > 0; pte_chain++)
- pte_chain_push(pte_chain);
- } else {
- /* Yeah yeah, I'll fix the pte_chain allocation ... */
- panic("Fix pte_chain allocation, you lazy bastard!\n");
- }
+ if (!pte_chain_cache)
+ panic("failed to create pte_chain cache!\n");
+
+ pte_chain_pool = mempool_create(16*1024,
+ pte_chain_pool_alloc,
+ pte_chain_pool_free,
+ NULL);
+
+ if (!pte_chain_pool)
+ panic("Failed to create pte_chain mempool!\n");
}
* Re: pte_chain_mempool-2.5.27-1
From: Andrew Morton @ 2002-07-22 6:07 UTC (permalink / raw)
To: William Lee Irwin III; +Cc: linux-kernel, linux-mm, riel, anton
William Lee Irwin III wrote:
>
> This patch, in order to achieve more reliable and efficient allocation,
> converts the pte_chain freelist to use mempool, which in turn uses the
> slab allocator as a front-end.
Using slab seems like a good idea to me. It gives us the per-cpu
freelists and GC for free.
mempool? Guess so.
mempool is really designed for things like IO request structures,
BIOs, etc. - objects which are guaranteed to have short lifecycles,
and which therefore make the "wait for some objects to be freed"
loop in mempool_alloc() reliable.
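For contrast, the pattern mempool was built for looks roughly like
this (a minimal sketch: io_req and its cache are made-up names,
while the mempool_create()/mempool_alloc() signatures are the same
ones your patch uses):

struct io_req {
	/* driver-specific request state lives here */
};

static kmem_cache_t *io_req_cache;
static mempool_t *io_req_pool;

static void *io_req_pool_alloc(int gfp_mask, void *data)
{
	return kmem_cache_alloc(io_req_cache, gfp_mask);
}

static void io_req_pool_free(void *element, void *data)
{
	kmem_cache_free(io_req_cache, element);
}

void __init io_req_init(void)
{
	io_req_cache = kmem_cache_create("io_req",
			sizeof(struct io_req), 0, 0, NULL, NULL);
	/*
	 * A small reserve suffices: each request is completed and
	 * freed shortly after submission, so a sleeper in
	 * mempool_alloc() is always woken by a mempool_free()
	 * before long.
	 */
	io_req_pool = mempool_create(64, io_req_pool_alloc,
			io_req_pool_free, NULL);
}

The submission path does mempool_alloc(io_req_pool, GFP_NOIO) and the
completion handler does mempool_free(req, io_req_pool); that short,
guaranteed lifecycle is what makes the reserve-and-wait scheme sound.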
However when mempool went in, a bunch of developers (including
myself) went "oh goody" and reused mempool to add some buffering
to things like radix tree nodes, buffer_heads, pte_chains, etc.
This is inappropriate, because those objects have a very different
lifecycle.
For example, back when swap was using buffer_heads, I was getting
tasks locked up in mempool_alloc(GFP_NOIO), waiting for buffer_heads
to come free. But no buffer_heads were being freed, because there was
no memory pressure any more - somebody had just done a truncate() or
an exit(), there was plenty of free memory, nobody was calling
try_to_free_buffers() - so the mempool_alloc() caller slept
indefinitely, waiting for someone to free up a buffer_head.
We could fix this problem by changing the schedule() in mempool_alloc()
into a schedule_timeout(not much), but Ingo didn't seem to like that.
Perhaps because we're using mempool in ways for which it was not
designed.
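Concretely, the suggestion amounts to bounding the sleep in the
allocation retry loop. A sketch of the shape of the change, not the
actual mm/mempool.c source:

	/* current behaviour: sleep until a mempool_free() wakes us */
	schedule();

	/*
	 * proposed: sleep for a bounded interval and then retry the
	 * underlying allocator, so the caller cannot hang forever
	 * once elements stop being freed:
	 */
	schedule_timeout(HZ / 10);	/* "not much" */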
> + pte_chain_pool = mempool_create(16*1024,
> + pte_chain_pool_alloc,
> + pte_chain_pool_free,
> + NULL);
> +
Be aware that mempool kmallocs a contiguous chunk of element
pointers. This statement is asking for a
kmalloc(16384 * sizeof(void *)), which is 128k. It will work,
but only just.
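To spell out the arithmetic (mempool keeps min_nr element pointers
in a single kmalloc()ed array):

	16 * 1024 pointers * sizeof(void *) = 16384 * 8 bytes = 128 KiB

assuming 8-byte pointers (64 KiB with 4-byte ones) - right at the
traditional 128 KiB kmalloc() ceiling.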
How did you engineer the size of this pool, btw? In the
radix_tree code, we made the pool enormous. It was effectively
halved in size when the ratnodes went to 64 slots, but I still
have the fun task of working out what the pool size should really
be. In retrospect it would have been smarter to make it really
small and then increase it later in response to tester feedback.
Suggest you do that here.
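One way to follow that advice here, sketched on the assumption that
this tree's mempool_resize(pool, new_min_nr, gfp_mask) is available
(the constants are placeholders, not tuned values):

	/* start with a deliberately small reserve ... */
	pte_chain_pool = mempool_create(128, pte_chain_pool_alloc,
			pte_chain_pool_free, NULL);

	/* ... and grow it later, once tester feedback shows the
	 * reserve actually gets exhausted: */
	mempool_resize(pte_chain_pool, 1024, GFP_KERNEL);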
-
* Re: pte_chain_mempool-2.5.27-1
From: William Lee Irwin III @ 2002-07-22 6:17 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, linux-mm, riel, anton
On Sun, Jul 21, 2002 at 11:07:45PM -0700, Andrew Morton wrote:
> mempool? Guess so.
> mempool is really designed for things like IO request structures.
> BIOs, etc. Things which are guaranteed to have short lifecycles.
> Things which make the "wait for some objects to be freed" loop
> in mempool_alloc() reliable.
My usage of it was incorrect. Slab allocation alone will have to do.
On Sun, Jul 21, 2002 at 11:07:45PM -0700, Andrew Morton wrote:
> Be aware that mempool kmallocs a contiguous chunk of element
> pointers. This statement is asking for a
> kmalloc(16384 * sizeof(void *)), which is 128k. It will work,
> but only just.
> How did you engineer the size of this pool, btw? In the
> radix_tree code, we made the pool enormous. It was effectively
> halved in size when the ratnodes went to 64 slots, but I still
> have the fun task of working out what the pool size should really
> be. In retrospect it would have been smarter to make it really
> small and then increase it later in response to tester feedback.
> Suggest you do that here.
Any contiguous allocation that large is a bug. There was no
engineering; it was a "conservative guess", and hence even worse than
the early radix tree node pool sizing. Removing mempool from this
entirely is the best option: pte_chains aren't transient enough for
mempool's wait-for-a-free-element scheme to be reliable, and my
misreading of mempool led me to believe it had logic to deal with
the cases you described above.
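Concretely, the follow-up would reduce the allocation paths to bare
slab calls - a sketch reusing the names from the patch above:

static inline struct pte_chain *pte_chain_alloc(void)
{
	return kmem_cache_alloc(pte_chain_cache, GFP_ATOMIC);
}

/* pte_chain_free() unlinks the entry as before, then simply: */
kmem_cache_free(pte_chain_cache, pte_chain);

with pte_chain_pool, pte_chain_pool_alloc() and pte_chain_pool_free()
deleted outright.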
OOM handling is on the way soon anyway, so mempool for "extra
reliability" will be a non-issue then.
Cheers,
Bill