* [RFC] non-refcounted pages, application to slab?
@ 2006-01-25 9:39 Nick Piggin
2006-01-25 9:54 ` Eric Dumazet
` (2 more replies)
0 siblings, 3 replies; 10+ messages in thread
From: Nick Piggin @ 2006-01-25 9:39 UTC (permalink / raw)
To: Linux Kernel Mailing List, Linux Memory Management List
If an allocator knows exactly the lifetime of its page, then there is no
need to do refcounting or the final put_page_testzero (atomic op + mem
barriers).
This is probably not worthwhile for most cases, but slab did strike me
as a potential candidate (however the complication here is that some
code I think uses the refcount of underlying pages of slab allocations
eg nommu code). So it is not a complete patch, but I wonder if anyone
thinks the savings might be worth the complexity?
Is there any particular code that is really heavy on slab allocations?
That isn't mostly handled by the slab's internal freelists?
Thanks,
Nick
--
Index: linux-2.6/include/linux/gfp.h
===================================================================
--- linux-2.6.orig/include/linux/gfp.h
+++ linux-2.6/include/linux/gfp.h
@@ -47,15 +47,16 @@ struct vm_area_struct;
#define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */
#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+#define __GFP_NOREF ((__force gfp_t)0x40000u)/* Don't refcount page */
#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
/* if you forget to add the bitmask here kernel will crash, period */
#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
- __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
- __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
- __GFP_NOMEMALLOC|__GFP_HARDWALL)
+ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL| \
+ __GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP|__GFP_ZERO| \
+ __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_NOREF)
/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
#define GFP_ATOMIC (__GFP_HIGH)
@@ -118,6 +119,12 @@ static inline struct page *alloc_pages_n
NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
}
+static inline struct page *alloc_pages_noref_node(int nid, gfp_t gfp_mask,
+ unsigned int order)
+{
+ return alloc_pages_node(nid, gfp_mask|__GFP_NOREF, order);
+}
+
#ifdef CONFIG_NUMA
extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
@@ -148,7 +155,9 @@ extern unsigned long FASTCALL(get_zeroed
__get_free_pages((gfp_mask) | GFP_DMA,(order))
extern void FASTCALL(__free_pages(struct page *page, unsigned int order));
+extern void FASTCALL(__free_pages_noref(struct page *page, unsigned int order));
extern void FASTCALL(free_pages(unsigned long addr, unsigned int order));
+extern void FASTCALL(free_pages_noref(unsigned long addr, unsigned int order));
extern void FASTCALL(free_hot_page(struct page *page));
extern void FASTCALL(free_cold_page(struct page *page));
Index: linux-2.6/mm/slab.c
===================================================================
--- linux-2.6.orig/mm/slab.c
+++ linux-2.6/mm/slab.c
@@ -1220,7 +1220,7 @@ static void *kmem_getpages(kmem_cache_t
int i;
flags |= cachep->gfpflags;
- page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+ page = alloc_pages_noref_node(nodeid, flags, cachep->gfporder);
if (!page)
return NULL;
addr = page_address(page);
@@ -1253,7 +1253,7 @@ static void kmem_freepages(kmem_cache_t
sub_page_state(nr_slab, nr_freed);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += nr_freed;
- free_pages((unsigned long)addr, cachep->gfporder);
+ free_pages_noref((unsigned long)addr, cachep->gfporder);
if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages);
}
@@ -2604,10 +2604,10 @@ static inline void *__cache_alloc(kmem_c
local_irq_save(save_flags);
objp = ____cache_alloc(cachep, flags);
+ prefetchw(objp);
local_irq_restore(save_flags);
objp = cache_alloc_debugcheck_after(cachep, flags, objp,
__builtin_return_address(0));
- prefetchw(objp);
return objp;
}
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -493,10 +493,19 @@ static inline void expand(struct zone *z
}
}
+static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
+{
+ int i;
+
+ VM_BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
+ for(i = 0; i < (1 << order); i++)
+ clear_highpage(page + i);
+}
+
/*
* This page is about to be returned from the page allocator
*/
-static int prep_new_page(struct page *page, int order)
+static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
{
if (unlikely(page_mapcount(page) |
(page->mapping != NULL) |
@@ -525,7 +534,16 @@ static int prep_new_page(struct page *pa
1 << PG_referenced | 1 << PG_arch_1 |
1 << PG_checked | 1 << PG_mappedtodisk);
set_page_private(page, 0);
- set_page_refs(page, order);
+
+ if (!(gfp_flags & __GFP_NOREF))
+ set_page_refs(page, order);
+
+ if (gfp_flags & __GFP_ZERO)
+ prep_zero_page(page, order, gfp_flags);
+
+ if (order && (gfp_flags & __GFP_COMP))
+ prep_compound_page(page, order);
+
kernel_map_pages(page, 1 << order, 1);
return 0;
}
@@ -733,15 +751,6 @@ void fastcall free_cold_page(struct page
free_hot_cold_page(page, 1);
}
-static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
-{
- int i;
-
- VM_BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
- for(i = 0; i < (1 << order); i++)
- clear_highpage(page + i);
-}
-
/*
* split_page takes a non-compound higher-order page, and splits it into
* n (1<<order) sub-pages: page[0..n]
@@ -805,14 +814,8 @@ again:
put_cpu();
VM_BUG_ON(bad_range(zone, page));
- if (prep_new_page(page, order))
+ if (prep_new_page(page, order, gfp_flags))
goto again;
-
- if (gfp_flags & __GFP_ZERO)
- prep_zero_page(page, order, gfp_flags);
-
- if (order && (gfp_flags & __GFP_COMP))
- prep_compound_page(page, order);
return page;
failed:
@@ -1113,6 +1116,14 @@ fastcall void __free_pages(struct page *
EXPORT_SYMBOL(__free_pages);
+fastcall void __free_pages_noref(struct page *page, unsigned int order)
+{
+ if (order == 0)
+ free_hot_page(page);
+ else
+ __free_pages_ok(page, order);
+}
+
fastcall void free_pages(unsigned long addr, unsigned int order)
{
if (addr != 0) {
@@ -1123,6 +1134,15 @@ fastcall void free_pages(unsigned long a
EXPORT_SYMBOL(free_pages);
+fastcall void free_pages_noref(unsigned long addr, unsigned int order)
+{
+ if (addr != 0) {
+ VM_BUG_ON(!virt_addr_valid((void *)addr));
+ __free_pages_noref(virt_to_page((void *)addr), order);
+ }
+}
+
+
/*
* Total amount of free (allocatable) RAM:
*/
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC] non-refcounted pages, application to slab?
2006-01-25 9:39 [RFC] non-refcounted pages, application to slab? Nick Piggin
@ 2006-01-25 9:54 ` Eric Dumazet
2006-01-25 9:56 ` Nick Piggin
2006-01-25 10:26 ` Eric Dumazet
2006-01-25 10:30 ` Pekka Enberg
2 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2006-01-25 9:54 UTC (permalink / raw)
To: Nick Piggin; +Cc: Linux Kernel Mailing List, Linux Memory Management List
Nick Piggin a ecrit :
> @@ -2604,10 +2604,10 @@ static inline void *__cache_alloc(kmem_c
>
> local_irq_save(save_flags);
> objp = ____cache_alloc(cachep, flags);
> + prefetchw(objp);
> local_irq_restore(save_flags);
> objp = cache_alloc_debugcheck_after(cachep, flags, objp,
> __builtin_return_address(0));
> - prefetchw(objp);
> return objp;
> }
I'm not sure why you moved this prefetchw(obj) : This is not related to your
'non-refcounting' part, is it ?
When I added this prefetchw in slab code, I did place it *after* the
local_irq_restore(save_flags); because I was not sure if the
serialization/barrier (popf) would force the cpu (x86/x86_64 in mind) to either :
- finish all the loads (even if they are speculative/hints) (so giving a bad
latency)
- cancel the speculative loads (so prefetchw() *before* the
local_irq_restore() would be useless.
Thank you
Eric
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC] non-refcounted pages, application to slab?
2006-01-25 9:54 ` Eric Dumazet
@ 2006-01-25 9:56 ` Nick Piggin
0 siblings, 0 replies; 10+ messages in thread
From: Nick Piggin @ 2006-01-25 9:56 UTC (permalink / raw)
To: Eric Dumazet
Cc: Nick Piggin, Linux Kernel Mailing List, Linux Memory Management List
On Wed, Jan 25, 2006 at 10:54:08AM +0100, Eric Dumazet wrote:
> Nick Piggin a ecrit :
>
> >@@ -2604,10 +2604,10 @@ static inline void *__cache_alloc(kmem_c
> >
> > local_irq_save(save_flags);
> > objp = ____cache_alloc(cachep, flags);
> >+ prefetchw(objp);
> > local_irq_restore(save_flags);
> > objp = cache_alloc_debugcheck_after(cachep, flags, objp,
> > __builtin_return_address(0));
> >- prefetchw(objp);
> > return objp;
> > }
>
> I'm not sure why you moved this prefetchw(obj) : This is not related to
> your 'non-refcounting' part, is it ?
>
Stray hunk. Thanks.
Nick
> When I added this prefetchw in slab code, I did place it *after* the
> local_irq_restore(save_flags); because I was not sure if the
> serialization/barrier (popf) would force the cpu (x86/x86_64 in mind) to
> either :
> - finish all the loads (even if they are speculative/hints) (so giving a
> bad latency)
> - cancel the speculative loads (so prefetchw() *before* the
> local_irq_restore() would be useless.
>
Makes sense.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC] non-refcounted pages, application to slab?
2006-01-25 9:39 [RFC] non-refcounted pages, application to slab? Nick Piggin
2006-01-25 9:54 ` Eric Dumazet
@ 2006-01-25 10:26 ` Eric Dumazet
2006-01-25 10:57 ` Nick Piggin
2006-01-25 10:30 ` Pekka Enberg
2 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2006-01-25 10:26 UTC (permalink / raw)
To: Nick Piggin; +Cc: Linux Kernel Mailing List, Linux Memory Management List
Nick Piggin a ecrit :
> If an allocator knows exactly the lifetime of its page, then there is no
> need to do refcounting or the final put_page_testzero (atomic op + mem
> barriers).
>
> This is probably not worthwhile for most cases, but slab did strike me
> as a potential candidate (however the complication here is that some
> code I think uses the refcount of underlying pages of slab allocations
> eg nommu code). So it is not a complete patch, but I wonder if anyone
> thinks the savings might be worth the complexity?
>
> Is there any particular code that is really heavy on slab allocations?
> That isn't mostly handled by the slab's internal freelists?
Hi Nick
After reading your patch, I have some crazy idea.
The atomic op + mem barrier you want to avoid could be avoided more generally
just by changing atomic_dec_and_test(atomic_t *v).
If the current thread is the last referrer (refcnt = 1), then it can safely set
the value to 0 because no other CPU can be touching the value (or else there
must be a bug somewhere, as the 'other cpu' could touch the value just after
us and we could free an object still in use by the 'other cpu').
Something like :
--- include/asm-i386/atomic.h.orig 2006-01-25 12:11:46.000000000 +0100
+++ include/asm-i386/atomic.h 2006-01-25 12:13:07.000000000 +0100
@@ -130,6 +130,13 @@
printk("BUG: atomic counter underflow at:\n");
dump_stack();
}
+#ifdef CONFIG_SMP
+ /* avoid an atomic op if we are the last user of this atomic */
+ if (atomic_read(v) == 1) {
+ atomic_set(v, 0); /* not a real atomic op on most machines */
+ return 1;
+ }
+#endif
__asm__ __volatile__(
LOCK_PREFIX "decl %0; sete %1"
:"=m" (v->counter), "=qm" (c)
Thank you
Eric
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC] non-refcounted pages, application to slab?
2006-01-25 9:39 [RFC] non-refcounted pages, application to slab? Nick Piggin
2006-01-25 9:54 ` Eric Dumazet
2006-01-25 10:26 ` Eric Dumazet
@ 2006-01-25 10:30 ` Pekka Enberg
2006-01-25 11:00 ` Nick Piggin
2 siblings, 1 reply; 10+ messages in thread
From: Pekka Enberg @ 2006-01-25 10:30 UTC (permalink / raw)
To: Nick Piggin; +Cc: Linux Kernel Mailing List, Linux Memory Management List
Hi Nick,
On 1/25/06, Nick Piggin <npiggin@suse.de> wrote:
> This is probably not worthwhile for most cases, but slab did strike me
> as a potential candidate (however the complication here is that some
> code I think uses the refcount of underlying pages of slab allocations
> eg nommu code). So it is not a complete patch, but I wonder if anyone
> thinks the savings might be worth the complexity?
>
> Is there any particular code that is really heavy on slab allocations?
> That isn't mostly handled by the slab's internal freelists?
I certainly hope not. For heavy users, the slab allocator should grow
caches enough to satisfy most allocations from them. Also, I think
we want to keep the reference counting for slab pages so that we can
use kmalloc'd memory in the block layer.
Pekka
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC] non-refcounted pages, application to slab?
2006-01-25 10:26 ` Eric Dumazet
@ 2006-01-25 10:57 ` Nick Piggin
2006-01-25 11:10 ` Eric Dumazet
0 siblings, 1 reply; 10+ messages in thread
From: Nick Piggin @ 2006-01-25 10:57 UTC (permalink / raw)
To: Eric Dumazet
Cc: Nick Piggin, Linux Kernel Mailing List, Linux Memory Management List
On Wed, Jan 25, 2006 at 11:26:01AM +0100, Eric Dumazet wrote:
> Nick Piggin a ecrit :
> >If an allocator knows exactly the lifetime of its page, then there is no
> >need to do refcounting or the final put_page_testzero (atomic op + mem
> >barriers).
> >
> >This is probably not worthwhile for most cases, but slab did strike me
> >as a potential candidate (however the complication here is that some
> >code I think uses the refcount of underlying pages of slab allocations
> >eg nommu code). So it is not a complete patch, but I wonder if anyone
> >thinks the savings might be worth the complexity?
> >
> >Is there any particular code that is really heavy on slab allocations?
> >That isn't mostly handled by the slab's internal freelists?
>
> Hi Nick
>
> After reading your patch, I have some crazy idea.
>
> The atomic op + mem barrier you want to avoid could be avoided more
> generally just by changing atomic_dec_and_test(atomic_t *v).
>
> If the current thread is the last referer (refcnt = 1), then it can safely
> set the value to 0 because no other CPU can be touching the value (or else
> there must be a bug somewhere, as the 'other cpu' could touch the value
> just after us and we could free an object still in use by 'other cpu'
>
I think that would work for this case, but you change the semantics
of the function for all users which is bad.
Such a test could be open coded in __free_page, although that does
add a branch + some icache, but that might also be an option. (and
my patch does also add to total icache footprint and is much uglier ;))
Thanks,
Nick
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC] non-refcounted pages, application to slab?
2006-01-25 10:30 ` Pekka Enberg
@ 2006-01-25 11:00 ` Nick Piggin
2006-01-25 11:19 ` Pekka Enberg
0 siblings, 1 reply; 10+ messages in thread
From: Nick Piggin @ 2006-01-25 11:00 UTC (permalink / raw)
To: Pekka Enberg
Cc: Nick Piggin, Linux Kernel Mailing List, Linux Memory Management List
On Wed, Jan 25, 2006 at 12:30:03PM +0200, Pekka Enberg wrote:
> Hi Nick,
>
> On 1/25/06, Nick Piggin <npiggin@suse.de> wrote:
> > This is probably not worthwhile for most cases, but slab did strike me
> > as a potential candidate (however the complication here is that some
> > code I think uses the refcount of underlying pages of slab allocations
> > eg nommu code). So it is not a complete patch, but I wonder if anyone
> > thinks the savings might be worth the complexity?
> >
> > Is there any particular code that is really heavy on slab allocations?
> > That isn't mostly handled by the slab's internal freelists?
>
> I certainly hope not. For heavy users, the slab allocator should grow
> caches enough to satisfy most allocations from the them. Also, I think
I figured this would usually be the case.
> we want to keep the reference counting for slab pages so that we can
> use kmalloc'd memory in the block layer.
>
Does that happen now? Where is it needed (nbd or something I guess?)
Nick
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC] non-refcounted pages, application to slab?
2006-01-25 10:57 ` Nick Piggin
@ 2006-01-25 11:10 ` Eric Dumazet
2006-01-25 11:18 ` Nick Piggin
0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2006-01-25 11:10 UTC (permalink / raw)
To: Nick Piggin; +Cc: Linux Kernel Mailing List, Linux Memory Management List
Nick Piggin a ecrit :
> On Wed, Jan 25, 2006 at 11:26:01AM +0100, Eric Dumazet wrote:
>> Nick Piggin a ecrit :
>>> If an allocator knows exactly the lifetime of its page, then there is no
>>> need to do refcounting or the final put_page_testzero (atomic op + mem
>>> barriers).
>>>
>>> This is probably not worthwhile for most cases, but slab did strike me
>>> as a potential candidate (however the complication here is that some
>>> code I think uses the refcount of underlying pages of slab allocations
>>> eg nommu code). So it is not a complete patch, but I wonder if anyone
>>> thinks the savings might be worth the complexity?
>>>
>>> Is there any particular code that is really heavy on slab allocations?
>>> That isn't mostly handled by the slab's internal freelists?
>> Hi Nick
>>
>> After reading your patch, I have some crazy idea.
>>
>> The atomic op + mem barrier you want to avoid could be avoided more
>> generally just by changing atomic_dec_and_test(atomic_t *v).
>>
>> If the current thread is the last referer (refcnt = 1), then it can safely
>> set the value to 0 because no other CPU can be touching the value (or else
>> there must be a bug somewhere, as the 'other cpu' could touch the value
>> just after us and we could free an object still in use by 'other cpu'
>>
>
> I think that would work for this case, but you change the semantics
> of the function for all users which is bad.
Yes :) I did a test with a patched kernel and I got :
BUG: atomic counter underflow at:
<c0103a3a> show_trace+0x20/0x22 <c0103b5b> dump_stack+0x1e/0x20
<c01d6934> _atomic_dec_and_lock+0x78/0x88 <c0177599> dput+0xbf/0x187
<c016dc96> path_release+0x14/0x30 <c016e540> __link_path_walk+0x36d/0xd5f
<c016ef84> link_path_walk+0x52/0xd6 <c016f2ec> do_path_lookup+0xfc/0x220
<c016f467> __path_lookup_intent_open+0x3e/0x73 <c016f4d1>
path_lookup_open+0x35/0x37
<c016fc79> open_namei+0x83/0x631 <c015f811> do_filp_open+0x38/0x56
<c015fb83> do_sys_open+0x5c/0x99 <c015fbe7> sys_open+0x27/0x29
<c0102bb3> sysenter_past_esp+0x54/0x75
So we cannot change atomic_dec_and_test(atomic_t *v) but introduce a new
function like :
int atomic_dec_refcount(atomic_t *v)
{
#ifdef CONFIG_SMP
/* avoid an atomic op if we are the last user of this refcount */
if (atomic_read(v) == 1) {
atomic_set(v, 0); /* not a real atomic op on most machines */
return 1;
}
#endif
return atomic_dec_and_test(v);
}
The cost of the extra conditional branch is worth it, if it can avoid an atomic op.
>
> Such a test could be open coded in __free_page, although that does
> add a branch + some icache, but that might also be an option. (and
> my patch does also add to total icache footprint and is much uglier ;))
>
> Thanks,
> Nick
>
>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC] non-refcounted pages, application to slab?
2006-01-25 11:10 ` Eric Dumazet
@ 2006-01-25 11:18 ` Nick Piggin
0 siblings, 0 replies; 10+ messages in thread
From: Nick Piggin @ 2006-01-25 11:18 UTC (permalink / raw)
To: Eric Dumazet
Cc: Nick Piggin, Linux Kernel Mailing List, Linux Memory Management List
On Wed, Jan 25, 2006 at 12:10:48PM +0100, Eric Dumazet wrote:
>
> So we cannot change atomic_dec_and_test(atomic_t *v) but introduce a new
> function like :
>
> int atomic_dec_refcount(atomic_t *v)
> {
> #ifdef CONFIG_SMP
> /* avoid an atomic op if we are the last user of this refcount */
> if (atomic_read(v) == 1) {
> atomic_set(v, 0); /* not a real atomic op on most machines */
> return 1;
> }
> #endif
> return atomic_dec_and_test(v);
> }
>
> The cost of the extra conditional branch is worth it, if it can avoid an
> atomic op.
>
If it can always avoid an atomic op then the conditional branch is
useless, and if it can avoid the atomic op in 20% of cases then it
might still be useless (especially considering the extra icache).
Actual measurements would be required I think.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC] non-refcounted pages, application to slab?
2006-01-25 11:00 ` Nick Piggin
@ 2006-01-25 11:19 ` Pekka Enberg
0 siblings, 0 replies; 10+ messages in thread
From: Pekka Enberg @ 2006-01-25 11:19 UTC (permalink / raw)
To: Nick Piggin; +Cc: Linux Kernel Mailing List, Linux Memory Management List
On Wed, Jan 25, 2006 at 12:30:03PM +0200, Pekka Enberg wrote:
> > we want to keep the reference counting for slab pages so that we can
> > use kmalloc'd memory in the block layer.
On 1/25/06, Nick Piggin <npiggin@suse.de> wrote:
> Does that happen now? Where is it needed (nbd or something I guess?)
See the following thread:
http://thread.gmane.org/gmane.comp.file-systems.ext2.devel/2981. I
think we're using them in quite a few places.
Pekka
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2006-01-25 11:19 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-01-25 9:39 [RFC] non-refcounted pages, application to slab? Nick Piggin
2006-01-25 9:54 ` Eric Dumazet
2006-01-25 9:56 ` Nick Piggin
2006-01-25 10:26 ` Eric Dumazet
2006-01-25 10:57 ` Nick Piggin
2006-01-25 11:10 ` Eric Dumazet
2006-01-25 11:18 ` Nick Piggin
2006-01-25 10:30 ` Pekka Enberg
2006-01-25 11:00 ` Nick Piggin
2006-01-25 11:19 ` Pekka Enberg
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox