* [PATCH] slub: Remove use of page->counter
@ 2018-04-10 19:54 Matthew Wilcox
2018-04-10 20:47 ` Christopher Lameter
0 siblings, 1 reply; 7+ messages in thread
From: Matthew Wilcox @ 2018-04-10 19:54 UTC (permalink / raw)
To: Christopher Lameter; +Cc: linux-mm
Hi Christoph,
In my continued attempt to clean up struct page, I've got to the point
where it'd be really nice to get rid of 'counters'. I like the patch
below because it makes it clear when & where we're doing "weird" things
to access the various counters.
On 32-bit x86, it cuts the size of .text by 400 bytes because GCC now
decides to inline get_partial_node() into its caller, at least on the
32-bit configuration I was testing with.
As a payoff, struct page now looks like this in my tree, which should make
it slightly easier for people to use the storage available in struct page:
struct {
	unsigned long flags;
	union {
		struct {
			struct address_space *mapping;
			pgoff_t index;
		};
		struct {
			void *s_mem;
			void *freelist;
		};
		...
	};
	union {
		atomic_t _mapcount;
		unsigned int active;
		...
	};
	atomic_t _refcount;
	union {
		struct {
			struct list_head lru;
			unsigned long private;
		};
		struct {
			struct page *next;
			int pages;
			int pobjects;
			struct kmem_cache *slab_cache;
		};
		...
	};
	struct mem_cgroup *mem_cgroup;
};
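For what it's worth, the new get_counters()/set_counters() helpers in the
patch simply treat the word starting at ->active as the old 'counters', so
on 64-bit the load also covers _refcount. Purely as an illustration (not
part of the patch; the helper name and where you'd put the check are made
up), the layout assumption amounts to something like:

	/* Sketch only; would need <linux/mm_types.h> and <linux/build_bug.h> */
	static inline void check_counters_layout(void)
	{
		/*
		 * get_counters() casts &page->active to unsigned long *, so
		 * _refcount has to sit immediately after the _mapcount/active
		 * union for the 64-bit load (and cmpxchg_double) to cover it.
		 */
		BUILD_BUG_ON(offsetof(struct page, _refcount) !=
			     offsetof(struct page, active) + sizeof(unsigned int));
	}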
diff --git a/mm/slub.c b/mm/slub.c
index 0f55f0a0dcaa..a94075051ff3 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -55,8 +55,9 @@
* have the ability to do a cmpxchg_double. It only protects the second
* double word in the page struct. Meaning
* A. page->freelist -> List of object free in a page
- * B. page->counters -> Counters of objects
- * C. page->frozen -> frozen state
+ * B. page->inuse -> Number of objects in use
+ * C. page->objects -> Number of objects
+ * D. page->frozen -> frozen state
*
* If a slab is frozen then it is exempt from list management. It is not
* on any list. The processor that froze the slab is the one who can
@@ -356,23 +357,28 @@ static __always_inline void slab_unlock(struct page *page)
__bit_spin_unlock(PG_locked, &page->flags);
}
-static inline void set_page_slub_counters(struct page *page, unsigned long counters_new)
+/*
+ * Load the various slab counters atomically. On 64-bit machines, this will
+ * also load the page's _refcount field so we can use cmpxchg_double() to
+ * atomically set freelist and the counters.
+ */
+static inline unsigned long get_counters(struct page *p)
{
- struct page tmp;
- tmp.counters = counters_new;
- /*
- * page->counters can cover frozen/inuse/objects as well
- * as page->_refcount. If we assign to ->counters directly
- * we run the risk of losing updates to page->_refcount, so
- * be careful and only assign to the fields we need.
- */
- page->frozen = tmp.frozen;
- page->inuse = tmp.inuse;
- page->objects = tmp.objects;
+ return *(unsigned long *)&p->active;
+}
+
+/*
+ * set_counters() should only be called on a struct page on the stack,
+ * not an active struct page, or we'll overwrite _refcount.
+ */
+static inline void set_counters(struct page *p, unsigned long x)
+{
+ *(unsigned long *)&p->active = x;
}
/* Interrupts must be disabled (for the fallback code to work right) */
-static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
+static inline
+bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
void *freelist_old, unsigned long counters_old,
void *freelist_new, unsigned long counters_new,
const char *n)
@@ -381,7 +387,8 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
if (s->flags & __CMPXCHG_DOUBLE) {
- if (cmpxchg_double(&page->freelist, &page->counters,
+ if (cmpxchg_double(&page->freelist,
+ (unsigned long *)&page->active,
freelist_old, counters_old,
freelist_new, counters_new))
return true;
@@ -390,9 +397,9 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
{
slab_lock(page);
if (page->freelist == freelist_old &&
- page->counters == counters_old) {
+ page->active == (unsigned int)counters_old) {
page->freelist = freelist_new;
- set_page_slub_counters(page, counters_new);
+ page->active = counters_new;
slab_unlock(page);
return true;
}
@@ -417,7 +424,8 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
if (s->flags & __CMPXCHG_DOUBLE) {
- if (cmpxchg_double(&page->freelist, &page->counters,
+ if (cmpxchg_double(&page->freelist,
+ (unsigned long *)&page->active,
freelist_old, counters_old,
freelist_new, counters_new))
return true;
@@ -429,9 +437,9 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
local_irq_save(flags);
slab_lock(page);
if (page->freelist == freelist_old &&
- page->counters == counters_old) {
+ page->active == (unsigned int)counters_old) {
page->freelist = freelist_new;
- set_page_slub_counters(page, counters_new);
+ page->active = counters_new;
slab_unlock(page);
local_irq_restore(flags);
return true;
@@ -1771,8 +1779,8 @@ static inline void *acquire_slab(struct kmem_cache *s,
* per cpu allocation list.
*/
freelist = page->freelist;
- counters = page->counters;
- new.counters = counters;
+ counters = get_counters(page);
+ set_counters(&new, counters);
*objects = new.objects - new.inuse;
if (mode) {
new.inuse = page->objects;
@@ -1786,7 +1794,7 @@ static inline void *acquire_slab(struct kmem_cache *s,
if (!__cmpxchg_double_slab(s, page,
freelist, counters,
- new.freelist, new.counters,
+ new.freelist, get_counters(&new),
"acquire_slab"))
return NULL;
@@ -2033,15 +2041,15 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
do {
prior = page->freelist;
- counters = page->counters;
+ counters = get_counters(page);
set_freepointer(s, freelist, prior);
- new.counters = counters;
+ set_counters(&new, counters);
new.inuse--;
VM_BUG_ON(!new.frozen);
} while (!__cmpxchg_double_slab(s, page,
prior, counters,
- freelist, new.counters,
+ freelist, get_counters(&new),
"drain percpu freelist"));
freelist = nextfree;
@@ -2064,11 +2072,11 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
redo:
old.freelist = page->freelist;
- old.counters = page->counters;
+ set_counters(&old, get_counters(page));
VM_BUG_ON(!old.frozen);
/* Determine target state of the slab */
- new.counters = old.counters;
+ set_counters(&new, get_counters(&old));
if (freelist) {
new.inuse--;
set_freepointer(s, freelist, old.freelist);
@@ -2129,8 +2137,8 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
l = m;
if (!__cmpxchg_double_slab(s, page,
- old.freelist, old.counters,
- new.freelist, new.counters,
+ old.freelist, get_counters(&old),
+ new.freelist, get_counters(&new),
"unfreezing slab"))
goto redo;
@@ -2179,17 +2187,17 @@ static void unfreeze_partials(struct kmem_cache *s,
do {
old.freelist = page->freelist;
- old.counters = page->counters;
+ set_counters(&old, get_counters(page));
VM_BUG_ON(!old.frozen);
- new.counters = old.counters;
+ set_counters(&new, get_counters(&old));
new.freelist = old.freelist;
new.frozen = 0;
} while (!__cmpxchg_double_slab(s, page,
- old.freelist, old.counters,
- new.freelist, new.counters,
+ old.freelist, get_counters(&old),
+ new.freelist, get_counters(&new),
"unfreezing slab"));
if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
@@ -2476,9 +2484,9 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
do {
freelist = page->freelist;
- counters = page->counters;
+ counters = get_counters(page);
- new.counters = counters;
+ set_counters(&new, counters);
VM_BUG_ON(!new.frozen);
new.inuse = page->objects;
@@ -2486,7 +2494,7 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
} while (!__cmpxchg_double_slab(s, page,
freelist, counters,
- NULL, new.counters,
+ NULL, get_counters(&new),
"get_freelist"));
return freelist;
@@ -2813,9 +2821,9 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
n = NULL;
}
prior = page->freelist;
- counters = page->counters;
+ counters = get_counters(page);
set_freepointer(s, tail, prior);
- new.counters = counters;
+ set_counters(&new, counters);
was_frozen = new.frozen;
new.inuse -= cnt;
if ((!new.inuse || !prior) && !was_frozen) {
@@ -2848,7 +2856,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
} while (!cmpxchg_double_slab(s, page,
prior, counters,
- head, new.counters,
+ head, get_counters(&new),
"__slab_free"));
if (likely(!n)) {
* Re: [PATCH] slub: Remove use of page->counter
2018-04-10 19:54 [PATCH] slub: Remove use of page->counter Matthew Wilcox
@ 2018-04-10 20:47 ` Christopher Lameter
2018-04-10 20:57 ` Matthew Wilcox
0 siblings, 1 reply; 7+ messages in thread
From: Christopher Lameter @ 2018-04-10 20:47 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: linux-mm
On Tue, 10 Apr 2018, Matthew Wilcox wrote:
> In my continued attempt to clean up struct page, I've got to the point
> where it'd be really nice to get rid of 'counters'. I like the patch
> below because it makes it clear when & where we're doing "weird" things
> to access the various counters.
Well sounds good.
> struct {
> 	unsigned long flags;
> 	union {
> 		struct {
> 			struct address_space *mapping;
> 			pgoff_t index;
> 		};
> 		struct {
> 			void *s_mem;
> 			void *freelist;
> 		};
> 		...
> 	};
> 	union {
> 		atomic_t _mapcount;
> 		unsigned int active;
Is this aligned on a doubleword boundary? Maybe move the refcount below
the flags field?
* Re: [PATCH] slub: Remove use of page->counter
2018-04-10 20:47 ` Christopher Lameter
@ 2018-04-10 20:57 ` Matthew Wilcox
2018-04-10 22:03 ` Christopher Lameter
0 siblings, 1 reply; 7+ messages in thread
From: Matthew Wilcox @ 2018-04-10 20:57 UTC (permalink / raw)
To: Christopher Lameter; +Cc: linux-mm
On Tue, Apr 10, 2018 at 03:47:28PM -0500, Christopher Lameter wrote:
> On Tue, 10 Apr 2018, Matthew Wilcox wrote:
>
> > In my continued attempt to clean up struct page, I've got to the point
> > where it'd be really nice to get rid of 'counters'. I like the patch
> > below because it makes it clear when & where we're doing "weird" things
> > to access the various counters.
>
> Well sounds good.
>
> > struct {
> > 	unsigned long flags;
> > 	union {
> > 		struct {
> > 			struct address_space *mapping;
> > 			pgoff_t index;
> > 		};
> > 		struct {
> > 			void *s_mem;
/* Dword boundary */
> > 			void *freelist;
> > 		};
> > 		...
> > 	};
> > 	union {
> > 		atomic_t _mapcount;
> > 		unsigned int active;
>
> Is this aligned on a doubleword boundary? Maybe move the refcount below
> the flags field?
You need freelist and _mapcount to be in the same dword. There's no
space to put them both in dword 0, so that's used for flags and mapping
/ s_mem. Then freelist, mapcount and refcount are in dword 1 (on 64-bit),
or freelist & mapcount are in dword 1 on 32-bit. After that, 32 and 64-bit
no longer line up on the same dword boundaries.
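Spelling that constraint out as code (a rough sketch using the field names
above; these checks aren't in any patch and the function is just a
placeholder):

	static inline void check_cmpxchg_double_layout(void)
	{
		/*
		 * cmpxchg_double() compares and swaps two adjacent,
		 * naturally-aligned machine words, so freelist and the word
		 * holding _mapcount must sit next to each other...
		 */
		BUILD_BUG_ON(offsetof(struct page, _mapcount) !=
			     offsetof(struct page, freelist) + sizeof(void *));
		/*
		 * ...and the pair must start on a double-word boundary
		 * (struct page itself is double-word aligned when
		 * CONFIG_HAVE_ALIGNED_STRUCT_PAGE is set).
		 */
		BUILD_BUG_ON(offsetof(struct page, freelist) %
			     (2 * sizeof(void *)) != 0);
	}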
* Re: [PATCH] slub: Remove use of page->counter
2018-04-10 20:57 ` Matthew Wilcox
@ 2018-04-10 22:03 ` Christopher Lameter
2018-04-11 18:26 ` Matthew Wilcox
0 siblings, 1 reply; 7+ messages in thread
From: Christopher Lameter @ 2018-04-10 22:03 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: linux-mm
On Tue, 10 Apr 2018, Matthew Wilcox wrote:
> > Is this aligned on a doubleword boundary? Maybe move the refcount below
> > the flags field?
>
> You need freelist and _mapcount to be in the same dword. There's no
> space to put them both in dword 0, so that's used for flags and mapping
> / s_mem. Then freelist, mapcount and refcount are in dword 1 (on 64-bit),
> or freelist & mapcount are in dword 1 on 32-bit. After that, 32 and 64-bit
> no longer line up on the same dword boundaries.
Well, it's no longer clear from the definitions that this must be the case.
Clarify that in the next version?
* Re: [PATCH] slub: Remove use of page->counter
2018-04-10 22:03 ` Christopher Lameter
@ 2018-04-11 18:26 ` Matthew Wilcox
2018-04-16 13:53 ` Matthew Wilcox
0 siblings, 1 reply; 7+ messages in thread
From: Matthew Wilcox @ 2018-04-11 18:26 UTC (permalink / raw)
To: Christopher Lameter; +Cc: linux-mm
On Tue, Apr 10, 2018 at 05:03:17PM -0500, Christopher Lameter wrote:
> On Tue, 10 Apr 2018, Matthew Wilcox wrote:
>
> > > Is this aligned on a doubleword boundary? Maybe move the refcount below
> > > the flags field?
> >
> > You need freelist and _mapcount to be in the same dword. There's no
> > space to put them both in dword 0, so that's used for flags and mapping
> > / s_mem. Then freelist, mapcount and refcount are in dword 1 (on 64-bit),
> > or freelist & mapcount are in dword 1 on 32-bit. After that, 32 and 64-bit
> > no longer line up on the same dword boundaries.
>
> Well, it's no longer clear from the definitions that this must be the case.
> Clarify that in the next version?
I had a Thought. And it seems to work:
struct page {
	unsigned long flags;
	union {				/* Five words */
		struct {		/* Page cache & anonymous pages */
			struct list_head lru;
			unsigned long private;
			struct address_space *mapping;
			pgoff_t index;
		};
		struct {		/* Slab / Slob / Slub */
			struct page *next;
			void *freelist;
			union {
				unsigned int active;	/* slab */
				struct {		/* slub */
					unsigned inuse:16;
					unsigned objects:15;
					unsigned frozen:1;
				};
				int units;		/* slob */
			};
#ifdef CONFIG_64BIT
			int pages;
#endif
			void *s_mem;
			struct kmem_cache *slab_cache;
		};
		struct rcu_head rcu_head;
		... tail pages, page tables, etc, etc ...
	};
	union {
		atomic_t _mapcount;
		unsigned int page_type;
#ifdef CONFIG_64BIT
		unsigned int pobjects;	/* slub */
#else
		struct {
			short int pages;
			short int pobjects;
		};
#endif
	};
	atomic_t _refcount;
	struct mem_cgroup *mem_cgroup;
};
Now everybody gets 5 contiguous words to use as they want with the only
caveat that they can't use bit 0 of the first word (PageTail). It looks
a little messy to split up pages & pobjects like that -- as far as I
can see there's no reason we couldn't make them unsigned short on 64BIT?
pages is always <= pobjects, and pobjects is limited to 2^15.
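Something like this, as a sketch I haven't compiled: with both of them
short, the #ifdef disappears and 32-bit and 64-bit share the same
arrangement in the _mapcount word:

	union {
		atomic_t _mapcount;
		unsigned int page_type;
		struct {		/* slub, same on 32- and 64-bit */
			short int pages;
			short int pobjects;
		};
	};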
* Re: [PATCH] slub: Remove use of page->counter
2018-04-11 18:26 ` Matthew Wilcox
@ 2018-04-16 13:53 ` Matthew Wilcox
2018-04-16 15:08 ` Christopher Lameter
0 siblings, 1 reply; 7+ messages in thread
From: Matthew Wilcox @ 2018-04-16 13:53 UTC (permalink / raw)
To: Christopher Lameter; +Cc: linux-mm
On Wed, Apr 11, 2018 at 11:26:06AM -0700, Matthew Wilcox wrote:
> I had a Thought. And it seems to work:
Now I realised that slub doesn't use s_mem. That gives us more space
to use for pages/pobjects.
struct page {
	unsigned long flags;
	union {				/* Five words */
		struct {		/* Page cache & anonymous pages */
			struct list_head lru;
			unsigned long private;
			struct address_space *mapping;
			pgoff_t index;
		};
		struct {		/* Slob */
			struct list_head slob_list;
			int units;
		};
		struct {		/* Slab */
			struct kmem_cache *slab_cache;
			void *freelist;
			void *s_mem;
			unsigned int active;
		};
		struct {		/* Slub */
			struct kmem_cache *slub_cache;
			/* Dword boundary */
			void *slub_freelist;
			unsigned short inuse;
			unsigned short objects:15;
			unsigned short frozen:1;
			struct page *next;
#ifdef CONFIG_64BIT
			int pobjects;
			int pages;
#else
			short int pages;
			short int pobjects;
#endif
		};
		struct rcu_head rcu_head;
		... tail pages, page tables, etc, etc ...
	};
	union {
		atomic_t _mapcount;
		unsigned int page_type;
	};
	atomic_t _refcount;
	struct mem_cgroup *mem_cgroup;
};
> Now everybody gets 5 contiguous words to use as they want with the only
> caveat that they can't use bit 0 of the first word (PageTail).
^^^ still true ;-)
I'd want to change slob to use slob_list instead of ->lru. Or maybe even do
something radical like _naming_ the struct in the union so we don't have to
manually namespace the names of the elements.
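Purely as a sketch (I haven't actually tried it, and the member names here
are just placeholders), the named variant might be shaped something like:

	struct page {
		unsigned long flags;
		union {
			struct {	/* page cache & anon */
				struct list_head lru;
				...
			} fs;
			struct {	/* slub */
				struct kmem_cache *slab_cache;
				void *freelist;
				...
			} slub;
			...
		};
		...
	};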
* Re: [PATCH] slub: Remove use of page->counter
2018-04-16 13:53 ` Matthew Wilcox
@ 2018-04-16 15:08 ` Christopher Lameter
0 siblings, 0 replies; 7+ messages in thread
From: Christopher Lameter @ 2018-04-16 15:08 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: linux-mm
On Mon, 16 Apr 2018, Matthew Wilcox wrote:
> 		struct {	/* Slub */
> 			struct kmem_cache *slub_cache;
> 			/* Dword boundary */
> 			void *slub_freelist;
> 			unsigned short inuse;
> 			unsigned short objects:15;
> 			unsigned short frozen:1;
> 			struct page *next;
> #ifdef CONFIG_64BIT
> 			int pobjects;
> 			int pages;
> #else
> 			short int pages;
> 			short int pobjects;
> #endif
That looks better.
> I'd want to change slob to use slob_list instead of ->lru. Or maybe even do
> something radical like _naming_ the struct in the union so we don't have to
> manually namespace the names of the elements.
Hmmm... How would that look?