From: Hyeonggon Yoo <42.hyeyoo@gmail.com>
To: Vlastimil Babka <vbabka@suse.cz>,
Christoph Lameter <cl@linux.com>,
Pekka Enberg <penberg@kernel.org>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
David Rientjes <rientjes@google.com>,
Andrew Morton <akpm@linux-foundation.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>,
Feng Tang <feng.tang@intel.com>,
"Sang, Oliver" <oliver.sang@intel.com>,
Jay Patel <jaypatel@linux.ibm.com>,
Binder Makin <merimus@google.com>,
aneesh.kumar@linux.ibm.com, tsahu@linux.ibm.com,
piyushs@linux.ibm.com, fengwei.yin@intel.com,
ying.huang@intel.com, lkp <lkp@intel.com>,
"oe-lkp@lists.linux.dev" <oe-lkp@lists.linux.dev>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
Hyeonggon Yoo <42.hyeyoo@gmail.com>
Subject: [RFC 1/2] Revert "mm, slub: change percpu partial accounting from objects to pages"
Date: Mon, 24 Jul 2023 04:09:05 +0900
Message-ID: <20230723190906.4082646-2-42.hyeyoo@gmail.com>
In-Reply-To: <20230723190906.4082646-1-42.hyeyoo@gmail.com>
This is a partial revert of commit b47291ef02b0 ("mm, slub: change
percpu partial accounting from objects to pages") and a full revert of
commit 662188c3a20e ("mm/slub: Simplify struct slab slabs field
definition").

While b47291ef02b0 prevents the percpu partial slab list from becoming
too long, it assumes that the order of slabs is always oo_order(s->oo).
The current approach can unexpectedly lower the number of objects
cached per cpu when allocating high-order slabs fails. Instead of
accounting the number of slabs, change it back to accounting objects,
but keep the assumption that slabs are half-full.

With this change, the number of cached objects per cpu is no longer
unexpectedly decreased when high-order slab allocation fails. It still
avoids large inaccuracy because it does not account based on the actual
number of free objects in a slab at the time it is taken.
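For illustration, with hypothetical numbers (not taken from any real
cache): assume cpu_partial is 30 objects and oo_objects(s->oo) is 16,
e.g. a 2KB object packed 16 per order-3 slab. The current code then
sets cpu_partial_slabs = DIV_ROUND_UP(30 * 2, 16) = 4. If memory
pressure forces fallback to order-0 slabs holding only 2 objects each,
those 4 cached slabs amount to roughly 4 * 2 / 2 = 4 free objects under
the half-full assumption, far below the intended ~30. With object-based
accounting, slabs keep being added until the per-slab estimate
slab->objects / 2 sums up to about cpu_partial, so the cached amount
stays near 30 objects regardless of the slab order actually allocated.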
---
include/linux/slub_def.h | 2 --
mm/slab.h | 6 ++++++
mm/slub.c | 31 ++++++++++++-------------------
3 files changed, 18 insertions(+), 21 deletions(-)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index deb90cf4bffb..589ff6a2a23f 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -109,8 +109,6 @@ struct kmem_cache {
#ifdef CONFIG_SLUB_CPU_PARTIAL
/* Number of per cpu partial objects to keep around */
unsigned int cpu_partial;
- /* Number of per cpu partial slabs to keep around */
- unsigned int cpu_partial_slabs;
#endif
struct kmem_cache_order_objects oo;
diff --git a/mm/slab.h b/mm/slab.h
index 799a315695c6..be38a264df16 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -65,7 +65,13 @@ struct slab {
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct {
struct slab *next;
+#ifdef CONFIG_64BIT
int slabs; /* Nr of slabs left */
+ int pobjects; /* Approximate count */
+#else
+ short int slabs;
+ short int pobjects;
+#endif
};
#endif
};
diff --git a/mm/slub.c b/mm/slub.c
index f7940048138c..199d3d03d5b9 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -486,18 +486,7 @@ static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
#ifdef CONFIG_SLUB_CPU_PARTIAL
static void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
{
- unsigned int nr_slabs;
-
s->cpu_partial = nr_objects;
-
- /*
- * We take the number of objects but actually limit the number of
- * slabs on the per cpu partial list, in order to limit excessive
- * growth of the list. For simplicity we assume that the slabs will
- * be half-full.
- */
- nr_slabs = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo));
- s->cpu_partial_slabs = nr_slabs;
}
#else
static inline void
@@ -2275,7 +2264,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
struct slab *slab, *slab2;
void *object = NULL;
unsigned long flags;
- unsigned int partial_slabs = 0;
+ int objects_taken = 0;
/*
* Racy check. If we mistakenly see no partial slabs then we
@@ -2312,11 +2301,11 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
} else {
put_cpu_partial(s, slab, 0);
stat(s, CPU_PARTIAL_NODE);
- partial_slabs++;
+ objects_taken += slab->objects / 2;
}
#ifdef CONFIG_SLUB_CPU_PARTIAL
if (!kmem_cache_has_cpu_partial(s)
- || partial_slabs > s->cpu_partial_slabs / 2)
+ || objects_taken > s->cpu_partial / 2)
break;
#else
break;
@@ -2699,13 +2688,14 @@ static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain)
struct slab *slab_to_unfreeze = NULL;
unsigned long flags;
int slabs = 0;
+ int pobjects = 0;
local_lock_irqsave(&s->cpu_slab->lock, flags);
oldslab = this_cpu_read(s->cpu_slab->partial);
if (oldslab) {
- if (drain && oldslab->slabs >= s->cpu_partial_slabs) {
+ if (drain && oldslab->pobjects >= s->cpu_partial) {
/*
* Partial array is full. Move the existing set to the
* per node partial list. Postpone the actual unfreezing
@@ -2714,14 +2704,17 @@ static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain)
slab_to_unfreeze = oldslab;
oldslab = NULL;
} else {
+ pobjects = oldslab->pobjects;
slabs = oldslab->slabs;
}
}
slabs++;
+ pobjects += slab->objects / 2;
slab->slabs = slabs;
slab->next = oldslab;
+ slab->pobjects = pobjects;
this_cpu_write(s->cpu_slab->partial, slab);
@@ -5653,13 +5646,13 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
- if (slab)
+ if (slab) {
slabs += slab->slabs;
+ objects += slab->pobjects;
+ }
}
#endif
- /* Approximate half-full slabs, see slub_set_cpu_partial() */
- objects = (slabs * oo_objects(s->oo)) / 2;
len += sysfs_emit_at(buf, len, "%d(%d)", objects, slabs);
#ifdef CONFIG_SLUB_CPU_PARTIAL
@@ -5669,7 +5662,7 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
if (slab) {
slabs = READ_ONCE(slab->slabs);
- objects = (slabs * oo_objects(s->oo)) / 2;
+ objects = READ_ONCE(slab->pobjects);
len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
cpu, objects, slabs);
}
--
2.41.0