From: Wen Yang <wenyang@linux.alibaba.com>
To: Christoph Lameter <cl@linux.com>,
Pekka Enberg <penberg@kernel.org>,
David Rientjes <rientjes@google.com>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Andrew Morton <akpm@linux-foundation.org>
Cc: Xunlei Pang <xlpang@linux.alibaba.com>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH] mm/slub: Detach node lock from counting free objects
Date: Sat, 8 Feb 2020 11:03:15 +0800 [thread overview]
Message-ID: <5373ce28-c369-4e40-11dd-b269e4d2cb24@linux.alibaba.com> (raw)
In-Reply-To: <20200201031502.92218-1-wenyang@linux.alibaba.com>
[-- Attachment #1: Type: text/plain, Size: 7001 bytes --]
Hi,
I would greatly appreciate it if you kindly give me some feedback on this patch.
--
Best wishes,
Wen
On 2020/2/1 11:15 AM, Wen Yang wrote:
> The lock, protecting the node partial list, is taken when counting the free
> objects resident in that list. It introduces locking contention when the
> page(s) is moved between CPU and node partial lists in allocation path
> on another CPU. So reading "/proc/slabinfo" can possibly block the slab
> allocation on another CPU for a while, 200ms in extreme cases. If the
> slab object is to carry network packet, targeting the far-end disk array,
> it causes block IO jitter issue.
>
> This fixes the block IO jitter issue by caching the total inuse objects in
> the node in advance. The value is retrieved without taking the node partial
> list lock on reading "/proc/slabinfo".
>
> Signed-off-by: Wen Yang <wenyang@linux.alibaba.com>
> Cc: Christoph Lameter <cl@linux.com>
> Cc: Pekka Enberg <penberg@kernel.org>
> Cc: David Rientjes <rientjes@google.com>
> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Xunlei Pang <xlpang@linux.alibaba.com>
> Cc: linux-mm@kvack.org
> Cc: linux-kernel@vger.kernel.org
> ---
> mm/slab.h | 1 +
> mm/slub.c | 42 +++++++++++++++++++++++++-----------------
> 2 files changed, 26 insertions(+), 17 deletions(-)
>
> diff --git a/mm/slab.h b/mm/slab.h
> index 7e94700aa78c..27d22837f7ff 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -619,6 +619,7 @@ struct kmem_cache_node {
> #ifdef CONFIG_SLUB_DEBUG
> atomic_long_t nr_slabs;
> atomic_long_t total_objects;
> + atomic_long_t total_inuse;
> struct list_head full;
> #endif
> #endif
> diff --git a/mm/slub.c b/mm/slub.c
> index 503e11b1c4e1..67640e797550 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1060,7 +1060,8 @@ static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
> return atomic_long_read(&n->nr_slabs);
> }
>
> -static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
> +static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects,
> + int inuse)
> {
> struct kmem_cache_node *n = get_node(s, node);
>
> @@ -1073,14 +1074,17 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
> if (likely(n)) {
> atomic_long_inc(&n->nr_slabs);
> atomic_long_add(objects, &n->total_objects);
> + atomic_long_add(inuse, &n->total_inuse);
> }
> }
> -static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
> +static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects,
> + int inuse)
> {
> struct kmem_cache_node *n = get_node(s, node);
>
> atomic_long_dec(&n->nr_slabs);
> atomic_long_sub(objects, &n->total_objects);
> + atomic_long_sub(inuse, &n->total_inuse);
> }
>
> /* Object debug checks for alloc/free paths */
> @@ -1395,9 +1399,11 @@ static inline unsigned long slabs_node(struct kmem_cache *s, int node)
> static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
> { return 0; }
> static inline void inc_slabs_node(struct kmem_cache *s, int node,
> - int objects) {}
> + int objects,
> + int inuse) {}
> static inline void dec_slabs_node(struct kmem_cache *s, int node,
> - int objects) {}
> + int objects,
> + int inuse) {}
>
> #endif /* CONFIG_SLUB_DEBUG */
>
> @@ -1708,7 +1714,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
> if (!page)
> return NULL;
>
> - inc_slabs_node(s, page_to_nid(page), page->objects);
> + inc_slabs_node(s, page_to_nid(page), page->objects, page->inuse);
>
> return page;
> }
> @@ -1768,7 +1774,9 @@ static void free_slab(struct kmem_cache *s, struct page *page)
>
> static void discard_slab(struct kmem_cache *s, struct page *page)
> {
> - dec_slabs_node(s, page_to_nid(page), page->objects);
> + int inuse = page->objects;
> +
> + dec_slabs_node(s, page_to_nid(page), page->objects, inuse);
> free_slab(s, page);
> }
>
> @@ -2396,9 +2404,9 @@ static inline int node_match(struct page *page, int node)
> }
>
> #ifdef CONFIG_SLUB_DEBUG
> -static int count_free(struct page *page)
> +static inline unsigned long node_nr_inuse(struct kmem_cache_node *n)
> {
> - return page->objects - page->inuse;
> + return atomic_long_read(&n->total_inuse);
> }
>
> static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
> @@ -2448,14 +2456,14 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
> for_each_kmem_cache_node(s, node, n) {
> unsigned long nr_slabs;
> unsigned long nr_objs;
> - unsigned long nr_free;
> + unsigned long nr_inuse;
>
> - nr_free = count_partial(n, count_free);
> nr_slabs = node_nr_slabs(n);
> nr_objs = node_nr_objs(n);
> + nr_inuse = node_nr_inuse(n);
>
> pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
> - node, nr_slabs, nr_objs, nr_free);
> + node, nr_slabs, nr_objs, nr_objs - nr_inuse);
> }
> #endif
> }
> @@ -3348,6 +3356,7 @@ init_kmem_cache_node(struct kmem_cache_node *n)
> #ifdef CONFIG_SLUB_DEBUG
> atomic_long_set(&n->nr_slabs, 0);
> atomic_long_set(&n->total_objects, 0);
> + atomic_long_set(&n->total_inuse, 0);
> INIT_LIST_HEAD(&n->full);
> #endif
> }
> @@ -3411,7 +3420,7 @@ static void early_kmem_cache_node_alloc(int node)
> page->frozen = 0;
> kmem_cache_node->node[node] = n;
> init_kmem_cache_node(n);
> - inc_slabs_node(kmem_cache_node, node, page->objects);
> + inc_slabs_node(kmem_cache_node, node, page->objects, page->inuse);
>
> /*
> * No locks need to be taken here as it has just been
> @@ -4857,8 +4866,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
> if (flags & SO_TOTAL)
> x = atomic_long_read(&n->total_objects);
> else if (flags & SO_OBJECTS)
> - x = atomic_long_read(&n->total_objects) -
> - count_partial(n, count_free);
> + x = atomic_long_read(&n->total_inuse);
> else
> x = atomic_long_read(&n->nr_slabs);
> total += x;
> @@ -5900,17 +5908,17 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
> {
> unsigned long nr_slabs = 0;
> unsigned long nr_objs = 0;
> - unsigned long nr_free = 0;
> + unsigned long nr_inuse = 0;
> int node;
> struct kmem_cache_node *n;
>
> for_each_kmem_cache_node(s, node, n) {
> nr_slabs += node_nr_slabs(n);
> nr_objs += node_nr_objs(n);
> - nr_free += count_partial(n, count_free);
> + nr_inuse += node_nr_inuse(n);
> }
>
> - sinfo->active_objs = nr_objs - nr_free;
> + sinfo->active_objs = nr_inuse;
> sinfo->num_objs = nr_objs;
> sinfo->active_slabs = nr_slabs;
> sinfo->num_slabs = nr_slabs;
[-- Attachment #2: Type: text/html, Size: 8283 bytes --]
next prev parent reply other threads:[~2020-02-08 3:03 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-02-01 3:15 Wen Yang
2020-02-08 3:03 ` Wen Yang [this message]
2020-02-08 21:41 ` Christopher Lameter
2020-02-12 22:56 ` Andrew Morton
2020-02-14 2:16 ` Christopher Lameter
2020-02-12 22:52 ` Andrew Morton
2020-02-16 4:15 ` Wen Yang
2020-02-18 20:53 ` Roman Gushchin
2020-02-20 13:53 ` Wen Yang
2020-02-20 15:40 ` Roman Gushchin
2020-02-22 6:55 ` Wen Yang
2020-02-24 17:01 ` Roman Gushchin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5373ce28-c369-4e40-11dd-b269e4d2cb24@linux.alibaba.com \
--to=wenyang@linux.alibaba.com \
--cc=akpm@linux-foundation.org \
--cc=cl@linux.com \
--cc=iamjoonsoo.kim@lge.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=penberg@kernel.org \
--cc=rientjes@google.com \
--cc=xlpang@linux.alibaba.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox