From: Pasha Tatashin <pasha.tatashin@soleen.com>
To: akpm@linux-foundation.org, jpoimboe@kernel.org,
pasha.tatashin@soleen.com, kent.overstreet@linux.dev,
peterz@infradead.org, nphamcs@gmail.com,
cerasuolodomenico@gmail.com, surenb@google.com,
lizhijian@fujitsu.com, willy@infradead.org,
shakeel.butt@linux.dev, vbabka@suse.cz, ziy@nvidia.com,
linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [PATCH] vmstat: Keep count of the maximum page reached by the kernel stack
Date: Tue, 12 Mar 2024 23:39:07 -0400 [thread overview]
Message-ID: <CA+CK2bCA6aoM0UGX+-vu5aG7iR-ngKB3oV8CxuNDc-pRvUr92w@mail.gmail.com> (raw)
In-Reply-To: <20240313033417.447216-1-pasha.tatashin@soleen.com>
On Tue, Mar 12, 2024 at 11:34 PM Pasha Tatashin
<pasha.tatashin@soleen.com> wrote:
>
> CONFIG_DEBUG_STACK_USAGE provides a mechanism to know the minimum amount
> of memory that was left in the stack. Every time a new anti-record is
> reached, a message is printed to the console.
>
> However, this does not tell us how much each page within the stack was
> actually used. Provide a mechanism to count the number of times each
> stack page was reached throughout the life of the stack:
>
> $ grep kstack /proc/vmstat
> kstack_page_1 19974
> kstack_page_2 94
> kstack_page_3 0
> kstack_page_4 0
>
> In the above example, out of ~20K threads that ever exited on that
> machine, only 94 touched the second page of the stack, and none touched
> pages three and four.
>
> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> ---
> include/linux/sched/task_stack.h | 39 ++++++++++++++++++++++++++++++--
> include/linux/vm_event_item.h | 29 ++++++++++++++++++++++++
> include/linux/vmstat.h | 16 -------------
> mm/vmstat.c | 11 +++++++++
> 4 files changed, 77 insertions(+), 18 deletions(-)
>
> diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
> index ccd72b978e1f..7ff7f9997266 100644
> --- a/include/linux/sched/task_stack.h
> +++ b/include/linux/sched/task_stack.h
> @@ -95,9 +95,41 @@ static inline int object_is_on_stack(const void *obj)
> extern void thread_stack_cache_init(void);
>
> #ifdef CONFIG_DEBUG_STACK_USAGE
> +#ifdef CONFIG_VM_EVENT_COUNTERS
> +#include <linux/vm_event_item.h>
> +
> +/* Count the maximum pages reached in kernel stacks */
> +static inline void count_kstack_page(int stack_max_page)
> +{
> + switch (stack_max_page) {
> + case 1:
> + this_cpu_inc(vm_event_states.event[KSTACK_PAGE_1]);
> + break;
> + case 2:
> + this_cpu_inc(vm_event_states.event[KSTACK_PAGE_2]);
> + break;
> +#if THREAD_SIZE >= (4 * PAGE_SIZE)
> + case 3:
> + this_cpu_inc(vm_event_states.event[KSTACK_PAGE_3]);
> + break;
> + case 4:
> + this_cpu_inc(vm_event_states.event[KSTACK_PAGE_4]);
> + break;
> +#endif
> +#if THREAD_SIZE > (4 * PAGE_SIZE)
> + default:
> + this_cpu_inc(vm_event_states.event[KSTACK_PAGE_5]);
It should be:
this_cpu_inc(vm_event_states.event[KSTACK_PAGE_REST]);
I will fix it in the next version.
> +#endif
> + }
> +}
> +#else /* !CONFIG_VM_EVENT_COUNTERS */
> +static inline void count_kstack_page(int stack_max_page) {}
> +#endif /* CONFIG_VM_EVENT_COUNTERS */
> +
> static inline unsigned long stack_not_used(struct task_struct *p)
> {
> unsigned long *n = end_of_stack(p);
> + unsigned long unused_stack;
>
> do { /* Skip over canary */
> # ifdef CONFIG_STACK_GROWSUP
> @@ -108,10 +140,13 @@ static inline unsigned long stack_not_used(struct task_struct *p)
> } while (!*n);
>
> # ifdef CONFIG_STACK_GROWSUP
> - return (unsigned long)end_of_stack(p) - (unsigned long)n;
> + unused_stack = (unsigned long)end_of_stack(p) - (unsigned long)n;
> # else
> - return (unsigned long)n - (unsigned long)end_of_stack(p);
> + unused_stack = (unsigned long)n - (unsigned long)end_of_stack(p);
> # endif
> + count_kstack_page(((THREAD_SIZE - unused_stack) >> PAGE_SHIFT) + 1);
> +
> + return unused_stack;
> }
> #endif
> extern void set_task_stack_end_magic(struct task_struct *tsk);
> diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
> index 747943bc8cc2..1dbfe47ff048 100644
> --- a/include/linux/vm_event_item.h
> +++ b/include/linux/vm_event_item.h
> @@ -153,10 +153,39 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
> VMA_LOCK_ABORT,
> VMA_LOCK_RETRY,
> VMA_LOCK_MISS,
> +#endif
> +#ifdef CONFIG_DEBUG_STACK_USAGE
> + KSTACK_PAGE_1,
> + KSTACK_PAGE_2,
> +#if THREAD_SIZE >= (4 * PAGE_SIZE)
> + KSTACK_PAGE_3,
> + KSTACK_PAGE_4,
> +#endif
> +#if THREAD_SIZE > (4 * PAGE_SIZE)
> + KSTACK_PAGE_REST,
> +#endif
> #endif
> NR_VM_EVENT_ITEMS
> };
>
> +#ifdef CONFIG_VM_EVENT_COUNTERS
> +/*
> + * Light weight per cpu counter implementation.
> + *
> + * Counters should only be incremented and no critical kernel component
> + * should rely on the counter values.
> + *
> + * Counters are handled completely inline. On many platforms the code
> + * generated will simply be the increment of a global address.
> + */
> +
> +struct vm_event_state {
> + unsigned long event[NR_VM_EVENT_ITEMS];
> +};
> +
> +DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
> +#endif
> +
> #ifndef CONFIG_TRANSPARENT_HUGEPAGE
> #define THP_FILE_ALLOC ({ BUILD_BUG(); 0; })
> #define THP_FILE_FALLBACK ({ BUILD_BUG(); 0; })
> diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
> index 343906a98d6e..18d4a97d3afd 100644
> --- a/include/linux/vmstat.h
> +++ b/include/linux/vmstat.h
> @@ -41,22 +41,6 @@ enum writeback_stat_item {
> };
>
> #ifdef CONFIG_VM_EVENT_COUNTERS
> -/*
> - * Light weight per cpu counter implementation.
> - *
> - * Counters should only be incremented and no critical kernel component
> - * should rely on the counter values.
> - *
> - * Counters are handled completely inline. On many platforms the code
> - * generated will simply be the increment of a global address.
> - */
> -
> -struct vm_event_state {
> - unsigned long event[NR_VM_EVENT_ITEMS];
> -};
> -
> -DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
> -
> /*
> * vm counters are allowed to be racy. Use raw_cpu_ops to avoid the
> * local_irq_disable overhead.
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index db79935e4a54..737c85689251 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1413,6 +1413,17 @@ const char * const vmstat_text[] = {
> "vma_lock_retry",
> "vma_lock_miss",
> #endif
> +#ifdef CONFIG_DEBUG_STACK_USAGE
> + "kstack_page_1",
> + "kstack_page_2",
> +#if THREAD_SIZE >= (4 * PAGE_SIZE)
> + "kstack_page_3",
> + "kstack_page_4",
> +#endif
> +#if THREAD_SIZE > (4 * PAGE_SIZE)
> + "kstack_page_rest",
> +#endif
> +#endif
> #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
> };
> #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
> --
> 2.44.0.278.ge034bb2e1d-goog
>
next prev parent reply other threads:[~2024-03-13 3:39 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-03-13 3:34 Pasha Tatashin
2024-03-13 3:39 ` Pasha Tatashin [this message]
2024-03-14 8:19 ` Christophe Leroy
2024-03-14 13:42 ` Pasha Tatashin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CA+CK2bCA6aoM0UGX+-vu5aG7iR-ngKB3oV8CxuNDc-pRvUr92w@mail.gmail.com \
--to=pasha.tatashin@soleen.com \
--cc=akpm@linux-foundation.org \
--cc=cerasuolodomenico@gmail.com \
--cc=jpoimboe@kernel.org \
--cc=kent.overstreet@linux.dev \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lizhijian@fujitsu.com \
--cc=nphamcs@gmail.com \
--cc=peterz@infradead.org \
--cc=shakeel.butt@linux.dev \
--cc=surenb@google.com \
--cc=vbabka@suse.cz \
--cc=willy@infradead.org \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox