* Re: [PATCH v2] mm: slub: Convert sys slab alloc_calls, free_calls to bin attribute [not found] <1613543513-11965-1-git-send-email-faiyazm@codeaurora.org> @ 2021-02-17 11:38 ` Faiyaz Mohammed [not found] ` <13df1c88-3848-1969-5424-33a927ec033e@suse.cz> 1 sibling, 0 replies; 4+ messages in thread From: Faiyaz Mohammed @ 2021-02-17 11:38 UTC (permalink / raw) To: cl, penberg, rientjes, iamjoonsoo.kim, akpm, vbabka, willy, linux-kernel, linux-mm Cc: vinmenon +linux-mm, linux-kernel. On 2/17/2021 12:01 PM, Faiyaz Mohammed wrote: > Reading the sys slab alloc_calls, free_calls returns the available object > owners, but the size of this file is limited to PAGE_SIZE > because of the limitation of sysfs attributes, it is returning the > partial owner info, which is not sufficient to debug/account the slab > memory and alloc_calls output is not matching with /proc/slabinfo. > > To remove the PAGE_SIZE limitation converted the sys slab > alloc_calls, free_calls to bin attribute. > > Signed-off-by: Faiyaz Mohammed <faiyazm@codeaurora.org> > --- > mm/slub.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++---------------- > 1 file changed, 63 insertions(+), 21 deletions(-) > > diff --git a/mm/slub.c b/mm/slub.c > index b22a4b1..71cfe3b 100644 > --- a/mm/slub.c > +++ b/mm/slub.c > @@ -37,6 +37,9 @@ > > #include <trace/events/kmem.h> > > +#define TRACE_ENTRY_MAX 80 > +#define TRACKS_PER_PAGE ((PAGE_SIZE - KSYM_SYMBOL_LEN - 100) / TRACE_ENTRY_MAX) > + > #include "internal.h" > > /* > @@ -4748,6 +4751,7 @@ static int list_locations(struct kmem_cache *s, char *buf, > struct loc_track t = { 0, 0, NULL }; > int node; > struct kmem_cache_node *n; > + unsigned int previous_read_count = 0; > > if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), > GFP_KERNEL)) { > @@ -4756,6 +4760,11 @@ static int list_locations(struct kmem_cache *s, char *buf, > /* Push back cpu slabs */ > flush_all(s); > > + if (offset != 0) > + previous_read_count = (offset / TRACE_ENTRY_MAX); > + > + 
memset(buf, 0, PAGE_SIZE); > + > for_each_kmem_cache_node(s, node, n) { > unsigned long flags; > struct page *page; > @@ -4771,48 +4780,62 @@ static int list_locations(struct kmem_cache *s, char *buf, > spin_unlock_irqrestore(&n->list_lock, flags); > } > > - for (i = 0; i < t.count; i++) { > + for (i = previous_read_count; i < t.count; i++) { > struct location *l = &t.loc[i]; > + unsigned int cur_len = 0; > > - len += sysfs_emit_at(buf, len, "%7ld ", l->count); > + cur_len += sysfs_emit_at(buf, cur_len + len, "%7ld ", l->count); > > if (l->addr) > - len += sysfs_emit_at(buf, len, "%pS", (void *)l->addr); > + cur_len += sysfs_emit_at(buf, cur_len + len, "%pS", (void *)l->addr); > else > - len += sysfs_emit_at(buf, len, "<not-available>"); > + cur_len += sysfs_emit_at(buf, cur_len + len, "<not-available>"); > > if (l->sum_time != l->min_time) > - len += sysfs_emit_at(buf, len, " age=%ld/%ld/%ld", > + cur_len += sysfs_emit_at(buf, cur_len + len, " age=%ld/%ld/%ld", > l->min_time, > (long)div_u64(l->sum_time, > l->count), > l->max_time); > else > - len += sysfs_emit_at(buf, len, " age=%ld", l->min_time); > + cur_len += sysfs_emit_at(buf, cur_len + len, " age=%ld", l->min_time); > > if (l->min_pid != l->max_pid) > - len += sysfs_emit_at(buf, len, " pid=%ld-%ld", > + cur_len += sysfs_emit_at(buf, cur_len + len, " pid=%ld-%ld", > l->min_pid, l->max_pid); > else > - len += sysfs_emit_at(buf, len, " pid=%ld", > + cur_len += sysfs_emit_at(buf, cur_len + len, " pid=%ld", > l->min_pid); > > if (num_online_cpus() > 1 && > !cpumask_empty(to_cpumask(l->cpus))) > - len += sysfs_emit_at(buf, len, " cpus=%*pbl", > + cur_len += sysfs_emit_at(buf, cur_len + len, " cpus=%*pbl", > cpumask_pr_args(to_cpumask(l->cpus))); > > if (nr_online_nodes > 1 && !nodes_empty(l->nodes)) > - len += sysfs_emit_at(buf, len, " nodes=%*pbl", > + cur_len += sysfs_emit_at(buf, cur_len + len, " nodes=%*pbl", > nodemask_pr_args(&l->nodes)); > > + if (cur_len >= TRACE_ENTRY_MAX) > + cur_len -= (cur_len % 
TRACE_ENTRY_MAX) - 1; > + else if (cur_len < TRACE_ENTRY_MAX) > + cur_len += TRACE_ENTRY_MAX - (cur_len % TRACE_ENTRY_MAX) - 1; > + > + len += cur_len; > + > len += sysfs_emit_at(buf, len, "\n"); > + > + if (i >= (previous_read_count + TRACKS_PER_PAGE)) > + break; > + > } > > - free_loc_track(&t); > - if (!t.count) > - len += sysfs_emit_at(buf, len, "No data\n"); > + if (((previous_read_count > t.count) | (i >= t.count)) && (offset != 0)) > + len = 0; > + else if (!t.count) > + len += sprintf(buf, "No data\n"); > > + free_loc_track(&t); > return len; > } > #endif /* CONFIG_SLUB_DEBUG */ > @@ -5280,21 +5303,33 @@ static ssize_t validate_store(struct kmem_cache *s, > } > SLAB_ATTR(validate); > > -static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf) > +static ssize_t alloc_calls_read(struct file *filp, struct kobject *kobj, > + struct bin_attribute *bin_attr, char *buf, > + loff_t offset, size_t count) > { > + struct kmem_cache *s; > + > + s = to_slab(kobj); > if (!(s->flags & SLAB_STORE_USER)) > return -ENOSYS; > - return list_locations(s, buf, TRACK_ALLOC); > + > + return list_locations(s, buf, offset, TRACK_ALLOC); > } > -SLAB_ATTR_RO(alloc_calls); > +BIN_ATTR_RO(alloc_calls, 0); > > -static ssize_t free_calls_show(struct kmem_cache *s, char *buf) > +static ssize_t free_calls_read(struct file *filp, struct kobject *kobj, > + struct bin_attribute *bin_attr, char *buf, > + loff_t offset, size_t count) > { > + struct kmem_cache *s; > + > + s = to_slab(kobj); > if (!(s->flags & SLAB_STORE_USER)) > return -ENOSYS; > - return list_locations(s, buf, TRACK_FREE); > + > + return list_locations(s, buf, offset, TRACK_FREE); > } > -SLAB_ATTR_RO(free_calls); > +BIN_ATTR_RO(free_calls, 0); > #endif /* CONFIG_SLUB_DEBUG */ > > #ifdef CONFIG_FAILSLAB > @@ -5430,6 +5465,14 @@ STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node); > STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain); > #endif /* CONFIG_SLUB_STATS */ > > + > +static struct bin_attribute *slab_bin_attrs[] = { > 
+#ifdef CONFIG_SLUB_DEBUG > + &bin_attr_alloc_calls, > + &bin_attr_free_calls, > +#endif > +}; > + > static struct attribute *slab_attrs[] = { > &slab_size_attr.attr, > &object_size_attr.attr, > @@ -5458,8 +5501,6 @@ static struct attribute *slab_attrs[] = { > &poison_attr.attr, > &store_user_attr.attr, > &validate_attr.attr, > - &alloc_calls_attr.attr, > - &free_calls_attr.attr, > #endif > #ifdef CONFIG_ZONE_DMA > &cache_dma_attr.attr, > @@ -5505,6 +5546,7 @@ static struct attribute *slab_attrs[] = { > > static const struct attribute_group slab_attr_group = { > .attrs = slab_attrs, > + .bin_attrs = slab_bin_attrs, > }; > > static ssize_t slab_attr_show(struct kobject *kobj, > ^ permalink raw reply [flat|nested] 4+ messages in thread
[parent not found: <13df1c88-3848-1969-5424-33a927ec033e@suse.cz>]
[parent not found: <YD45e70b48gyXkIg@kroah.com>]
* Re: [PATCH v2] mm: slub: Convert sys slab alloc_calls, free_calls to bin attribute [not found] ` <YD45e70b48gyXkIg@kroah.com> @ 2021-03-19 5:28 ` Faiyaz Mohammed 2021-03-19 6:54 ` Greg Kroah-Hartman 2021-03-19 9:40 ` Vlastimil Babka 0 siblings, 2 replies; 4+ messages in thread From: Faiyaz Mohammed @ 2021-03-19 5:28 UTC (permalink / raw) To: Greg Kroah-Hartman, Vlastimil Babka Cc: cl, penberg, rientjes, iamjoonsoo.kim, akpm, willy, vinmenon, Peter Zijlstra, linux-mm Hi, Sorry for the late response! On 3/2/2021 6:41 PM, Greg Kroah-Hartman wrote: > On Tue, Mar 02, 2021 at 01:59:46PM +0100, Vlastimil Babka wrote: >> On 2/17/21 7:31 AM, Faiyaz Mohammed wrote: >>> Reading the sys slab alloc_calls, free_calls returns the available object >>> owners, but the size of this file is limited to PAGE_SIZE >>> because of the limitation of sysfs attributes, it is returning the >>> partial owner info, which is not sufficient to debug/account the slab >>> memory and alloc_calls output is not matching with /proc/slabinfo. >>> >>> To remove the PAGE_SIZE limitation converted the sys slab >>> alloc_calls, free_calls to bin attribute. >>> >>> Signed-off-by: Faiyaz Mohammed <faiyazm@codeaurora.org> >> >> After discussing this with Greg and PeterZ, sysfs should be one value per file, >> and is just not proper API for this kind of info. We should reimplement at least >> these clearly debugging "attributes" in debugfs, where they belong, instead of >> trying to hack around the limitation in sysfs. > > sysfs is _REQUIRED_ to be "one value per file", any kernel code that > abuses this needs to be fixed up. > > Why are these slab files in sysfs at all anyway? They all feel like > debugging stuff to me, why not move everything to debugfs? Would make > for a lot less code overall. > Yes, we can move the /sys/kernel/slab/kmemcache/alloc_calls and /sys/kernel/slab/kmemcache/free_calls implementation to debugfs, but is it okay to move only alloc_calls and free_calls? 
or do we have to move the whole sysfs interface to debugfs? If we move only the alloc_calls/free_calls interface to debugfs, then I think we can put the data for all slab objects into a single file. For example: /sys/kernel/debug/slab/alloc_calls, which will print the data for all slab objects. Example output: alloc_list: call_site=__request_region+0xb4/0x2f0 count=228 object_size=128 slab_size=640 slab_name=kmalloc-128 Or we can keep it like the current sysfs interface, with separate alloc_calls/free_calls traces per kmem cache. Which one would be better? > Thanks and regards, Mohammed Faiyaz ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2] mm: slub: Convert sys slab alloc_calls, free_calls to bin attribute 2021-03-19 5:28 ` Faiyaz Mohammed @ 2021-03-19 6:54 ` Greg Kroah-Hartman 2021-03-19 9:40 ` Vlastimil Babka 1 sibling, 0 replies; 4+ messages in thread From: Greg Kroah-Hartman @ 2021-03-19 6:54 UTC (permalink / raw) To: Faiyaz Mohammed Cc: Vlastimil Babka, cl, penberg, rientjes, iamjoonsoo.kim, akpm, willy, vinmenon, Peter Zijlstra, linux-mm On Fri, Mar 19, 2021 at 10:58:55AM +0530, Faiyaz Mohammed wrote: > Hi, > > Sorry for late response! > > On 3/2/2021 6:41 PM, Greg Kroah-Hartman wrote: > > On Tue, Mar 02, 2021 at 01:59:46PM +0100, Vlastimil Babka wrote: > >> On 2/17/21 7:31 AM, Faiyaz Mohammed wrote: > >>> Reading the sys slab alloc_calls, free_calls returns the available object > >>> owners, but the size of this file is limited to PAGE_SIZE > >>> because of the limitation of sysfs attributes, it is returning the > >>> partial owner info, which is not sufficient to debug/account the slab > >>> memory and alloc_calls output is not matching with /proc/slabinfo. > >>> > >>> To remove the PAGE_SIZE limitation converted the sys slab > >>> alloc_calls, free_calls to bin attribut > >>> > >>> Signed-off-by: Faiyaz Mohammed <faiyazm@codeaurora.org> > >> > >> After discussing this with Greg and PeterZ, sysfs should be one value per file, > >> and is just not proper API for this kind of info. We should reimplement at least > >> these clearly debugging "attributes" in debugfs, where they belong, instead of > >> trying to hack around the limitation in sysfs. > > > > sysfs is _REQUIRED_ to be "one value per file", any kernel code that > > abuses this needs to be fixed up. > > > > Why are these slab files in sysfs at all anyway? They all feel like > > debugging stuff to me, why not move everything to debugfs? Would make > > for a lot less code overall. 
> > > Yes, we can move the /sys/kernel/slab/kmemcache/alloc_calls and > /sys/kernel/slab/kmemcache/free_calls implementation to debugfs but is > it okay to move only alloc_calls and free_calls? or we have to move > whole sysfs interface to debugfs?. sysfs files should only have "one value" in them. Anything that violates that rule, should be moved to debugfs. thanks, greg k-h ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2] mm: slub: Convert sys slab alloc_calls, free_calls to bin attribute 2021-03-19 5:28 ` Faiyaz Mohammed 2021-03-19 6:54 ` Greg Kroah-Hartman @ 2021-03-19 9:40 ` Vlastimil Babka 1 sibling, 0 replies; 4+ messages in thread From: Vlastimil Babka @ 2021-03-19 9:40 UTC (permalink / raw) To: Faiyaz Mohammed, Greg Kroah-Hartman Cc: cl, penberg, rientjes, iamjoonsoo.kim, akpm, willy, vinmenon, Peter Zijlstra, linux-mm On 3/19/21 6:28 AM, Faiyaz Mohammed wrote: > Hi, > > Sorry for late response! > > On 3/2/2021 6:41 PM, Greg Kroah-Hartman wrote: >> On Tue, Mar 02, 2021 at 01:59:46PM +0100, Vlastimil Babka wrote: >>> On 2/17/21 7:31 AM, Faiyaz Mohammed wrote: >>>> Reading the sys slab alloc_calls, free_calls returns the available object >>>> owners, but the size of this file is limited to PAGE_SIZE >>>> because of the limitation of sysfs attributes, it is returning the >>>> partial owner info, which is not sufficient to debug/account the slab >>>> memory and alloc_calls output is not matching with /proc/slabinfo. >>>> >>>> To remove the PAGE_SIZE limitation converted the sys slab >>>> alloc_calls, free_calls to bin attribut >>>> >>>> Signed-off-by: Faiyaz Mohammed <faiyazm@codeaurora.org> >>> >>> After discussing this with Greg and PeterZ, sysfs should be one value per file, >>> and is just not proper API for this kind of info. We should reimplement at least >>> these clearly debugging "attributes" in debugfs, where they belong, instead of >>> trying to hack around the limitation in sysfs. >> >> sysfs is _REQUIRED_ to be "one value per file", any kernel code that >> abuses this needs to be fixed up. >> >> Why are these slab files in sysfs at all anyway? They all feel like >> debugging stuff to me, why not move everything to debugfs? Would make >> for a lot less code overall. 
>> > Yes, we can move the /sys/kernel/slab/kmemcache/alloc_calls and > /sys/kernel/slab/kmemcache/free_calls implementation to debugfs but is > it okay to move only alloc_calls and free_calls? or we have to move > whole sysfs interface to debugfs?. I don't think we need to move everything, just files where it makes sense. > If we are moving only alloc_calls/free_calls interface to debugfs then I > think we can add all slab objects data into single file. > > For example: /sys/kernel/debugfs/slab/alloc_calls, which will print all > slab objects data. > > Example Output: alloc_list: call_site=__request_region+0xb4/0x2f0 > count=228 object_size=128 slab_size=640 slab_name=kmalloc-128 I wouldn't do this, as processing all caches will have a large overhead and then somebody interested in single cache would throw most of the info away. > or > > We can have just like current sysfs interface, have separate > alloc_calls/free_calls traces per kmem cache. > > Which one would be better?. > >> > > Thanks and regards, > Mohammed Faiyaz > ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-03-19 9:40 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <1613543513-11965-1-git-send-email-faiyazm@codeaurora.org>
2021-02-17 11:38 ` [PATCH v2] mm: slub: Convert sys slab alloc_calls, free_calls to bin attribute Faiyaz Mohammed
[not found] ` <13df1c88-3848-1969-5424-33a927ec033e@suse.cz>
[not found] ` <YD45e70b48gyXkIg@kroah.com>
2021-03-19 5:28 ` Faiyaz Mohammed
2021-03-19 6:54 ` Greg Kroah-Hartman
2021-03-19 9:40 ` Vlastimil Babka
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox