* [PATCH] readahead: Add trace points
From: Jan Kara @ 2025-09-08 14:55 UTC
To: Andrew Morton; +Cc: linux-fsdevel, linux-mm, Jan Kara
Add a couple of trace points to make debugging readahead logic easier.
Signed-off-by: Jan Kara <jack@suse.cz>
---
include/trace/events/readahead.h | 132 +++++++++++++++++++++++++++++++
mm/readahead.c | 7 ++
2 files changed, 139 insertions(+)
create mode 100644 include/trace/events/readahead.h
I added these while looking into how bs > page size interacts with the
readahead code (and got bored with placing kprobes to dump info). I think
they are useful enough to warrant staying in the code...
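In case someone wants to try them: with the patch applied, the events can be
enabled through tracefs in the usual way, e.g. (assuming tracefs is mounted at
/sys/kernel/tracing; the trace line below is made up just to illustrate the
format produced by the page_cache_ra_op event class):

  # echo 1 > /sys/kernel/tracing/events/readahead/enable
  # cat /sys/kernel/tracing/trace_pipe
  ... page_cache_sync_ra: dev=254:1 ino=c0de index=0 req_count=16 order=0 size=32 async_size=16 ra_pages=32 mmap_miss=0 prev_pos=-1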
diff --git a/include/trace/events/readahead.h b/include/trace/events/readahead.h
new file mode 100644
index 000000000000..992a6ce5c154
--- /dev/null
+++ b/include/trace/events/readahead.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM readahead
+
+#if !defined(_TRACE_READAHEAD_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_READAHEAD_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+
+TRACE_EVENT(do_page_cache_ra,
+ TP_PROTO(struct inode *inode, pgoff_t index, unsigned long nr_to_read,
+ unsigned long lookahead_size),
+
+ TP_ARGS(inode, index, nr_to_read, lookahead_size),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, i_ino)
+ __field(dev_t, s_dev)
+ __field(pgoff_t, index)
+ __field(unsigned long, nr_to_read)
+ __field(unsigned long, lookahead_size)
+ ),
+
+ TP_fast_assign(
+ __entry->i_ino = inode->i_ino;
+ __entry->s_dev = inode->i_sb->s_dev;
+ __entry->index = index;
+ __entry->nr_to_read = nr_to_read;
+ __entry->lookahead_size = lookahead_size;
+ ),
+
+ TP_printk(
+ "dev=%d:%d ino=%lx index=%lu nr_to_read=%lu lookahead_size=%lu",
+ MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino,
+ __entry->index, __entry->nr_to_read, __entry->lookahead_size
+ )
+);
+
+TRACE_EVENT(page_cache_ra_order,
+ TP_PROTO(struct inode *inode, pgoff_t index, struct file_ra_state *ra),
+
+ TP_ARGS(inode, index, ra),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, i_ino)
+ __field(dev_t, s_dev)
+ __field(pgoff_t, index)
+ __field(unsigned int, order)
+ __field(unsigned int, size)
+ __field(unsigned int, async_size)
+ __field(unsigned int, ra_pages)
+ ),
+
+ TP_fast_assign(
+ __entry->i_ino = inode->i_ino;
+ __entry->s_dev = inode->i_sb->s_dev;
+ __entry->index = index;
+ __entry->order = ra->order;
+ __entry->size = ra->size;
+ __entry->async_size = ra->async_size;
+ __entry->ra_pages = ra->ra_pages;
+ ),
+
+ TP_printk(
+ "dev=%d:%d ino=%lx index=%lu order=%u size=%u async_size=%u ra_pages=%u",
+ MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino,
+ __entry->index, __entry->order, __entry->size,
+ __entry->async_size, __entry->ra_pages
+ )
+);
+
+DECLARE_EVENT_CLASS(page_cache_ra_op,
+ TP_PROTO(struct inode *inode, pgoff_t index, struct file_ra_state *ra,
+ unsigned long req_count),
+
+ TP_ARGS(inode, index, ra, req_count),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, i_ino)
+ __field(dev_t, s_dev)
+ __field(pgoff_t, index)
+ __field(unsigned int, order)
+ __field(unsigned int, size)
+ __field(unsigned int, async_size)
+ __field(unsigned int, ra_pages)
+ __field(unsigned int, mmap_miss)
+ __field(loff_t, prev_pos)
+ __field(unsigned long, req_count)
+ ),
+
+ TP_fast_assign(
+ __entry->i_ino = inode->i_ino;
+ __entry->s_dev = inode->i_sb->s_dev;
+ __entry->index = index;
+ __entry->order = ra->order;
+ __entry->size = ra->size;
+ __entry->async_size = ra->async_size;
+ __entry->ra_pages = ra->ra_pages;
+ __entry->mmap_miss = ra->mmap_miss;
+ __entry->prev_pos = ra->prev_pos;
+ __entry->req_count = req_count;
+ ),
+
+ TP_printk(
+ "dev=%d:%d ino=%lx index=%lu req_count=%lu order=%u size=%u async_size=%u ra_pages=%u mmap_miss=%u prev_pos=%lld",
+ MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino,
+ __entry->index, __entry->req_count, __entry->order,
+ __entry->size, __entry->async_size, __entry->ra_pages,
+ __entry->mmap_miss, __entry->prev_pos
+ )
+);
+
+DEFINE_EVENT(page_cache_ra_op, page_cache_sync_ra,
+ TP_PROTO(struct inode *inode, pgoff_t index, struct file_ra_state *ra,
+ unsigned long req_count),
+ TP_ARGS(inode, index, ra, req_count)
+);
+
+DEFINE_EVENT(page_cache_ra_op, page_cache_async_ra,
+ TP_PROTO(struct inode *inode, pgoff_t index, struct file_ra_state *ra,
+ unsigned long req_count),
+ TP_ARGS(inode, index, ra, req_count)
+);
+
+#endif /* _TRACE_READAHEAD_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/readahead.c b/mm/readahead.c
index 406756d34309..210395fe1044 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -129,6 +129,9 @@
#include <linux/fadvise.h>
#include <linux/sched/mm.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/readahead.h>
+
#include "internal.h"
/*
@@ -314,6 +317,7 @@ static void do_page_cache_ra(struct readahead_control *ractl,
loff_t isize = i_size_read(inode);
pgoff_t end_index; /* The last page we want to read */
+ trace_do_page_cache_ra(inode, index, nr_to_read, lookahead_size);
if (isize == 0)
return;
@@ -470,6 +474,7 @@ void page_cache_ra_order(struct readahead_control *ractl,
gfp_t gfp = readahead_gfp_mask(mapping);
unsigned int new_order = ra->order;
+ trace_page_cache_ra_order(mapping->host, start, ra);
if (!mapping_large_folio_support(mapping)) {
ra->order = 0;
goto fallback;
@@ -554,6 +559,7 @@ void page_cache_sync_ra(struct readahead_control *ractl,
unsigned long max_pages, contig_count;
pgoff_t prev_index, miss;
+ trace_page_cache_sync_ra(ractl->mapping->host, index, ra, req_count);
/*
* Even if readahead is disabled, issue this request as readahead
* as we'll need it to satisfy the requested range. The forced
@@ -638,6 +644,7 @@ void page_cache_async_ra(struct readahead_control *ractl,
if (folio_test_writeback(folio))
return;
+ trace_page_cache_async_ra(ractl->mapping->host, index, ra, req_count);
folio_clear_readahead(folio);
if (blk_cgroup_congested())
--
2.51.0
* Re: [PATCH] readahead: Add trace points
From: Pankaj Raghav (Samsung) @ 2025-09-09 12:41 UTC
To: Jan Kara; +Cc: Andrew Morton, linux-fsdevel, linux-mm
On Mon, Sep 08, 2025 at 04:55:34PM +0200, Jan Kara wrote:
>
> /*
> @@ -314,6 +317,7 @@ static void do_page_cache_ra(struct readahead_control *ractl,
> loff_t isize = i_size_read(inode);
> pgoff_t end_index; /* The last page we want to read */
>
> + trace_do_page_cache_ra(inode, index, nr_to_read, lookahead_size);
Any reason why you put the probe here instead of in page_cache_ra_unbounded(),
since that is where the actual readahead happens?
--
Pankaj Raghav
* Re: [PATCH] readahead: Add trace points
From: Jan Kara @ 2025-09-09 14:47 UTC
To: Pankaj Raghav (Samsung); +Cc: Jan Kara, Andrew Morton, linux-fsdevel, linux-mm
On Tue 09-09-25 14:41:33, Pankaj Raghav (Samsung) wrote:
> On Mon, Sep 08, 2025 at 04:55:34PM +0200, Jan Kara wrote:
> >
> > /*
> > @@ -314,6 +317,7 @@ static void do_page_cache_ra(struct readahead_control *ractl,
> > loff_t isize = i_size_read(inode);
> > pgoff_t end_index; /* The last page we want to read */
> >
> > + trace_do_page_cache_ra(inode, index, nr_to_read, lookahead_size);
>
> Any reason why you put the probe here instead of in page_cache_ra_unbounded(),
> since that is where the actual readahead happens?
Hum, no. Originally I had it in force_page_cache_ra() but then I decided
do_page_cache_ra() is better because it also captures other places issuing
non-standard readahead. But you're right that placing the tracepoint in
page_cache_ra_unbounded() will achieve that as well and is a more standard
place. I'll respin the patch (rough sketch of the placement below). Thanks
for the suggestion.
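Something along these lines (just a sketch against the current
page_cache_ra_unbounded() prototype, not the final hunk; whether the event
keeps the do_page_cache_ra name or gets renamed to match is still open):

void page_cache_ra_unbounded(struct readahead_control *ractl,
		unsigned long nr_to_read, unsigned long lookahead_size)
{
	struct address_space *mapping = ractl->mapping;

	/* One tracepoint at the common entry point for all readahead. */
	trace_page_cache_ra_unbounded(mapping->host, readahead_index(ractl),
				      nr_to_read, lookahead_size);
	...
}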
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR