From: Oscar Salvador <osalvador@suse.de>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
Michal Hocko <mhocko@suse.com>, Vlastimil Babka <vbabka@suse.cz>,
Marco Elver <elver@google.com>,
Andrey Konovalov <andreyknvl@gmail.com>,
Alexander Potapenko <glider@google.com>,
Oscar Salvador <osalvador@suse.de>
Subject: [PATCH v8 2/5] mm,page_owner: Implement the tracking of the stacks count
Date: Mon, 12 Feb 2024 23:30:26 +0100 [thread overview]
Message-ID: <20240212223029.30769-3-osalvador@suse.de> (raw)
In-Reply-To: <20240212223029.30769-1-osalvador@suse.de>
page_owner needs to increment a stack_record refcount when a new allocation
occurs, and decrement it on a free operation.
In order to do that, we need to have a way to get a stack_record from a
handle.
Implement __stack_depot_get_stack_record() which does just that, and make
it public so page_owner can use it.
Also implement {inc,dec}_stack_record_count(), which increment or
decrement the refcount on allocation and free operations respectively, via
__set_page_owner() (alloc operation) and __reset_page_owner() (free
operation).
Traversing all stackdepot buckets comes with its own complexity,
plus we would have to implement a way to mark only those stack_records
that originated from page_owner, as those are the ones we are
interested in.
For that reason, page_owner maintains its own list of stack_records,
because traversing that list is faster than traversing all buckets
while keeping at the same time a low complexity.
inc_stack_record_count() is responsible for adding new stack_records
into the list stack_list.
Modifications on the list are protected via a spinlock with irqs
disabled, since this code can also be reached from IRQ context.
Signed-off-by: Oscar Salvador <osalvador@suse.de>
---
include/linux/stackdepot.h | 9 +++++
lib/stackdepot.c | 8 +++++
mm/page_owner.c | 73 ++++++++++++++++++++++++++++++++++++++
3 files changed, 90 insertions(+)
diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 90274860fd8e..f3c2162bf615 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -175,6 +175,15 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
depot_stack_handle_t stack_depot_save(unsigned long *entries,
unsigned int nr_entries, gfp_t gfp_flags);
+/**
+ * __stack_depot_get_stack_record - Get a pointer to a stack_record struct
+ *
+ * @handle: Stack depot handle
+ *
+ * This function is only for internal purposes.
+ *
+ * Return: Returns a pointer to a stack_record struct, or NULL if @handle is 0
+ */
+struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle);
+
/**
* stack_depot_fetch - Fetch a stack trace from stack depot
*
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 6f9095374847..fdb09450a538 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -685,6 +685,14 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
}
EXPORT_SYMBOL_GPL(stack_depot_save);
+/*
+ * Resolve @handle to its stack_record. A zero handle is not looked up and
+ * yields NULL; any other value is passed on to depot_fetch_stack().
+ */
+struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle)
+{
+	return handle ? depot_fetch_stack(handle) : NULL;
+}
+
unsigned int stack_depot_fetch(depot_stack_handle_t handle,
unsigned long **entries)
{
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 5634e5d890f8..7d1b3f75cef3 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -36,6 +36,14 @@ struct page_owner {
pid_t free_tgid;
};
+struct stack {	/* node of page_owner's singly linked list of stack_records */
+ struct stack_record *stack_record;	/* record this node refers to */
+ struct stack *next;	/* following node; NULL terminates the list */
+};
+
+static struct stack *stack_list;	/* list head; new nodes are pushed here */
+static DEFINE_SPINLOCK(stack_list_lock);	/* held, IRQs off, while stack_list is modified */
+
static bool page_owner_enabled __initdata;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);
@@ -61,6 +69,57 @@ static __init bool need_page_owner(void)
return page_owner_enabled;
}
+/*
+ * add_stack_record_to_list - Track a stack_record in page_owner's own list
+ * @stack_record: record to be linked at the head of stack_list
+ *
+ * The list node is allocated without sleeping because this path can be
+ * reached from atomic or IRQ context. If the allocation fails the record is
+ * simply not linked into stack_list; no error is reported to the caller.
+ */
+static void add_stack_record_to_list(struct stack_record *stack_record)
+{
+	unsigned long flags;
+	struct stack *stack;
+
+	/* Same non-blocking flags __reset_page_owner() passes to save_stack(). */
+	stack = kmalloc(sizeof(*stack), GFP_NOWAIT | __GFP_NOWARN);
+	if (!stack)
+		return;
+
+	stack->stack_record = stack_record;
+
+	/* Head insertion; also correct while stack_list is still NULL. */
+	spin_lock_irqsave(&stack_list_lock, flags);
+	stack->next = stack_list;
+	stack_list = stack;
+	spin_unlock_irqrestore(&stack_list_lock, flags);
+}
+
+/*
+ * inc_stack_record_count - Account one more allocation to a stack trace
+ * @handle: stack depot handle of the allocating stack
+ *
+ * Looks up the stack_record behind @handle and bumps its refcount. The first
+ * time a record is seen it is also added to page_owner's stack_list. Handles
+ * that do not resolve to a record are ignored.
+ */
+static void inc_stack_record_count(depot_stack_handle_t handle)
+{
+	struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
+
+	if (!stack_record)
+		return;
+
+	/*
+	 * New stack_record's that do not use STACK_DEPOT_FLAG_GET start
+	 * with REFCOUNT_SATURATED to catch spurious increments of their
+	 * refcount.
+	 * Since we do not use STACK_DEPOT_FLAG_{GET,PUT} API, let us
+	 * set a refcount of 1 ourselves.
+	 *
+	 * The SATURATED -> 1 transition is done with a cmpxchg so that, when
+	 * several CPUs race on the same new record, exactly one of them
+	 * resets the count and adds the record to the list; the others just
+	 * fall through to the increment below.
+	 */
+	if (refcount_read(&stack_record->count) == REFCOUNT_SATURATED) {
+		int old = REFCOUNT_SATURATED;
+
+		if (atomic_try_cmpxchg_relaxed(&stack_record->count.refs, &old, 1))
+			/* Add the new stack_record to our list */
+			add_stack_record_to_list(stack_record);
+	}
+	refcount_inc(&stack_record->count);
+}
+
+/*
+ * Counterpart of inc_stack_record_count(): look up the stack_record behind
+ * @handle and, when there is one, decrement its refcount.
+ */
+static void dec_stack_record_count(depot_stack_handle_t handle)
+{
+	struct stack_record *rec;
+
+	rec = __stack_depot_get_stack_record(handle);
+	if (!rec)
+		return;
+
+	refcount_dec(&rec->count);
+}
+
static __always_inline depot_stack_handle_t create_dummy_stack(void)
{
unsigned long entries[4];
@@ -140,6 +199,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
int i;
struct page_ext *page_ext;
depot_stack_handle_t handle;
+ depot_stack_handle_t alloc_handle;
struct page_owner *page_owner;
u64 free_ts_nsec = local_clock();
@@ -147,6 +207,9 @@ void __reset_page_owner(struct page *page, unsigned short order)
if (unlikely(!page_ext))
return;
+ page_owner = get_page_owner(page_ext);
+ alloc_handle = page_owner->handle;
+
handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
for (i = 0; i < (1 << order); i++) {
__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
@@ -158,6 +221,15 @@ void __reset_page_owner(struct page *page, unsigned short order)
page_ext = page_ext_next(page_ext);
}
page_ext_put(page_ext);
+ if (alloc_handle != early_handle)
+ /*
+ * early_handle is being set as a handle for all those
+ * early allocated pages. See init_pages_in_zone().
+ * Since their refcount is not being incremented because
+ * the machinery is not ready yet, we cannot decrement
+ * their refcount either.
+ */
+ dec_stack_record_count(alloc_handle);
}
static inline void __set_page_owner_handle(struct page_ext *page_ext,
@@ -199,6 +271,7 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
return;
__set_page_owner_handle(page_ext, handle, order, gfp_mask);
page_ext_put(page_ext);
+ inc_stack_record_count(handle);
}
void __set_page_owner_migrate_reason(struct page *page, int reason)
--
2.43.0
next prev parent reply other threads:[~2024-02-12 22:29 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-12 22:30 [PATCH v8 0/5] page_owner: print stacks and their outstanding allocations Oscar Salvador
2024-02-12 22:30 ` [PATCH v8 1/5] lib/stackdepot: Move stack_record struct definition into the header Oscar Salvador
2024-02-13 8:26 ` Marco Elver
2024-02-13 11:12 ` Vlastimil Babka
2024-02-12 22:30 ` Oscar Salvador [this message]
2024-02-13 8:30 ` [PATCH v8 2/5] mm,page_owner: Implement the tracking of the stacks count Marco Elver
2024-02-13 9:16 ` Oscar Salvador
2024-02-13 9:16 ` Vlastimil Babka
2024-02-13 9:21 ` Marco Elver
2024-02-13 11:34 ` Vlastimil Babka
2024-02-13 12:40 ` Oscar Salvador
2024-02-13 12:58 ` Marco Elver
2024-02-13 9:46 ` Oscar Salvador
2024-02-13 13:42 ` Vlastimil Babka
2024-02-13 15:29 ` Oscar Salvador
2024-02-13 16:04 ` Oscar Salvador
2024-02-12 22:30 ` [PATCH v8 3/5] mm,page_owner: Display all stacks and their count Oscar Salvador
2024-02-13 8:38 ` Marco Elver
2024-02-13 9:19 ` Oscar Salvador
2024-02-13 14:25 ` Vlastimil Babka
2024-02-13 15:33 ` Oscar Salvador
2024-02-13 15:36 ` Vlastimil Babka
2024-02-12 22:30 ` [PATCH v8 4/5] mm,page_owner: Filter out stacks by a threshold Oscar Salvador
2024-02-13 8:41 ` Marco Elver
2024-02-13 8:44 ` Marco Elver
2024-02-13 9:21 ` Oscar Salvador
2024-02-13 14:56 ` Vlastimil Babka
2024-02-12 22:30 ` [PATCH v8 5/5] mm,page_owner: Update Documentation regarding page_owner_stacks Oscar Salvador
2024-02-13 8:45 ` Marco Elver
2024-02-13 9:13 ` Oscar Salvador
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240212223029.30769-3-osalvador@suse.de \
--to=osalvador@suse.de \
--cc=akpm@linux-foundation.org \
--cc=andreyknvl@gmail.com \
--cc=elver@google.com \
--cc=glider@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@suse.com \
--cc=vbabka@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox