From: "Yakunin, Dmitry (Nebius)" <zeil@nebius.com>
To: "cgroups@vger.kernel.org" <cgroups@vger.kernel.org>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
"linux-mm@kvack.org" <linux-mm@kvack.org>
Cc: NB-Core Team <NB-CoreTeam@nebius.com>,
"tj@kernel.org" <tj@kernel.org>,
"hannes@cmpxchg.org" <hannes@cmpxchg.org>,
"mhocko@kernel.org" <mhocko@kernel.org>,
"Yakunin, Dmitry (Nebius)" <zeil@nebius.com>,
Konstantin Khlebnikov <khlebnikov@yandex-team.ru>,
Andrey Ryabinin <arbn@yandex-team.com>
Subject: [RFC PATCH 2/3] proc/kpagecgroup: report also inode numbers of offline cgroups
Date: Mon, 11 Sep 2023 07:55:20 +0000
Message-ID: <20230911075437.74027-3-zeil@nebius.com>
In-Reply-To: <20230911075437.74027-1-zeil@nebius.com>

By default this interface reports the inode number of the closest online
ancestor when the cgroup a page is charged to is offline (removed).
Information about the real owner is needed to detect which pages keep a
removed cgroup alive.

This patch adds a per-file mode that is switched by writing a 64-bit flags
value into an open /proc/kpagecgroup file descriptor. For now only the
first bit is used.
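
For illustration, a minimal userspace sketch of the intended usage (not
part of this patch; the helper program and its error handling are
hypothetical, only /proc/kpagecgroup and the single flag bit are defined
by this series):

	/* kpagecgroup-owner.c: print the memcg inode owning a given pfn */
	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	int main(int argc, char **argv)
	{
		/* bit 0 set: report the real (possibly offline) owner */
		uint64_t flags = 1;
		uint64_t pfn, ino;
		int fd;

		if (argc < 2)
			return 1;
		pfn = strtoull(argv[1], NULL, 0);

		fd = open("/proc/kpagecgroup", O_RDWR);
		if (fd < 0)
			return 1;
		/* switch this open file into "report offline cgroups" mode */
		if (write(fd, &flags, sizeof(flags)) != sizeof(flags))
			return 1;
		/* the file holds one u64 memcg inode number per pfn */
		if (pread(fd, &ino, sizeof(ino), pfn * sizeof(ino)) != sizeof(ino))
			return 1;
		printf("pfn %llu: memcg inode %llu\n",
		       (unsigned long long)pfn, (unsigned long long)ino);
		close(fd);
		return 0;
	}

Without the write, the file behaves as before and reports the closest
online ancestor.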
Link: https://lore.kernel.org/lkml/153414348994.737150.10057219558779418929.stgit@buzz
Suggested-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reviewed-by: Andrey Ryabinin <arbn@yandex-team.com>
Signed-off-by: Dmitry Yakunin <zeil@nebius.com>
---
fs/proc/page.c | 24 ++++++++++++++++++++++--
include/linux/memcontrol.h | 2 +-
mm/memcontrol.c | 5 +++--
mm/memory-failure.c | 2 +-
4 files changed, 27 insertions(+), 6 deletions(-)
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 195b077c0fac..ae6feca2bbc7 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -278,6 +278,7 @@ static const struct proc_ops kpageflags_proc_ops = {
 static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
 				size_t count, loff_t *ppos)
 {
+	unsigned long flags = (unsigned long)file->private_data;
 	const unsigned long max_dump_pfn = get_max_dump_pfn();
 	u64 __user *out = (u64 __user *)buf;
 	struct page *ppage;
@@ -301,7 +302,7 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
 		ppage = pfn_to_online_page(pfn);
 		if (ppage)
-			ino = page_cgroup_ino(ppage);
+			ino = page_cgroup_ino(ppage, !(flags & 1));
 		else
 			ino = 0;
@@ -323,10 +324,29 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
 	return ret;
 }
 
+static ssize_t kpagecgroup_write(struct file *file, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	u64 flags;
+
+	if (count != 8)
+		return -EINVAL;
+
+	if (get_user(flags, (u64 __user *)buf))
+		return -EFAULT;
+
+	if (flags > 1)
+		return -EINVAL;
+
+	file->private_data = (void *)(unsigned long)flags;
+	return count;
+}
+
 static const struct proc_ops kpagecgroup_proc_ops = {
 	.proc_flags	= PROC_ENTRY_PERMANENT,
 	.proc_lseek	= mem_lseek,
 	.proc_read	= kpagecgroup_read,
+	.proc_write	= kpagecgroup_write,
 };
 
 #endif /* CONFIG_MEMCG */
@@ -335,7 +355,7 @@ static int __init proc_page_init(void)
 	proc_create("kpagecount", S_IRUSR, NULL, &kpagecount_proc_ops);
 	proc_create("kpageflags", S_IRUSR, NULL, &kpageflags_proc_ops);
 #ifdef CONFIG_MEMCG
-	proc_create("kpagecgroup", S_IRUSR, NULL, &kpagecgroup_proc_ops);
+	proc_create("kpagecgroup", 0600, NULL, &kpagecgroup_proc_ops);
 #endif
 	return 0;
 }
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 222d7370134c..bbbddaa260d3 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -892,7 +892,7 @@ static inline bool mm_match_cgroup(struct mm_struct *mm,
 }
 
 struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio);
-ino_t page_cgroup_ino(struct page *page);
+ino_t page_cgroup_ino(struct page *page, bool online);
 
 static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7b3d4a10ac63..48cfe3695e06 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -380,6 +380,7 @@ struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio)
 /**
  * page_cgroup_ino - return inode number of the memcg a page is charged to
  * @page: the page
+ * @online: return closest online ancestor
  *
  * Look up the closest online ancestor of the memory cgroup @page is charged to
@@ -390,7 +391,7 @@ struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio)
  * after page_cgroup_ino() returns, so it only should be used by callers that
  * do not care (such as procfs interfaces).
  */
-ino_t page_cgroup_ino(struct page *page)
+ino_t page_cgroup_ino(struct page *page, bool online)
 {
 	struct mem_cgroup *memcg;
 	unsigned long ino = 0;
@@ -399,7 +400,7 @@ ino_t page_cgroup_ino(struct page *page)
 	/* page_folio() is racy here, but the entire function is racy anyway */
 	memcg = folio_memcg_check(page_folio(page));
 
-	while (memcg && !(memcg->css.flags & CSS_ONLINE))
+	while (memcg && online && !(memcg->css.flags & CSS_ONLINE))
 		memcg = parent_mem_cgroup(memcg);
 	if (memcg)
 		ino = cgroup_ino(memcg->css.cgroup);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 5b663eca1f29..6734489b2435 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -267,7 +267,7 @@ static int hwpoison_filter_task(struct page *p)
 	if (!hwpoison_filter_memcg)
 		return 0;
 
-	if (page_cgroup_ino(p) != hwpoison_filter_memcg)
+	if (page_cgroup_ino(p, true) != hwpoison_filter_memcg)
 		return -EINVAL;
 
 	return 0;
--
2.25.1
Thread overview: 8+ messages
2023-09-11 7:55 [RFC PATCH 0/3] Helpers for debugging dying cgroups Yakunin, Dmitry (Nebius)
2023-09-11 7:55 ` [RFC PATCH 1/3] cgroup: list all subsystem states in debugfs files Yakunin, Dmitry (Nebius)
2023-09-11 18:55 ` tj
2023-09-13 10:33 ` Dmitry Yakunin
2023-09-11 22:16 ` Yosry Ahmed
2023-09-13 10:35 ` Dmitry Yakunin
2023-09-11 7:55 ` Yakunin, Dmitry (Nebius) [this message]
2023-09-11 7:55 ` [RFC PATCH 3/3] tools/mm/page-types: add flag for showing inodes of offline cgroups Yakunin, Dmitry (Nebius)