From: Gang Li <ligang.bdlg@bytedance.com>
To: akpm@linux-foundation.org
Cc: songmuchun@bytedance.com, hca@linux.ibm.com, gor@linux.ibm.com,
agordeev@linux.ibm.com, borntraeger@linux.ibm.com,
svens@linux.ibm.com, ebiederm@xmission.com,
keescook@chromium.org, viro@zeniv.linux.org.uk,
rostedt@goodmis.org, mingo@redhat.com, peterz@infradead.org,
acme@kernel.org, mark.rutland@arm.com,
alexander.shishkin@linux.intel.com, jolsa@kernel.org,
namhyung@kernel.org, david@redhat.com, imbrenda@linux.ibm.com,
apopple@nvidia.com, adobriyan@gmail.com,
stephen.s.brennan@oracle.com, ohoono.kwon@samsung.com,
haolee.swjtu@gmail.com, kaleshsingh@google.com,
zhengqi.arch@bytedance.com, peterx@redhat.com,
shy828301@gmail.com, surenb@google.com, ccross@google.com,
vincent.whitchurch@axis.com, tglx@linutronix.de,
bigeasy@linutronix.de, fenghua.yu@intel.com,
linux-s390@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-mm@kvack.org, linux-fsdevel@vger.kernel.org,
linux-perf-users@vger.kernel.org,
Gang Li <ligang.bdlg@bytedance.com>
Subject: [PATCH 2/5 v1] mm: add numa_count field for rss_stat
Date: Thu, 12 May 2022 12:46:31 +0800 [thread overview]
Message-ID: <20220512044634.63586-3-ligang.bdlg@bytedance.com> (raw)
In-Reply-To: <20220512044634.63586-1-ligang.bdlg@bytedance.com>
This patch adds a new field, `numa_count`, to mm_rss_stat and
task_rss_stat.
Each `numa_count` is an array with `num_possible_nodes()` entries
(`atomic_long_t` in mm_rss_stat, `int` in task_rss_stat).
To reduce memory consumption, they only contain the sum of the rss that
is needed by `oom_badness`, instead of recording the different kinds of
rss separately.
Signed-off-by: Gang Li <ligang.bdlg@bytedance.com>
---
include/linux/mm_types_task.h | 6 +++
kernel/fork.c | 70 +++++++++++++++++++++++++++++++++--
2 files changed, 73 insertions(+), 3 deletions(-)
diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index 3e7da8c7ab95..c1ac2a33b697 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -64,11 +64,17 @@ enum {
struct task_rss_stat {
int events; /* for synchronization threshold */
int count[NR_MM_COUNTERS];
+#ifdef CONFIG_NUMA
+ int *numa_count;
+#endif
};
#endif /* USE_SPLIT_PTE_PTLOCKS */
struct mm_rss_stat {
atomic_long_t count[NR_MM_COUNTERS];
+#ifdef CONFIG_NUMA
+ atomic_long_t *numa_count;
+#endif
};
struct page_frag {
diff --git a/kernel/fork.c b/kernel/fork.c
index 9796897560ab..e549e0b30e2b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -141,6 +141,10 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
+#if (defined SPLIT_RSS_COUNTING) && (defined CONFIG_NUMA)
+#define SPLIT_RSS_NUMA_COUNTING
+#endif
+
#ifdef CONFIG_PROVE_RCU
int lockdep_tasklist_lock_is_held(void)
{
@@ -765,6 +769,16 @@ static void check_mm(struct mm_struct *mm)
mm, resident_page_types[i], x);
}
+#ifdef CONFIG_NUMA
+ for (i = 0; i < num_possible_nodes(); i++) {
+ long x = atomic_long_read(&mm->rss_stat.numa_count[i]);
+
+ if (unlikely(x))
+ pr_alert("BUG: Bad rss-counter state mm:%p node:%d val:%ld\n",
+ mm, i, x);
+ }
+#endif
+
if (mm_pgtables_bytes(mm))
pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n",
mm_pgtables_bytes(mm));
@@ -777,6 +791,29 @@ static void check_mm(struct mm_struct *mm)
#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
+#ifdef CONFIG_NUMA
+static inline void mm_free_rss_stat(struct mm_struct *mm)
+{
+ kfree(mm->rss_stat.numa_count);
+}
+
+static inline int mm_init_rss_stat(struct mm_struct *mm)
+{
+ memset(&mm->rss_stat.count, 0, sizeof(mm->rss_stat.count));
+ mm->rss_stat.numa_count = kcalloc(num_possible_nodes(), sizeof(atomic_long_t), GFP_KERNEL);
+ if (unlikely(!mm->rss_stat.numa_count))
+ return -ENOMEM;
+ return 0;
+}
+#else
+static inline void mm_free_rss_stat(struct mm_struct *mm) {}
+static inline int mm_init_rss_stat(struct mm_struct *mm)
+{
+ memset(&mm->rss_stat.count, 0, sizeof(mm->rss_stat.count));
+ return 0;
+}
+#endif
+
/*
* Called when the last reference to the mm
* is dropped: either by a lazy thread or by
@@ -791,6 +828,7 @@ void __mmdrop(struct mm_struct *mm)
destroy_context(mm);
mmu_notifier_subscriptions_destroy(mm);
check_mm(mm);
+ mm_free_rss_stat(mm);
put_user_ns(mm->user_ns);
free_mm(mm);
}
@@ -831,12 +869,22 @@ static inline void put_signal_struct(struct signal_struct *sig)
free_signal_struct(sig);
}
+#ifdef SPLIT_RSS_NUMA_COUNTING
+void rss_stat_free(struct task_struct *p)
+{
+ kfree(p->rss_stat.numa_count);
+}
+#else
+void rss_stat_free(struct task_struct *p) {}
+#endif
+
void __put_task_struct(struct task_struct *tsk)
{
WARN_ON(!tsk->exit_state);
WARN_ON(refcount_read(&tsk->usage));
WARN_ON(tsk == current);
+ rss_stat_free(tsk);
io_uring_free(tsk);
cgroup_free(tsk);
task_numa_free(tsk, true);
@@ -963,6 +1011,7 @@ void set_task_stack_end_magic(struct task_struct *tsk)
static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
{
struct task_struct *tsk;
+ int *numa_count __maybe_unused;
int err;
if (node == NUMA_NO_NODE)
@@ -984,9 +1033,16 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
#endif
account_kernel_stack(tsk, 1);
+#ifdef SPLIT_RSS_NUMA_COUNTING
+ numa_count = kcalloc(num_possible_nodes(), sizeof(int), GFP_KERNEL);
+ if (!numa_count)
+ goto free_stack;
+ tsk->rss_stat.numa_count = numa_count;
+#endif
+
err = scs_prepare(tsk, node);
if (err)
- goto free_stack;
+ goto free_rss_stat;
#ifdef CONFIG_SECCOMP
/*
@@ -1047,6 +1103,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
#endif
return tsk;
+free_rss_stat:
+#ifdef SPLIT_RSS_NUMA_COUNTING
+ kfree(numa_count);
+#endif
free_stack:
exit_task_stack_account(tsk);
free_thread_stack(tsk);
@@ -1117,7 +1177,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
mm->map_count = 0;
mm->locked_vm = 0;
atomic64_set(&mm->pinned_vm, 0);
- memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
spin_lock_init(&mm->arg_lock);
mm_init_cpumask(mm);
@@ -1144,6 +1203,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
if (mm_alloc_pgd(mm))
goto fail_nopgd;
+ if (mm_init_rss_stat(mm))
+ goto fail_nocontext;
+
if (init_new_context(p, mm))
goto fail_nocontext;
@@ -2139,7 +2201,9 @@ static __latent_entropy struct task_struct *copy_process(
p->io_uring = NULL;
#endif
-#if defined(SPLIT_RSS_COUNTING)
+#ifdef SPLIT_RSS_NUMA_COUNTING
+ memset(&p->rss_stat, 0, sizeof(p->rss_stat) - sizeof(p->rss_stat.numa_count));
+#else
memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif
--
2.20.1
next prev parent reply other threads:[~2022-05-12 4:47 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-05-12 4:46 [PATCH 0/5 v1] mm, oom: Introduce per numa node oom for CONSTRAINT_MEMORY_POLICY Gang Li
2022-05-12 4:46 ` [PATCH 1/5 v1] mm: add a new parameter `node` to `get/add/inc/dec_mm_counter` Gang Li
2022-05-12 4:46 ` Gang Li [this message]
2022-05-12 4:46 ` [PATCH 3/5 v1] mm: add numa fields for tracepoint rss_stat Gang Li
2022-05-12 4:46 ` [PATCH 4/5 v1] mm: enable per numa node rss_stat count Gang Li
2022-05-17 2:28 ` [mm] c9dc81ef10: BUG:Bad_rss-counter_state_mm:#node:#val kernel test robot
2022-05-12 4:46 ` [PATCH 5/5 v1] mm, oom: enable per numa node oom for CONSTRAINT_MEMORY_POLICY Gang Li
2022-05-12 22:31 ` [PATCH 0/5 v1] mm, oom: Introduce " Suren Baghdasaryan
2022-05-16 16:44 ` Michal Hocko
2022-06-15 10:13 ` Gang Li
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220512044634.63586-3-ligang.bdlg@bytedance.com \
--to=ligang.bdlg@bytedance.com \
--cc=acme@kernel.org \
--cc=adobriyan@gmail.com \
--cc=agordeev@linux.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=alexander.shishkin@linux.intel.com \
--cc=apopple@nvidia.com \
--cc=bigeasy@linutronix.de \
--cc=borntraeger@linux.ibm.com \
--cc=ccross@google.com \
--cc=david@redhat.com \
--cc=ebiederm@xmission.com \
--cc=fenghua.yu@intel.com \
--cc=gor@linux.ibm.com \
--cc=haolee.swjtu@gmail.com \
--cc=hca@linux.ibm.com \
--cc=imbrenda@linux.ibm.com \
--cc=jolsa@kernel.org \
--cc=kaleshsingh@google.com \
--cc=keescook@chromium.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=ohoono.kwon@samsung.com \
--cc=peterx@redhat.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=shy828301@gmail.com \
--cc=songmuchun@bytedance.com \
--cc=stephen.s.brennan@oracle.com \
--cc=surenb@google.com \
--cc=svens@linux.ibm.com \
--cc=tglx@linutronix.de \
--cc=vincent.whitchurch@axis.com \
--cc=viro@zeniv.linux.org.uk \
--cc=zhengqi.arch@bytedance.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox