linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Gang Li <ligang.bdlg@bytedance.com>
To: mhocko@suse.com, akpm@linux-foundation.org, surenb@google.com
Cc: hca@linux.ibm.com, gor@linux.ibm.com, agordeev@linux.ibm.com,
	borntraeger@linux.ibm.com, svens@linux.ibm.com,
	viro@zeniv.linux.org.uk, ebiederm@xmission.com,
	keescook@chromium.org, rostedt@goodmis.org, mingo@redhat.com,
	peterz@infradead.org, acme@kernel.org, mark.rutland@arm.com,
	alexander.shishkin@linux.intel.com, jolsa@kernel.org,
	namhyung@kernel.org, david@redhat.com, imbrenda@linux.ibm.com,
	adobriyan@gmail.com, yang.yang29@zte.com.cn, brauner@kernel.org,
	stephen.s.brennan@oracle.com, zhengqi.arch@bytedance.com,
	haolee.swjtu@gmail.com, xu.xin16@zte.com.cn,
	Liam.Howlett@Oracle.com, ohoono.kwon@samsung.com,
	peterx@redhat.com, arnd@arndb.de, shy828301@gmail.com,
	alex.sierra@amd.com, xianting.tian@linux.alibaba.com,
	willy@infradead.org, ccross@google.com, vbabka@suse.cz,
	sujiaxun@uniontech.com, sfr@canb.auug.org.au,
	vasily.averin@linux.dev, mgorman@suse.de, vvghjk1234@gmail.com,
	tglx@linutronix.de, luto@kernel.org, bigeasy@linutronix.de,
	fenghua.yu@intel.com, linux-s390@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org, linux-perf-users@vger.kernel.org,
	Gang Li <ligang.bdlg@bytedance.com>
Subject: [PATCH v2 2/5] mm: add numa_count field for rss_stat
Date: Fri,  8 Jul 2022 16:21:26 +0800	[thread overview]
Message-ID: <20220708082129.80115-3-ligang.bdlg@bytedance.com> (raw)
In-Reply-To: <20220708082129.80115-1-ligang.bdlg@bytedance.com>

This patch adds a new field, `numa_count`, to both mm_rss_stat and
task_rss_stat.

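Each `numa_count` array holds one counter per possible NUMA node, i.e.
`sizeof(long) * num_possible_nodes()` bytes for mm_rss_stat (and
`sizeof(int) * num_possible_nodes()` bytes for task_rss_stat).
To reduce memory consumption, each entry only contains the sum of the
rss that is needed by `oom_badness`, instead of recording the
different kinds of rss separately.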

Signed-off-by: Gang Li <ligang.bdlg@bytedance.com>
---
 include/linux/mm_types_task.h |  6 +++
 kernel/fork.c                 | 70 +++++++++++++++++++++++++++++++++--
 2 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index 32512af31721..9fd34ab484f4 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -52,11 +52,17 @@ enum {
 struct task_rss_stat {
 	int events;	/* for synchronization threshold */
 	int count[NR_MM_COUNTERS];
+#ifdef CONFIG_NUMA
+	int *numa_count;
+#endif
 };
 #endif /* USE_SPLIT_PTE_PTLOCKS */
 
 struct mm_rss_stat {
 	atomic_long_t count[NR_MM_COUNTERS];
+#ifdef CONFIG_NUMA
+	atomic_long_t *numa_count;
+#endif
 };
 
 struct page_frag {
diff --git a/kernel/fork.c b/kernel/fork.c
index 23f0ba3affe5..f4f93d6fecd5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -140,6 +140,10 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
+#if (defined SPLIT_RSS_COUNTING) && (defined CONFIG_NUMA)
+#define SPLIT_RSS_NUMA_COUNTING
+#endif
+
 #ifdef CONFIG_PROVE_RCU
 int lockdep_tasklist_lock_is_held(void)
 {
@@ -757,6 +761,16 @@ static void check_mm(struct mm_struct *mm)
 				 mm, resident_page_types[i], x);
 	}
 
+#ifdef CONFIG_NUMA
+	for (i = 0; i < num_possible_nodes(); i++) {
+		long x = atomic_long_read(&mm->rss_stat.numa_count[i]);
+
+		if (unlikely(x))
+			pr_alert("BUG: Bad rss-counter state mm:%p node:%d val:%ld\n",
+				 mm, i, x);
+	}
+#endif
+
 	if (mm_pgtables_bytes(mm))
 		pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n",
 				mm_pgtables_bytes(mm));
@@ -769,6 +783,29 @@ static void check_mm(struct mm_struct *mm)
 #define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
 #define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
 
+#ifdef CONFIG_NUMA
+static inline void mm_free_rss_stat(struct mm_struct *mm)
+{
+	kfree(mm->rss_stat.numa_count);
+}
+
+static inline int mm_init_rss_stat(struct mm_struct *mm)
+{
+	memset(&mm->rss_stat.count, 0, sizeof(mm->rss_stat.count));
+	mm->rss_stat.numa_count = kcalloc(num_possible_nodes(), sizeof(atomic_long_t), GFP_KERNEL);
+	if (unlikely(!mm->rss_stat.numa_count))
+		return -ENOMEM;
+	return 0;
+}
+#else
+static inline void mm_free_rss_stat(struct mm_struct *mm) {}
+static inline int mm_init_rss_stat(struct mm_struct *mm)
+{
+	memset(&mm->rss_stat.count, 0, sizeof(mm->rss_stat.count));
+	return 0;
+}
+#endif
+
 /*
  * Called when the last reference to the mm
  * is dropped: either by a lazy thread or by
@@ -783,6 +820,7 @@ void __mmdrop(struct mm_struct *mm)
 	destroy_context(mm);
 	mmu_notifier_subscriptions_destroy(mm);
 	check_mm(mm);
+	mm_free_rss_stat(mm);
 	put_user_ns(mm->user_ns);
 	mm_pasid_drop(mm);
 	free_mm(mm);
@@ -824,12 +862,22 @@ static inline void put_signal_struct(struct signal_struct *sig)
 		free_signal_struct(sig);
 }
 
+#ifdef SPLIT_RSS_NUMA_COUNTING
+void rss_stat_free(struct task_struct *p)
+{
+	kfree(p->rss_stat.numa_count);
+}
+#else
+void rss_stat_free(struct task_struct *p) {}
+#endif
+
 void __put_task_struct(struct task_struct *tsk)
 {
 	WARN_ON(!tsk->exit_state);
 	WARN_ON(refcount_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
+	rss_stat_free(tsk);
 	io_uring_free(tsk);
 	cgroup_free(tsk);
 	task_numa_free(tsk, true);
@@ -956,6 +1004,7 @@ void set_task_stack_end_magic(struct task_struct *tsk)
 static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 {
 	struct task_struct *tsk;
+	int *numa_count __maybe_unused;
 	int err;
 
 	if (node == NUMA_NO_NODE)
@@ -977,9 +1026,16 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #endif
 	account_kernel_stack(tsk, 1);
 
+#ifdef SPLIT_RSS_NUMA_COUNTING
+	numa_count = kcalloc(num_possible_nodes(), sizeof(int), GFP_KERNEL);
+	if (!numa_count)
+		goto free_stack;
+	tsk->rss_stat.numa_count = numa_count;
+#endif
+
 	err = scs_prepare(tsk, node);
 	if (err)
-		goto free_stack;
+		goto free_rss_stat;
 
 #ifdef CONFIG_SECCOMP
 	/*
@@ -1045,6 +1101,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 
 	return tsk;
 
+free_rss_stat:
+#ifdef SPLIT_RSS_NUMA_COUNTING
+	kfree(numa_count);
+#endif
 free_stack:
 	exit_task_stack_account(tsk);
 	free_thread_stack(tsk);
@@ -1114,7 +1174,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm->map_count = 0;
 	mm->locked_vm = 0;
 	atomic64_set(&mm->pinned_vm, 0);
-	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
 	spin_lock_init(&mm->page_table_lock);
 	spin_lock_init(&mm->arg_lock);
 	mm_init_cpumask(mm);
@@ -1141,6 +1200,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	if (mm_alloc_pgd(mm))
 		goto fail_nopgd;
 
+	if (mm_init_rss_stat(mm))
+		goto fail_nocontext;
+
 	if (init_new_context(p, mm))
 		goto fail_nocontext;
 
@@ -2142,7 +2204,9 @@ static __latent_entropy struct task_struct *copy_process(
 	p->io_uring = NULL;
 #endif
 
-#if defined(SPLIT_RSS_COUNTING)
+#ifdef SPLIT_RSS_NUMA_COUNTING
+	memset(&p->rss_stat, 0, sizeof(p->rss_stat) - sizeof(p->rss_stat.numa_count));
+#else
 	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
 #endif
 
-- 
2.20.1



  parent reply	other threads:[~2022-07-08  8:22 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20220708082129.80115-1-ligang.bdlg@bytedance.com>
2022-07-08  8:21 ` [PATCH v2 1/5] mm: add a new parameter `node` to `get/add/inc/dec_mm_counter` Gang Li
2022-07-12  6:33   ` [mm] c20f7bacef: WARNING:possible_circular_locking_dependency_detected kernel test robot
2022-07-08  8:21 ` Gang Li [this message]
2022-07-08  8:21 ` [PATCH v2 3/5] mm: add numa fields for tracepoint rss_stat Gang Li
2022-07-08 17:31   ` Steven Rostedt
2022-07-08  8:21 ` [PATCH v2 4/5] mm: enable per numa node rss_stat count Gang Li
2022-07-08  8:21 ` [PATCH v2 5/5] mm, oom: enable per numa node oom for CONSTRAINT_{MEMORY_POLICY,CPUSET} Gang Li
2022-07-08  8:54 ` [PATCH v2 0/5] mm, oom: Introduce " Michal Hocko
2022-07-08  9:25   ` Gang Li
2022-07-08  9:37     ` Michal Hocko
2022-07-12 11:12   ` Abel Wu
2022-07-12 13:35     ` Michal Hocko
2022-07-12 15:00       ` Abel Wu
2022-07-18 12:11         ` Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220708082129.80115-3-ligang.bdlg@bytedance.com \
    --to=ligang.bdlg@bytedance.com \
    --cc=Liam.Howlett@Oracle.com \
    --cc=acme@kernel.org \
    --cc=adobriyan@gmail.com \
    --cc=agordeev@linux.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=alex.sierra@amd.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=arnd@arndb.de \
    --cc=bigeasy@linutronix.de \
    --cc=borntraeger@linux.ibm.com \
    --cc=brauner@kernel.org \
    --cc=ccross@google.com \
    --cc=david@redhat.com \
    --cc=ebiederm@xmission.com \
    --cc=fenghua.yu@intel.com \
    --cc=gor@linux.ibm.com \
    --cc=haolee.swjtu@gmail.com \
    --cc=hca@linux.ibm.com \
    --cc=imbrenda@linux.ibm.com \
    --cc=jolsa@kernel.org \
    --cc=keescook@chromium.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=ohoono.kwon@samsung.com \
    --cc=peterx@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=sfr@canb.auug.org.au \
    --cc=shy828301@gmail.com \
    --cc=stephen.s.brennan@oracle.com \
    --cc=sujiaxun@uniontech.com \
    --cc=surenb@google.com \
    --cc=svens@linux.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=vasily.averin@linux.dev \
    --cc=vbabka@suse.cz \
    --cc=viro@zeniv.linux.org.uk \
    --cc=vvghjk1234@gmail.com \
    --cc=willy@infradead.org \
    --cc=xianting.tian@linux.alibaba.com \
    --cc=xu.xin16@zte.com.cn \
    --cc=yang.yang29@zte.com.cn \
    --cc=zhengqi.arch@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox