linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
To: mhiramat@kernel.org, oleg@redhat.com, peterz@infradead.org,
	srikar@linux.vnet.ibm.com
Cc: acme@kernel.org, ananth@linux.vnet.ibm.com,
	akpm@linux-foundation.org, alexander.shishkin@linux.intel.com,
	alexis.berlemont@gmail.com, corbet@lwn.net,
	dan.j.williams@intel.com, gregkh@linuxfoundation.org,
	huawei.libin@huawei.com, hughd@google.com, jack@suse.cz,
	jglisse@redhat.com, jolsa@redhat.com, kan.liang@intel.com,
	kirill.shutemov@linux.intel.com, kjlx@templeofstupid.com,
	kstewart@linuxfoundation.org, linux-doc@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	mhocko@suse.com, milian.wolff@kdab.com, mingo@redhat.com,
	namhyung@kernel.org, naveen.n.rao@linux.vnet.ibm.com,
	pc@us.ibm.com, pombredanne@nexb.com, rostedt@goodmis.org,
	tglx@linutronix.de, tmricht@linux.vnet.ibm.com,
	willy@infradead.org, yao.jin@linux.intel.com,
	fengguang.wu@intel.com,
	Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Subject: [PATCH 6/8] trace_uprobe/sdt: Fix multiple update of same reference counter
Date: Tue, 13 Mar 2018 18:26:01 +0530	[thread overview]
Message-ID: <20180313125603.19819-7-ravi.bangoria@linux.vnet.ibm.com> (raw)
In-Reply-To: <20180313125603.19819-1-ravi.bangoria@linux.vnet.ibm.com>

For tiny binaries/libraries, different mmap regions point to the
same file portion. In such cases, we may increment the reference
counter multiple times. But during de-registration, the reference
counter is decremented only once, leaving the reference counter > 0
even if no one is tracing that marker.

Ensure increment and decrement happen in sync by keeping a list of
mms in trace_uprobe. Increment the reference counter only if the mm
is not present in the list, and decrement it only if the mm is
present in the list.

Example

  # echo "p:sdt_tick/loop2 /tmp/tick:0x6e4(0x10036)" > uprobe_events

Before patch:

  # perf stat -a -e sdt_tick:loop2
  # /tmp/tick
  # dd if=/proc/`pgrep tick`/mem bs=1 count=1 skip=$(( 0x10020036 )) 2>/dev/null | xxd
  0000000: 02                                       .

  # pkill perf
  # dd if=/proc/`pgrep tick`/mem bs=1 count=1 skip=$(( 0x10020036 )) 2>/dev/null | xxd
  0000000: 01                                       .

After patch:

  # perf stat -a -e sdt_tick:loop2
  # /tmp/tick
  # dd if=/proc/`pgrep tick`/mem bs=1 count=1 skip=$(( 0x10020036 )) 2>/dev/null | xxd
  0000000: 01                                       .

  # pkill perf
  # dd if=/proc/`pgrep tick`/mem bs=1 count=1 skip=$(( 0x10020036 )) 2>/dev/null | xxd
  0000000: 00                                       .

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
---
 kernel/trace/trace_uprobe.c | 105 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 103 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index b6c9b48..9bf3f7a 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -50,6 +50,11 @@ struct trace_uprobe_filter {
 	struct list_head	perf_events;
 };
 
+struct sdt_mm_list {
+	struct mm_struct *mm;
+	struct sdt_mm_list *next;
+};
+
 /*
  * uprobe event core functions
  */
@@ -61,6 +66,8 @@ struct trace_uprobe {
 	char				*filename;
 	unsigned long			offset;
 	unsigned long			ref_ctr_offset;
+	struct sdt_mm_list		*sml;
+	struct rw_semaphore		sml_rw_sem;
 	unsigned long			nhit;
 	struct trace_probe		tp;
 };
@@ -274,6 +281,7 @@ static inline bool is_ret_probe(struct trace_uprobe *tu)
 	if (is_ret)
 		tu->consumer.ret_handler = uretprobe_dispatcher;
 	init_trace_uprobe_filter(&tu->filter);
+	init_rwsem(&tu->sml_rw_sem);
 	return tu;
 
 error:
@@ -921,6 +929,74 @@ static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
 	return trace_handle_return(s);
 }
 
+static bool sdt_check_mm_list(struct trace_uprobe *tu, struct mm_struct *mm)
+{
+	struct sdt_mm_list *tmp = tu->sml;
+
+	if (!tu->sml || !mm)
+		return false;
+
+	while (tmp) {
+		if (tmp->mm == mm)
+			return true;
+		tmp = tmp->next;
+	}
+
+	return false;
+}
+
+static void sdt_add_mm_list(struct trace_uprobe *tu, struct mm_struct *mm)
+{
+	struct sdt_mm_list *tmp;
+
+	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return;
+
+	tmp->mm = mm;
+	tmp->next = tu->sml;
+	tu->sml = tmp;
+}
+
+static void sdt_del_mm_list(struct trace_uprobe *tu, struct mm_struct *mm)
+{
+	struct sdt_mm_list *prev, *curr;
+
+	if (!tu->sml)
+		return;
+
+	if (tu->sml->mm == mm) {
+		curr = tu->sml;
+		tu->sml = tu->sml->next;
+		kfree(curr);
+		return;
+	}
+
+	prev = tu->sml;
+	curr = tu->sml->next;
+	while (curr) {
+		if (curr->mm == mm) {
+			prev->next = curr->next;
+			kfree(curr);
+			return;
+		}
+		prev = curr;
+		curr = curr->next;
+	}
+}
+
+static void sdt_flush_mm_list(struct trace_uprobe *tu)
+{
+	struct sdt_mm_list *next, *curr = tu->sml;
+
+	while (curr) {
+		next = curr->next;
+		kfree(curr);
+		curr = next;
+	}
+	tu->sml = NULL;
+}
+
 static bool sdt_valid_vma(struct trace_uprobe *tu, struct vm_area_struct *vma)
 {
 	unsigned long vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
@@ -989,17 +1065,25 @@ static void sdt_increment_ref_ctr(struct trace_uprobe *tu)
 	if (IS_ERR(info))
 		goto out;
 
+	down_write(&tu->sml_rw_sem);
 	while (info) {
+		if (sdt_check_mm_list(tu, info->mm))
+			goto cont;
+
 		down_write(&info->mm->mmap_sem);
 
 		vma = sdt_find_vma(info->mm, tu);
 		vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
-		sdt_update_ref_ctr(info->mm, vaddr, 1);
+		if (!sdt_update_ref_ctr(info->mm, vaddr, 1))
+			sdt_add_mm_list(tu, info->mm);
 
 		up_write(&info->mm->mmap_sem);
+
+cont:
 		mmput(info->mm);
 		info = uprobe_free_map_info(info);
 	}
+	up_write(&tu->sml_rw_sem);
 
 out:
 	uprobe_end_dup_mmap();
@@ -1020,8 +1104,16 @@ void trace_uprobe_mmap_callback(struct vm_area_struct *vma)
 		    !trace_probe_is_enabled(&tu->tp))
 			continue;
 
+		down_write(&tu->sml_rw_sem);
+		if (sdt_check_mm_list(tu, vma->vm_mm))
+			goto cont;
+
 		vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
-		sdt_update_ref_ctr(vma->vm_mm, vaddr, 1);
+		if (!sdt_update_ref_ctr(vma->vm_mm, vaddr, 1))
+			sdt_add_mm_list(tu, vma->vm_mm);
+
+cont:
+		up_write(&tu->sml_rw_sem);
 	}
 	mutex_unlock(&uprobe_lock);
 }
@@ -1038,7 +1130,11 @@ static void sdt_decrement_ref_ctr(struct trace_uprobe *tu)
 	if (IS_ERR(info))
 		goto out;
 
+	down_write(&tu->sml_rw_sem);
 	while (info) {
+		if (!sdt_check_mm_list(tu, info->mm))
+			goto cont;
+
 		down_write(&info->mm->mmap_sem);
 
 		vma = sdt_find_vma(info->mm, tu);
@@ -1046,9 +1142,14 @@ static void sdt_decrement_ref_ctr(struct trace_uprobe *tu)
 		sdt_update_ref_ctr(info->mm, vaddr, -1);
 
 		up_write(&info->mm->mmap_sem);
+		sdt_del_mm_list(tu, info->mm);
+
+cont:
 		mmput(info->mm);
 		info = uprobe_free_map_info(info);
 	}
+	sdt_flush_mm_list(tu);
+	up_write(&tu->sml_rw_sem);
 
 out:
 	uprobe_end_dup_mmap();
-- 
1.8.3.1

  parent reply	other threads:[~2018-03-13 12:55 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-13 12:55 [PATCH 0/8] trace_uprobe: Support SDT markers having reference count (semaphore) Ravi Bangoria
2018-03-13 12:55 ` [PATCH 1/8] Uprobe: Export vaddr <-> offset conversion functions Ravi Bangoria
2018-03-13 20:36   ` Jerome Glisse
2018-03-15 16:27   ` Steven Rostedt
2018-03-16  8:54     ` Ravi Bangoria
2018-03-13 12:55 ` [PATCH 2/8] mm: Prefix vma_ to vaddr_to_offset() and offset_to_vaddr() Ravi Bangoria
2018-03-13 20:38   ` Jerome Glisse
2018-03-15 16:28   ` Steven Rostedt
2018-03-16  8:58     ` Ravi Bangoria
2018-03-13 12:55 ` [PATCH 3/8] Uprobe: Rename map_info to uprobe_map_info Ravi Bangoria
2018-03-13 20:39   ` Jerome Glisse
2018-03-15 16:44   ` Steven Rostedt
2018-03-16  8:56     ` Ravi Bangoria
2018-03-13 12:55 ` [PATCH 4/8] Uprobe: Export uprobe_map_info along with uprobe_{build/free}_map_info() Ravi Bangoria
2018-03-13 20:40   ` Jerome Glisse
2018-03-15 16:32   ` Steven Rostedt
2018-03-16  8:59     ` Ravi Bangoria
2018-03-13 12:56 ` [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore) Ravi Bangoria
2018-03-14 13:48   ` Masami Hiramatsu
2018-03-14 15:12     ` Ravi Bangoria
2018-03-14 16:59   ` Oleg Nesterov
2018-03-15 11:23     ` Ravi Bangoria
2018-03-19  4:28     ` Ravi Bangoria
2018-03-19 13:46       ` Oleg Nesterov
2018-03-14 21:58   ` Steven Rostedt
2018-03-15 14:21   ` Oleg Nesterov
2018-03-15 14:30     ` Oleg Nesterov
2018-03-16  9:28       ` Ravi Bangoria
2018-03-16 11:39         ` Oleg Nesterov
2018-03-16 11:46           ` Ravi Bangoria
2018-03-16  9:21     ` Ravi Bangoria
2018-03-15 15:01   ` Oleg Nesterov
2018-03-16  9:31     ` Ravi Bangoria
2018-03-15 16:48   ` Steven Rostedt
2018-03-16  9:01     ` Ravi Bangoria
2018-03-16 16:16     ` Oleg Nesterov
2018-03-13 12:56 ` Ravi Bangoria [this message]
2018-03-14 14:15   ` [PATCH 6/8] trace_uprobe/sdt: Fix multiple update of same reference counter Masami Hiramatsu
2018-03-14 15:15     ` Ravi Bangoria
2018-03-15 14:49   ` Oleg Nesterov
2018-03-16 12:12     ` Ravi Bangoria
2018-03-16 13:49       ` Ravi Bangoria
2018-03-16 17:50       ` Oleg Nesterov
2018-03-19  9:18         ` Ravi Bangoria
2018-03-19 13:40           ` Oleg Nesterov
2018-03-13 12:56 ` [PATCH 7/8] perf probe: Support SDT markers having reference counter (semaphore) Ravi Bangoria
2018-03-14 14:09   ` Masami Hiramatsu
2018-03-14 15:21     ` Ravi Bangoria
2018-03-13 12:56 ` [PATCH 8/8] trace_uprobe/sdt: Document about reference counter Ravi Bangoria
2018-03-14 13:50   ` Masami Hiramatsu
2018-03-14 15:22     ` Ravi Bangoria
2018-03-15 12:47       ` Masami Hiramatsu
2018-03-16  9:42         ` Ravi Bangoria
2018-03-16 14:26           ` Masami Hiramatsu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180313125603.19819-7-ravi.bangoria@linux.vnet.ibm.com \
    --to=ravi.bangoria@linux.vnet.ibm.com \
    --cc=acme@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=alexis.berlemont@gmail.com \
    --cc=ananth@linux.vnet.ibm.com \
    --cc=corbet@lwn.net \
    --cc=dan.j.williams@intel.com \
    --cc=fengguang.wu@intel.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=huawei.libin@huawei.com \
    --cc=hughd@google.com \
    --cc=jack@suse.cz \
    --cc=jglisse@redhat.com \
    --cc=jolsa@redhat.com \
    --cc=kan.liang@intel.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=kjlx@templeofstupid.com \
    --cc=kstewart@linuxfoundation.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhiramat@kernel.org \
    --cc=mhocko@suse.com \
    --cc=milian.wolff@kdab.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=naveen.n.rao@linux.vnet.ibm.com \
    --cc=oleg@redhat.com \
    --cc=pc@us.ibm.com \
    --cc=peterz@infradead.org \
    --cc=pombredanne@nexb.com \
    --cc=rostedt@goodmis.org \
    --cc=srikar@linux.vnet.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=tmricht@linux.vnet.ibm.com \
    --cc=willy@infradead.org \
    --cc=yao.jin@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox