linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Shiyang Ruan <ruansy.fnst@fujitsu.com>
To: <linux-kernel@vger.kernel.org>, <linux-xfs@vger.kernel.org>,
	<nvdimm@lists.linux.dev>, <linux-mm@kvack.org>,
	<linux-fsdevel@vger.kernel.org>
Cc: <djwong@kernel.org>, <dan.j.williams@intel.com>,
	<david@fromorbit.com>, <hch@infradead.org>, <jane.chu@oracle.com>
Subject: [PATCH v9 08/10] mm: Introduce mf_dax_kill_procs() for fsdax case
Date: Sun, 26 Dec 2021 22:34:37 +0800	[thread overview]
Message-ID: <20211226143439.3985960-9-ruansy.fnst@fujitsu.com> (raw)
In-Reply-To: <20211226143439.3985960-1-ruansy.fnst@fujitsu.com>

This function is called at the end of the RMAP routine, i.e. the
filesystem recovery function, to collect and kill processes using a shared
page of a DAX file.  The difference from mf_generic_kill_procs() is that it
accepts a file's (mapping, offset) instead of a struct page, because
different files' mappings and offsets may share the same page in fsdax mode.
It will be called when the filesystem's RMAP results are found.

Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
---
 include/linux/mm.h  |  2 +
 mm/memory-failure.c | 89 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 80 insertions(+), 11 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3f44cd9e296c..15212a78bf1d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3234,6 +3234,8 @@ enum mf_flags {
 	MF_MUST_KILL = 1 << 2,
 	MF_SOFT_OFFLINE = 1 << 3,
 };
+int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
+		      unsigned long count, int mf_flags);
 extern int memory_failure(unsigned long pfn, int flags);
 extern void memory_failure_queue(unsigned long pfn, int flags);
 extern void memory_failure_queue_kick(int cpu);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 3cc612b29f89..dc61f97bba2f 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -303,10 +303,9 @@ void shake_page(struct page *p)
 }
 EXPORT_SYMBOL_GPL(shake_page);
 
-static unsigned long dev_pagemap_mapping_shift(struct page *page,
-		struct vm_area_struct *vma)
+static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
+		unsigned long address)
 {
-	unsigned long address = vma_address(page, vma);
 	unsigned long ret = 0;
 	pgd_t *pgd;
 	p4d_t *p4d;
@@ -346,9 +345,8 @@ static unsigned long dev_pagemap_mapping_shift(struct page *page,
  * Schedule a process for later kill.
  * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
  */
-static void add_to_kill(struct task_struct *tsk, struct page *p,
-		       struct vm_area_struct *vma,
-		       struct list_head *to_kill)
+static void add_to_kill(struct task_struct *tsk, struct page *p, pgoff_t pgoff,
+			struct vm_area_struct *vma, struct list_head *to_kill)
 {
 	struct to_kill *tk;
 
@@ -359,9 +357,15 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
 	}
 
 	tk->addr = page_address_in_vma(p, vma);
-	if (is_zone_device_page(p))
-		tk->size_shift = dev_pagemap_mapping_shift(p, vma);
-	else
+	if (is_zone_device_page(p)) {
+		/*
+		 * Since page->mapping is not used for fsdax, we need to
+		 * calculate the address based on the vma.
+		 */
+		if (p->pgmap->type == MEMORY_DEVICE_FS_DAX)
+			tk->addr = vma_pgoff_address(vma, pgoff);
+		tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
+	} else
 		tk->size_shift = page_shift(compound_head(p));
 
 	/*
@@ -509,7 +513,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 			if (!page_mapped_in_vma(page, vma))
 				continue;
 			if (vma->vm_mm == t->mm)
-				add_to_kill(t, page, vma, to_kill);
+				add_to_kill(t, page, 0, vma, to_kill);
 		}
 	}
 	read_unlock(&tasklist_lock);
@@ -545,7 +549,33 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 			 * to be informed of all such data corruptions.
 			 */
 			if (vma->vm_mm == t->mm)
-				add_to_kill(t, page, vma, to_kill);
+				add_to_kill(t, page, 0, vma, to_kill);
+		}
+	}
+	read_unlock(&tasklist_lock);
+	i_mmap_unlock_read(mapping);
+}
+
+/*
+ * Collect processes when the error hits an fsdax page.
+ */
+static void collect_procs_fsdax(struct page *page,
+		struct address_space *mapping, pgoff_t pgoff,
+		struct list_head *to_kill)
+{
+	struct vm_area_struct *vma;
+	struct task_struct *tsk;
+
+	i_mmap_lock_read(mapping);
+	read_lock(&tasklist_lock);
+	for_each_process(tsk) {
+		struct task_struct *t = task_early_kill(tsk, true);
+
+		if (!t)
+			continue;
+		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
+			if (vma->vm_mm == t->mm)
+				add_to_kill(t, page, pgoff, vma, to_kill);
 		}
 	}
 	read_unlock(&tasklist_lock);
@@ -1523,6 +1553,43 @@ static int mf_generic_kill_procs(unsigned long long pfn, int flags,
 	return 0;
 }
 
+/**
+ * mf_dax_kill_procs - Collect and kill processes using this file range
+ * @mapping:	the file in use
+ * @index:	start pgoff of the range within the file
+ * @count:	length of the range, in units of PAGE_SIZE
+ * @mf_flags:	memory failure flags
+ */
+int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
+		unsigned long count, int mf_flags)
+{
+	LIST_HEAD(to_kill);
+	dax_entry_t cookie;
+	struct page *page;
+	size_t end = index + count;
+
+	mf_flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
+
+	for (; index < end; index++) {
+		page = NULL;
+		cookie = dax_lock_mapping_entry(mapping, index, &page);
+		if (!cookie)
+			return -EBUSY;
+		if (!page)
+			goto unlock;
+
+		SetPageHWPoison(page);
+
+		collect_procs_fsdax(page, mapping, index, &to_kill);
+		unmap_and_kill(&to_kill, page_to_pfn(page), mapping,
+				index, mf_flags);
+unlock:
+		dax_unlock_mapping_entry(mapping, index, cookie);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
+
 static int memory_failure_hugetlb(unsigned long pfn, int flags)
 {
 	struct page *p = pfn_to_page(pfn);
-- 
2.34.1





  parent reply	other threads:[~2021-12-26 14:35 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-26 14:34 [PATCH v9 00/10] fsdax: introduce fs query to support reflink Shiyang Ruan
2021-12-26 14:34 ` [PATCH v9 01/10] dax: Use percpu rwsem for dax_{read,write}_lock() Shiyang Ruan
2022-01-04 22:44   ` Dan Williams
2022-01-05 17:45     ` Darrick J. Wong
2022-01-06 11:06     ` Shiyang Ruan
2021-12-26 14:34 ` [PATCH v9 02/10] dax: Introduce holder for dax_device Shiyang Ruan
2022-01-05 18:12   ` Darrick J. Wong
2022-01-05 18:23     ` Dan Williams
2022-01-05 18:56       ` Darrick J. Wong
2022-01-05 19:20         ` Dan Williams
2022-01-05 22:47           ` Darrick J. Wong
2022-01-05 23:01             ` Dan Williams
2022-01-05 23:54               ` Darrick J. Wong
2022-01-06  0:12                 ` Dan Williams
2022-01-20  8:46                   ` Christoph Hellwig
     [not found]                     ` <76f5ed28-2df9-890e-0674-3ef2f18e2c2f@fujitsu.com>
2022-01-21  2:22                       ` Darrick J. Wong
2022-01-21  7:17                         ` Christoph Hellwig
2022-02-15 21:51                         ` Dan Williams
2021-12-26 14:34 ` [PATCH v9 03/10] mm: factor helpers for memory_failure_dev_pagemap Shiyang Ruan
2021-12-26 14:34 ` [PATCH v9 04/10] pagemap,pmem: Introduce ->memory_failure() Shiyang Ruan
2022-01-05 19:06   ` Darrick J. Wong
2021-12-26 14:34 ` [PATCH v9 05/10] fsdax: fix function description Shiyang Ruan
2022-01-05 17:50   ` Darrick J. Wong
2022-01-20  8:47   ` Christoph Hellwig
2021-12-26 14:34 ` [PATCH v9 06/10] fsdax: Introduce dax_lock_mapping_entry() Shiyang Ruan
2022-01-04 22:55   ` Dan Williams
2021-12-26 14:34 ` [PATCH v9 07/10] mm: move pgoff_address() to vma_pgoff_address() Shiyang Ruan
2022-01-20  8:47   ` Christoph Hellwig
2021-12-26 14:34 ` Shiyang Ruan [this message]
2022-01-20  8:55   ` [PATCH v9 08/10] mm: Introduce mf_dax_kill_procs() for fsdax case Christoph Hellwig
2021-12-26 14:34 ` [PATCH v9 09/10] xfs: Implement ->notify_failure() for XFS Shiyang Ruan
2022-01-05 18:53   ` Darrick J. Wong
2022-01-05 21:17     ` Dan Williams
2021-12-26 14:34 ` [PATCH v9 10/10] fsdax: set a CoW flag when associate reflink mappings Shiyang Ruan
2022-01-20  8:59   ` Christoph Hellwig
2022-01-21  2:33     ` Shiyang Ruan
2022-01-21  7:16       ` Christoph Hellwig
2022-01-21  8:34         ` Shiyang Ruan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211226143439.3985960-9-ruansy.fnst@fujitsu.com \
    --to=ruansy.fnst@fujitsu.com \
    --cc=dan.j.williams@intel.com \
    --cc=david@fromorbit.com \
    --cc=djwong@kernel.org \
    --cc=hch@infradead.org \
    --cc=jane.chu@oracle.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=nvdimm@lists.linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox