linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Jérôme Glisse" <jglisse@redhat.com>
To: akpm@linux-foundation.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org
Cc: "John Hubbard" <jhubbard@nvidia.com>,
	"Jérôme Glisse" <jglisse@redhat.com>,
	"Jatin Kumar" <jakumar@nvidia.com>,
	"Mark Hairgrove" <mhairgrove@nvidia.com>,
	"Sherry Cheung" <SCheung@nvidia.com>,
	"Subhash Gutti" <sgutti@nvidia.com>
Subject: [HMM v13 11/18] mm/hmm/mirror: add range monitor helper, to monitor CPU page table update
Date: Fri, 18 Nov 2016 13:18:20 -0500	[thread overview]
Message-ID: <1479493107-982-12-git-send-email-jglisse@redhat.com> (raw)
In-Reply-To: <1479493107-982-1-git-send-email-jglisse@redhat.com>

Complement the hmm_vma_range_lock/unlock() mechanism with a range monitor that does
not block CPU page table invalidation and thus does not guarantee forward progress.
It is still useful, as in many situations concurrent CPU page table updates and CPU
snapshots take place in different regions of the virtual address space.

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Signed-off-by: Jatin Kumar <jakumar@nvidia.com>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Mark Hairgrove <mhairgrove@nvidia.com>
Signed-off-by: Sherry Cheung <SCheung@nvidia.com>
Signed-off-by: Subhash Gutti <sgutti@nvidia.com>
---
 include/linux/hmm.h | 18 ++++++++++
 mm/hmm.c            | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index c0b1c07..6571647 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -254,6 +254,24 @@ int hmm_vma_range_lock(struct hmm_range *range,
 void hmm_vma_range_unlock(struct hmm_range *range);
 
 
+/*
+ * Monitoring a range allows tracking any CPU page table modification that can
+ * affect the range. It complements the hmm_vma_range_lock/unlock() mechanism
+ * as a non-blocking method for synchronizing the device page table with the
+ * CPU page table. See the function descriptions in mm/hmm.c for documentation.
+ *
+ * NOTE: AFTER A CALL TO hmm_vma_range_monitor_start() THAT RETURNED TRUE YOU
+ * MUST CALL hmm_vma_range_monitor_end() BEFORE FREEING THE RANGE STRUCT, AS
+ * THE STRUCT MAY STILL BE LINKED ON THE HMM MONITOR LIST UNTIL THEN!
+ */
+bool hmm_vma_range_monitor_start(struct hmm_range *range,
+				 struct vm_area_struct *vma,
+				 unsigned long start,
+				 unsigned long end,
+				 bool wait);
+bool hmm_vma_range_monitor_end(struct hmm_range *range);
+
+
 /* Below are for HMM internal use only ! Not to be use by device driver ! */
 void hmm_mm_destroy(struct mm_struct *mm);
 
diff --git a/mm/hmm.c b/mm/hmm.c
index ee05419..746eb96 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -40,6 +40,7 @@ struct hmm {
 	spinlock_t		lock;
 	struct list_head	ranges;
 	struct list_head	mirrors;
+	struct list_head	monitors;
 	atomic_t		sequence;
 	wait_queue_head_t	wait_queue;
 	struct mmu_notifier	mmu_notifier;
@@ -65,6 +66,7 @@ static struct hmm *hmm_register(struct mm_struct *mm)
 			return NULL;
 		init_waitqueue_head(&hmm->wait_queue);
 		atomic_set(&hmm->notifier_count, 0);
+		INIT_LIST_HEAD(&hmm->monitors);
 		INIT_LIST_HEAD(&hmm->mirrors);
 		atomic_set(&hmm->sequence, 0);
 		hmm->mmu_notifier.ops = NULL;
@@ -112,7 +114,7 @@ static void hmm_invalidate_range(struct hmm *hmm,
 				 unsigned long start,
 				 unsigned long end)
 {
-	struct hmm_range range, *tmp;
+	struct hmm_range range, *tmp, *next;
 	struct hmm_mirror *mirror;
 
 	/*
@@ -127,6 +129,13 @@ static void hmm_invalidate_range(struct hmm *hmm,
 	range.hmm = hmm;
 
 	spin_lock(&hmm->lock);
+	/* Remove any range monitors */
+	list_for_each_entry_safe (tmp, next, &hmm->monitors, list) {
+		if (range.start >= tmp->end || range.end <= tmp->start)
+			continue;
+		/* This range is no longer valid */
+		list_del_init(&tmp->list);
+	}
 	list_for_each_entry (tmp, &hmm->ranges, list) {
 		if (range.start >= tmp->end || range.end <= tmp->start)
 			continue;
@@ -361,3 +370,87 @@ void hmm_vma_range_unlock(struct hmm_range *range)
 		wake_up(&hmm->wait_queue);
 }
 EXPORT_SYMBOL(hmm_vma_range_unlock);
+
+
+/*
+ * hmm_vma_range_monitor_start() - start monitoring of a range
+ * @range: pointer to the hmm_range struct used to monitor
+ * @vma: virtual memory area for the range
+ * @start: start address of the range to monitor (inclusive)
+ * @end: end address of the range to monitor (exclusive)
+ * @wait: wait for any pending CPU page table update to finish
+ * Returns: true if the range is now monitored, false if hmm_register() failed
+ *          or if a CPU page table update is pending and @wait is false
+ *
+ * The use pattern of this function is:
+ *   retry:
+ *       if (!hmm_vma_range_monitor_start(range, vma, start, end, true))
+ *           goto error; // hmm_register() failed, nothing is monitored
+ *       // Use stable CPU page table content to prepare (not commit) an update
+ *       ...
+ *       // Take device driver lock that serializes device page table update
+ *       driver_lock_device_page_table_update();
+ *       if (!hmm_vma_range_monitor_end(range)) {
+ *           driver_unlock_device_page_table_update();
+ *           // Abort the transaction and clean up; take care not to busy loop
+ *           goto retry;
+ *       }
+ *       // Commit device page table update
+ *       driver_unlock_device_page_table_update();
+ */
+bool hmm_vma_range_monitor_start(struct hmm_range *range,
+				 struct vm_area_struct *vma,
+				 unsigned long start,
+				 unsigned long end,
+				 bool wait)
+{
+	BUG_ON(!vma || !range);
+	INIT_LIST_HEAD(&range->list);
+	/* hmm_invalidate_range() tests monitors against these bounds */
+	range->start = start;
+	range->end = end;
+	range->hmm = hmm_register(vma->vm_mm);
+	if (!range->hmm)
+		return false;
+
+again:
+	spin_lock(&range->hmm->lock);
+	if (atomic_read(&range->hmm->notifier_count)) {
+		spin_unlock(&range->hmm->lock);
+		if (!wait)
+			return false;
+		/*
+		 * FIXME: Wait for all active mmu_notifiers, because we cannot
+		 * keep an hmm_range struct around while waiting for a range
+		 * invalidation to finish. mmu_notifier must be updated first.
+		 */
+		wait_event(range->hmm->wait_queue,
+			   !atomic_read(&range->hmm->notifier_count));
+		goto again;
+	}
+	list_add_tail(&range->list, &range->hmm->monitors);
+	spin_unlock(&range->hmm->lock);
+	return true;
+}
+EXPORT_SYMBOL(hmm_vma_range_monitor_start);
+
+/*
+ * hmm_vma_range_monitor_end() - stop monitoring a range
+ * @range: range previously passed to hmm_vma_range_monitor_start()
+ * Returns: true if @range was not invalidated since monitoring started
+ */
+bool hmm_vma_range_monitor_end(struct hmm_range *range)
+{
+	struct hmm *hmm = range->hmm;
+	bool still_monitored;
+
+	/* No hmm attached, or the range is not on the monitor list anymore */
+	if (!hmm || list_empty(&range->list))
+		return false;
+	spin_lock(&hmm->lock);
+	still_monitored = !list_empty(&range->list);
+	list_del_init(&range->list);
+	spin_unlock(&hmm->lock);
+	return still_monitored;
+}
+EXPORT_SYMBOL(hmm_vma_range_monitor_end);
-- 
2.4.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2016-11-18 17:17 UTC|newest]

Thread overview: 73+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-18 18:18 [HMM v13 00/18] HMM (Heterogeneous Memory Management) v13 Jérôme Glisse
2016-11-18 18:18 ` [HMM v13 01/18] mm/memory/hotplug: convert device parameter bool to set of flags Jérôme Glisse
2016-11-21  0:44   ` Balbir Singh
2016-11-21  4:53     ` Jerome Glisse
2016-11-21  6:57       ` Anshuman Khandual
2016-11-21 12:19         ` Jerome Glisse
2016-11-21  6:41   ` Anshuman Khandual
2016-11-21 12:27     ` Jerome Glisse
2016-11-22  5:35       ` Anshuman Khandual
2016-11-22 14:08         ` Jerome Glisse
2016-11-18 18:18 ` [HMM v13 02/18] mm/ZONE_DEVICE/unaddressable: add support for un-addressable device memory Jérôme Glisse
2016-11-21  8:06   ` Anshuman Khandual
2016-11-21 12:33     ` Jerome Glisse
2016-11-22  5:15       ` Anshuman Khandual
2016-11-18 18:18 ` [HMM v13 03/18] mm/ZONE_DEVICE/free_hot_cold_page: catch ZONE_DEVICE pages Jérôme Glisse
2016-11-21  8:18   ` Anshuman Khandual
2016-11-21 12:50     ` Jerome Glisse
2016-11-22  4:30       ` Anshuman Khandual
2016-11-18 18:18 ` [HMM v13 04/18] mm/ZONE_DEVICE/free-page: callback when page is freed Jérôme Glisse
2016-11-21  1:49   ` Balbir Singh
2016-11-21  4:57     ` Jerome Glisse
2016-11-21  8:26   ` Anshuman Khandual
2016-11-21 12:34     ` Jerome Glisse
2016-11-22  5:02       ` Anshuman Khandual
2016-11-18 18:18 ` [HMM v13 05/18] mm/ZONE_DEVICE/devmem_pages_remove: allow early removal of device memory Jérôme Glisse
2016-11-21 10:37   ` Anshuman Khandual
2016-11-21 12:39     ` Jerome Glisse
2016-11-22  4:54       ` Anshuman Khandual
2016-11-18 18:18 ` [HMM v13 06/18] mm/ZONE_DEVICE/unaddressable: add special swap for unaddressable Jérôme Glisse
2016-11-21  2:06   ` Balbir Singh
2016-11-21  5:05     ` Jerome Glisse
2016-11-22  2:19       ` Balbir Singh
2016-11-22 13:59         ` Jerome Glisse
2016-11-21 11:10     ` Anshuman Khandual
2016-11-21 10:58   ` Anshuman Khandual
2016-11-21 12:42     ` Jerome Glisse
2016-11-22  4:48       ` Anshuman Khandual
2016-11-24 13:56         ` Jerome Glisse
2016-11-18 18:18 ` [HMM v13 07/18] mm/ZONE_DEVICE/x86: add support for un-addressable device memory Jérôme Glisse
2016-11-21  2:08   ` Balbir Singh
2016-11-21  5:08     ` Jerome Glisse
2016-11-18 18:18 ` [HMM v13 08/18] mm/hmm: heterogeneous memory management (HMM for short) Jérôme Glisse
2016-11-21  2:29   ` Balbir Singh
2016-11-21  5:14     ` Jerome Glisse
2016-11-23  4:03   ` Anshuman Khandual
2016-11-27 13:10     ` Jerome Glisse
2016-11-28  2:58       ` Anshuman Khandual
2016-11-28  9:41         ` Jerome Glisse
2016-11-18 18:18 ` [HMM v13 09/18] mm/hmm/mirror: mirror process address space on device with HMM helpers Jérôme Glisse
2016-11-21  2:42   ` Balbir Singh
2016-11-21  5:18     ` Jerome Glisse
2016-11-18 18:18 ` [HMM v13 10/18] mm/hmm/mirror: add range lock helper, prevent CPU page table update for the range Jérôme Glisse
2016-11-18 18:18 ` Jérôme Glisse [this message]
2016-11-18 18:18 ` [HMM v13 12/18] mm/hmm/mirror: helper to snapshot CPU page table Jérôme Glisse
2016-11-18 18:18 ` [HMM v13 13/18] mm/hmm/mirror: device page fault handler Jérôme Glisse
2016-11-18 18:18 ` [HMM v13 14/18] mm/hmm/migrate: support un-addressable ZONE_DEVICE page in migration Jérôme Glisse
2016-11-18 18:18 ` [HMM v13 15/18] mm/hmm/migrate: add new boolean copy flag to migratepage() callback Jérôme Glisse
2016-11-18 18:18 ` [HMM v13 16/18] mm/hmm/migrate: new memory migration helper for use with device memory Jérôme Glisse
2016-11-18 19:57   ` Aneesh Kumar K.V
2016-11-18 20:15     ` Jerome Glisse
2016-11-19 14:32   ` Aneesh Kumar K.V
2016-11-19 17:17     ` Jerome Glisse
2016-11-20 18:21       ` Aneesh Kumar K.V
2016-11-20 20:06         ` Jerome Glisse
2016-11-21  3:30   ` Balbir Singh
2016-11-21  5:31     ` Jerome Glisse
2016-11-18 18:18 ` [HMM v13 17/18] mm/hmm/devmem: device driver helper to hotplug ZONE_DEVICE memory Jérôme Glisse
2016-11-18 18:18 ` [HMM v13 18/18] mm/hmm/devmem: dummy HMM device as an helper for " Jérôme Glisse
2016-11-19  0:41 ` [HMM v13 00/18] HMM (Heterogeneous Memory Management) v13 John Hubbard
2016-11-19 14:50   ` Aneesh Kumar K.V
2016-11-23  9:16 ` Haggai Eran
2016-11-25 16:16   ` Jerome Glisse
2016-11-27 13:27     ` Haggai Eran

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1479493107-982-12-git-send-email-jglisse@redhat.com \
    --to=jglisse@redhat.com \
    --cc=SCheung@nvidia.com \
    --cc=akpm@linux-foundation.org \
    --cc=jakumar@nvidia.com \
    --cc=jhubbard@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhairgrove@nvidia.com \
    --cc=sgutti@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox