linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH][RFC]: pte notifiers -- support for external page tables
@ 2007-09-05 16:38 Avi Kivity
  2007-09-05 19:05 ` Rik van Riel
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Avi Kivity @ 2007-09-05 16:38 UTC (permalink / raw)
  To: lkml, linux-mm; +Cc: shaohua.li, kvm, general, Avi Kivity

Some hardware and software systems maintain page tables outside the normal
Linux page tables, which reference userspace memory.  This includes
Infiniband, other RDMA-capable devices, and kvm (with a pending patch).

Because these systems maintain external page tables (and external tlbs),
Linux cannot demand page this memory and it must be locked.  For kvm at
least, this is a significant reduction in functionality.

This sample patch adds a new mechanism, pte notifiers, that allows drivers
to register an interest in changes to ptes. Whenever Linux changes a
pte, it will call a notifier to allow the driver to adjust the external
page table and flush its tlb.

Note that only one notifier is implemented, ->clear(), but others should be
similar.

pte notifiers are different from paravirt_ops: they extend the normal
page tables rather than replace them; and they provide high-level information
such as the vma and the virtual address for the driver to use.

Signed-off-by: Avi Kivity <avi@qumranet.com>

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 655094d..5d2bbee 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -14,6 +14,7 @@
 #include <linux/debug_locks.h>
 #include <linux/backing-dev.h>
 #include <linux/mm_types.h>
+#include <linux/pte_notifier.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -108,6 +109,9 @@ struct vm_area_struct {
 #ifndef CONFIG_MMU
 	atomic_t vm_usage;		/* refcount (VMAs shared if !MMU) */
 #endif
+#ifdef CONFIG_PTE_NOTIFIERS
+	struct list_head pte_notifier_list;
+#endif
 #ifdef CONFIG_NUMA
 	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
 #endif
diff --git a/include/linux/pte_notifier.h b/include/linux/pte_notifier.h
new file mode 100644
index 0000000..d28832b
--- /dev/null
+++ b/include/linux/pte_notifier.h
@@ -0,0 +1,52 @@
+#ifndef _LINUX_PTE_NOTIFIER_H
+#define _LINUX_PTE_NOTIFIER_H
+
+#include <linux/list.h>
+
+struct vm_area_struct;
+
+#ifdef CONFIG_PTE_NOTIFIERS
+
+struct pte_notifier;
+
+struct pte_notifier_ops {
+	void (*close)(struct pte_notifier *pn, struct vm_area_struct *vma);
+	void (*clear)(struct pte_notifier *pn, struct vm_area_struct *vma,
+		      unsigned long address);
+};
+
+struct pte_notifier {
+	struct list_head link;
+	const struct pte_notifier_ops *ops;
+};
+
+
+void vma_init_pte_notifiers(struct vm_area_struct *vma);
+void vma_close_pte_notifiers(struct vm_area_struct *vma);
+void pte_notifier_register(struct pte_notifier *pn,
+			   struct vm_area_struct *vma);
+void pte_notifier_unregister(struct pte_notifier *pn);
+
+#define pte_notifier_call(vma, function, args...)			\
+	do {								\
+		struct pte_notifier *__pn;				\
+									\
+		list_for_each_entry(__pn, &vma->pte_notifier_list, link) \
+			__pn->ops->function(__pn, vma, args);		\
+	} while (0)
+
+#else
+
+static inline void vma_init_pte_notifiers(struct vm_area_struct *vma) {}
+static inline void vma_close_pte_notifiers(struct vm_area_struct *vma) {}
+static inline void pte_notifier_register(struct pte_notifier *pn,
+					 struct vm_area_struct *vma) {}
+static inline void pte_notifier_unregister(struct pte_notifier *pn) {}
+
+#define pte_notifier_call(vma, function, args...) \
+	do { } while (0)
+
+#endif
+
+
+#endif
diff --git a/mm/Kconfig b/mm/Kconfig
index e24d348..7b10151 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -176,3 +176,6 @@ config NR_QUICK
 config VIRT_TO_BUS
 	def_bool y
 	depends on !ARCH_NO_VIRT_TO_BUS
+
+config PTE_NOTIFIERS
+       bool
diff --git a/mm/Makefile b/mm/Makefile
index 245e33a..59f6a03 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -29,4 +29,5 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_SMP) += allocpercpu.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
+obj-$(CONFIG_PTE_NOTIFIERS) += pte_notifiers.o
 
diff --git a/mm/mmap.c b/mm/mmap.c
index b653721..cc6c4fe 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1134,6 +1134,7 @@ munmap_back:
 	vma->vm_page_prot = protection_map[vm_flags &
 				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
 	vma->vm_pgoff = pgoff;
+	vma_init_pte_notifiers(vma);
 
 	if (file) {
 		error = -EINVAL;
diff --git a/mm/pte_notifier.c b/mm/pte_notifier.c
new file mode 100644
index 0000000..0b9076c
--- /dev/null
+++ b/mm/pte_notifier.c
@@ -0,0 +1,32 @@
+
+#include <linux/pte_notifier.h>
+
+void vma_init_pte_notifiers(struct vm_area_struct *vma)
+{
+	INIT_LIST_HEAD(&vma->pte_notifier_list);
+}
+EXPORT_SYMBOL_GPL(vma_init_pte_notifiers);
+
+void vma_destroy_pte_notifiers(struct vm_area_struct *vma)
+{
+	struct pte_notifier *pn;
+	struct list_head *n;
+
+	list_for_each_entry_safe(pn, n, &vma->pte_notifier_list, link) {
+		pn->ops->close(__pn, vma);
+		__list_del(n);
+	}
+}
+
+void pte_notifier_register(struct pte_notifier *pn, struct vm_area_struct *vma)
+{
+	list_add(&pn->link, &vma->pte_notifier_list);
+}
+EXPORT_SYMBOL_GPL(pte_notifier_register);
+
+void pte_notifier_unregister(struct pte_notifier *pn)
+{
+	list_del(&pn->link);
+}
+EXPORT_SYMBOL_GPL(pte_notifier_unregister);
+
diff --git a/mm/rmap.c b/mm/rmap.c
index 41ac397..3f61d38 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -682,6 +682,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	}
 
 	/* Nuke the page table entry. */
+	pte_notifier_call(vma, clear, address);
 	flush_cache_page(vma, address, page_to_pfn(page));
 	pteval = ptep_clear_flush(vma, address, pte);
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC]: pte notifiers -- support for external page tables
  2007-09-05 16:38 [PATCH][RFC]: pte notifiers -- support for external page tables Avi Kivity
@ 2007-09-05 19:05 ` Rik van Riel
  2007-09-05 19:14   ` Avi Kivity
  2007-09-05 20:40 ` Jack Steiner
  2007-09-06  6:24 ` [ofa-general] " Gleb Natapov
  2 siblings, 1 reply; 15+ messages in thread
From: Rik van Riel @ 2007-09-05 19:05 UTC (permalink / raw)
  To: Avi Kivity; +Cc: lkml, linux-mm, shaohua.li, kvm, general

Avi Kivity wrote:

> This sample patch adds a new mechanism, pte notifiers, that allows drivers
> to register an interest in a changes to ptes. Whenever Linux changes a
> pte, it will call a notifier to allow the driver to adjust the external
> page table and flush its tlb.
> 
> Note that only one notifier is implemented, ->clear(), but others should be
> similar.

This approach makes a lot of sense.

> diff --git a/mm/rmap.c b/mm/rmap.c
> index 41ac397..3f61d38 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -682,6 +682,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
>  	}
>  
>  	/* Nuke the page table entry. */
> +	pte_notifier_call(vma, clear, address);
>  	flush_cache_page(vma, address, page_to_pfn(page));
>  	pteval = ptep_clear_flush(vma, address, pte);

If you want this to be useful to Infiniband, you should probably
also hook up do_wp_page() in mm/memory.c, where a page table can
be pointed to another page.

Probably the code in mm/mremap.c will need to be hooked up too.

-- 
Politics is the struggle between those who want to make their country
the best in the world, and those who believe it already is.  Each group
calls the other unpatriotic.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC]: pte notifiers -- support for external page tables
  2007-09-05 19:05 ` Rik van Riel
@ 2007-09-05 19:14   ` Avi Kivity
  2007-09-05 19:23     ` Rik van Riel
  0 siblings, 1 reply; 15+ messages in thread
From: Avi Kivity @ 2007-09-05 19:14 UTC (permalink / raw)
  To: Rik van Riel; +Cc: linux-mm, shaohua.li, kvm, general

Rik van Riel wrote:

>> diff --git a/mm/rmap.c b/mm/rmap.c
>> index 41ac397..3f61d38 100644
>> --- a/mm/rmap.c
>> +++ b/mm/rmap.c
>> @@ -682,6 +682,7 @@ static int try_to_unmap_one(struct page *page, 
>> struct vm_area_struct *vma,
>>      }
>>  
>>      /* Nuke the page table entry. */
>> +    pte_notifier_call(vma, clear, address);
>>      flush_cache_page(vma, address, page_to_pfn(page));
>>      pteval = ptep_clear_flush(vma, address, pte);
>
> If you want this to be useful to Infiniband, you should probably
> also hook up do_wp_page() in mm/memory.c, where a page table can
> be pointed to another page.
>
> Probably the code in mm/mremap.c will need to be hooked up too.
>

I imagine that many of the paravirt_ops mmu hooks will need to be 
exposed as pte notifiers.  This can't be done as part of the 
paravirt_ops code due to the need to pass high level data structures, 
though.

-- 
Any sufficiently difficult bug is indistinguishable from a feature.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC]: pte notifiers -- support for external page tables
  2007-09-05 19:14   ` Avi Kivity
@ 2007-09-05 19:23     ` Rik van Riel
  2007-09-05 19:32       ` Avi Kivity
  0 siblings, 1 reply; 15+ messages in thread
From: Rik van Riel @ 2007-09-05 19:23 UTC (permalink / raw)
  To: Avi Kivity; +Cc: linux-mm, shaohua.li, kvm, general

Avi Kivity wrote:
> Rik van Riel wrote:
> 
>>> diff --git a/mm/rmap.c b/mm/rmap.c
>>> index 41ac397..3f61d38 100644
>>> --- a/mm/rmap.c
>>> +++ b/mm/rmap.c
>>> @@ -682,6 +682,7 @@ static int try_to_unmap_one(struct page *page, 
>>> struct vm_area_struct *vma,
>>>      }
>>>  
>>>      /* Nuke the page table entry. */
>>> +    pte_notifier_call(vma, clear, address);
>>>      flush_cache_page(vma, address, page_to_pfn(page));
>>>      pteval = ptep_clear_flush(vma, address, pte);
>>
>> If you want this to be useful to Infiniband, you should probably
>> also hook up do_wp_page() in mm/memory.c, where a page table can
>> be pointed to another page.
>>
>> Probably the code in mm/mremap.c will need to be hooked up too.
>>
> 
> I imagine that many of the paravirt_ops mmu hooks will need to be 
> exposed as pte notifiers.  This can't be done as part of the 
> paravirt_ops code due to the need to pass high level data structures, 
> though.

Wait, I thought that paravirt_ops was all on the side of the
guest kernel, where these host kernel operations are invisible?

-- 
Politics is the struggle between those who want to make their country
the best in the world, and those who believe it already is.  Each group
calls the other unpatriotic.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC]: pte notifiers -- support for external page tables
  2007-09-05 19:23     ` Rik van Riel
@ 2007-09-05 19:32       ` Avi Kivity
  2007-09-06 11:28         ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 15+ messages in thread
From: Avi Kivity @ 2007-09-05 19:32 UTC (permalink / raw)
  To: Rik van Riel; +Cc: linux-mm, shaohua.li, kvm-devel, general, linux-kernel

Rik van Riel wrote:
>>
>> I imagine that many of the paravirt_ops mmu hooks will need to be 
>> exposed as pte notifiers.  This can't be done as part of the 
>> paravirt_ops code due to the need to pass high level data structures, 
>> though.
>
> Wait, I thought that paravirt_ops was all on the side of the
> guest kernel, where these host kernel operations are invisible?
>

It is, but the hooks are in much the same places.  It could be argued 
that you'd embed pte notifiers in paravirt_ops for a host kernel, but 
that's not doable because pte notifiers use higher-level data structures 
(like vmas).

-- 
Any sufficiently difficult bug is indistinguishable from a feature.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC]: pte notifiers -- support for external page tables
  2007-09-05 16:38 [PATCH][RFC]: pte notifiers -- support for external page tables Avi Kivity
  2007-09-05 19:05 ` Rik van Riel
@ 2007-09-05 20:40 ` Jack Steiner
  2007-09-05 20:40   ` Avi Kivity
  2007-09-05 20:42   ` Avi Kivity
  2007-09-06  6:24 ` [ofa-general] " Gleb Natapov
  2 siblings, 2 replies; 15+ messages in thread
From: Jack Steiner @ 2007-09-05 20:40 UTC (permalink / raw)
  To: Avi Kivity; +Cc: lkml, linux-mm, shaohua.li, kvm, general

On Wed, Sep 05, 2007 at 07:38:48PM +0300, Avi Kivity wrote:
> Some hardware and software systems maintain page tables outside the normal
> Linux page tables, which reference userspace memory.  This includes
> Infiniband, other RDMA-capable devices, and kvm (with a pending patch).
> 

I like it. 

We have 2 special devices with external TLBs that can
take advantage of this.

One suggestion - at least for what we need. Can the notifier be
registered against the mm_struct instead of (or in addition to) the
vma?


---jack

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC]: pte notifiers -- support for external page tables
  2007-09-05 20:40 ` Jack Steiner
@ 2007-09-05 20:40   ` Avi Kivity
  2007-09-05 20:42   ` Avi Kivity
  1 sibling, 0 replies; 15+ messages in thread
From: Avi Kivity @ 2007-09-05 20:40 UTC (permalink / raw)
  To: Jack Steiner; +Cc: lkml, linux-mm, shaohua.li, kvm, general

Jack Steiner wrote:
> On Wed, Sep 05, 2007 at 07:38:48PM +0300, Avi Kivity wrote:
>   
>> Some hardware and software systems maintain page tables outside the normal
>> Linux page tables, which reference userspace memory.  This includes
>> Infiniband, other RDMA-capable devices, and kvm (with a pending patch).
>>
>>     
>
> I like it. 
>
> We have 2 special devices with external TLBs that can
> take advantage of this.
>
> One suggestion - at least for what we need. Can the notifier be
> registered against the mm_struct instead of (or in addition to) the
> vma?
>   

Yes.  It's a lot simpler since this way we don't have to support vma 
creation/splitting/merging/destruction.  There's a tiny performance hit 
for kvm, but it isn't worth the bother.

Will implement for v2 of this patch.

-- 
Any sufficiently difficult bug is indistinguishable from a feature.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC]: pte notifiers -- support for external page tables
  2007-09-05 20:40 ` Jack Steiner
  2007-09-05 20:40   ` Avi Kivity
@ 2007-09-05 20:42   ` Avi Kivity
  1 sibling, 0 replies; 15+ messages in thread
From: Avi Kivity @ 2007-09-05 20:42 UTC (permalink / raw)
  To: Jack Steiner; +Cc: linux-kernel, linux-mm, shaohua.li, kvm-devel, general

[resend due to broken cc list in my original post]

Jack Steiner wrote:
> On Wed, Sep 05, 2007 at 07:38:48PM +0300, Avi Kivity wrote:
>   
>> Some hardware and software systems maintain page tables outside the normal
>> Linux page tables, which reference userspace memory.  This includes
>> Infiniband, other RDMA-capable devices, and kvm (with a pending patch).
>>
>>     
>
> I like it. 
>
> We have 2 special devices with external TLBs that can
> take advantage of this.
>
> One suggestion - at least for what we need. Can the notifier be
> registered against the mm_struct instead of (or in addition to) the
> vma?
>   

Yes.  It's a lot simpler since this way we don't have to support vma
creation/splitting/merging/destruction.  There's a tiny performance hit
for kvm, but it isn't worth the bother.

Will implement for v2 of this patch.

-- 
Any sufficiently difficult bug is indistinguishable from a feature.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [ofa-general] [PATCH][RFC]: pte notifiers -- support for external page tables
  2007-09-05 16:38 [PATCH][RFC]: pte notifiers -- support for external page tables Avi Kivity
  2007-09-05 19:05 ` Rik van Riel
  2007-09-05 20:40 ` Jack Steiner
@ 2007-09-06  6:24 ` Gleb Natapov
  2007-09-06  8:35   ` Avi Kivity
  2 siblings, 1 reply; 15+ messages in thread
From: Gleb Natapov @ 2007-09-06  6:24 UTC (permalink / raw)
  To: Avi Kivity; +Cc: lkml, linux-mm, kvm, shaohua.li, general, addy

On Wed, Sep 05, 2007 at 07:38:48PM +0300, Avi Kivity wrote:
> This sample patch adds a new mechanism, pte notifiers, that allows drivers
> to register an interest in a changes to ptes. Whenever Linux changes a
> pte, it will call a notifier to allow the driver to adjust the external
> page table and flush its tlb.
How is this different from http://lwn.net/Articles/133627/? AFAIR the
patch was rejected because there was only one user for it and it was
decided that it would be better to maintain it out of tree for a while.

--
			Gleb.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [ofa-general] [PATCH][RFC]: pte notifiers -- support for external page tables
  2007-09-06  6:24 ` [ofa-general] " Gleb Natapov
@ 2007-09-06  8:35   ` Avi Kivity
  2007-09-06  8:41     ` Gleb Natapov
  2007-09-10 18:17     ` Andrew Hastings
  0 siblings, 2 replies; 15+ messages in thread
From: Avi Kivity @ 2007-09-06  8:35 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: lkml, linux-mm, kvm, shaohua.li, general, addy

Gleb Natapov wrote:
> On Wed, Sep 05, 2007 at 07:38:48PM +0300, Avi Kivity wrote:
>   
>> This sample patch adds a new mechanism, pte notifiers, that allows drivers
>> to register an interest in a changes to ptes. Whenever Linux changes a
>> pte, it will call a notifier to allow the driver to adjust the external
>> page table and flush its tlb.
>>     
> How is this different from http://lwn.net/Articles/133627/? AFAIR the
> patch was rejected because there was only one user for it and it was
> decided that it would be better to maintain it out of tree for a while.
>   

Your patch is more complete.

There are now at least three users: you, kvm, and newer Infiniband 
HCAs.  Care to resurrect the patch?

-- 
Any sufficiently difficult bug is indistinguishable from a feature.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [ofa-general] [PATCH][RFC]: pte notifiers -- support for external page tables
  2007-09-06  8:35   ` Avi Kivity
@ 2007-09-06  8:41     ` Gleb Natapov
  2007-09-10 18:17     ` Andrew Hastings
  1 sibling, 0 replies; 15+ messages in thread
From: Gleb Natapov @ 2007-09-06  8:41 UTC (permalink / raw)
  To: Avi Kivity; +Cc: lkml, linux-mm, kvm, shaohua.li, general, addy

On Thu, Sep 06, 2007 at 11:35:24AM +0300, Avi Kivity wrote:
> Gleb Natapov wrote:
>> On Wed, Sep 05, 2007 at 07:38:48PM +0300, Avi Kivity wrote:
>>   
>>> This sample patch adds a new mechanism, pte notifiers, that allows 
>>> drivers
>>> to register an interest in a changes to ptes. Whenever Linux changes a
>>> pte, it will call a notifier to allow the driver to adjust the external
>>> page table and flush its tlb.
>>>     
>> How is this different from http://lwn.net/Articles/133627/? AFAIR the
>> patch was rejected because there was only one user for it and it was
>> decided that it would be better to maintain it out of tree for a while.
>>   
>
> Your patch is more complete.
>
> There are now at least three users: you, kvm, and newer Infiniband HCAs.  
> Care to resurrect the patch?
>
This is not my patch :) This is patch written by David Addison from
Quadrics. I CCed him on my previous email. I just saw that you are
trying to do something similar.

--
			Gleb.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC]: pte notifiers -- support for external page tables
  2007-09-05 19:32       ` Avi Kivity
@ 2007-09-06 11:28         ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 15+ messages in thread
From: Jeremy Fitzhardinge @ 2007-09-06 11:28 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Rik van Riel, linux-mm, shaohua.li, kvm-devel, general, linux-kernel

Avi Kivity wrote:
> It is, but the hooks are in much the same places.  It could be argued
> that you'd embed pte notifiers in paravirt_ops for a host kernel, but
> that's not doable because pte notifiers use higher-level data
> strutures (like vmas).

Also, I wouldn't like to preclude the possibility of having a kernel
that's both a guest and a host (ie, nested vmms).

    J

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [ofa-general] [PATCH][RFC]: pte notifiers -- support for external page tables
  2007-09-06  8:35   ` Avi Kivity
  2007-09-06  8:41     ` Gleb Natapov
@ 2007-09-10 18:17     ` Andrew Hastings
  2007-09-11 10:37       ` Daniel J Blueman
  1 sibling, 1 reply; 15+ messages in thread
From: Andrew Hastings @ 2007-09-10 18:17 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Daniel Blueman, linux-mm

Avi Kivity wrote:
> Gleb Natapov wrote:
>> On Wed, Sep 05, 2007 at 07:38:48PM +0300, Avi Kivity wrote:
>>  
>>> This sample patch adds a new mechanism, pte notifiers, that allows 
>>> drivers
>>> to register an interest in a changes to ptes. Whenever Linux changes a
>>> pte, it will call a notifier to allow the driver to adjust the external
>>> page table and flush its tlb.
>>>     
>> How is this different from http://lwn.net/Articles/133627/? AFAIR the
>> patch was rejected because there was only one user for it and it was
>> decided that it would be better to maintain it out of tree for a while.
>>   
> 
> Your patch is more complete.
> 
> There are now at least three users: you, kvm, and newer Infiniband 
> HCAs.  Care to resurrect the patch?

We (Cray) also use the ioproc patch.  AFAIK the current maintainer is 
Dan Blueman at Quadrics.

-Andrew Hastings
  Cray Inc.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [ofa-general] [PATCH][RFC]: pte notifiers -- support for external page tables
  2007-09-10 18:17     ` Andrew Hastings
@ 2007-09-11 10:37       ` Daniel J Blueman
  2007-09-11 11:19         ` Gleb Natapov
  0 siblings, 1 reply; 15+ messages in thread
From: Daniel J Blueman @ 2007-09-11 10:37 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Andrew Hastings, linux-mm

Andrew Hastings wrote:
> Avi Kivity wrote:
>> Gleb Natapov wrote:
>>> On Wed, Sep 05, 2007 at 07:38:48PM +0300, Avi Kivity wrote:
>>>  
>>>> This sample patch adds a new mechanism, pte notifiers, that allows 
>>>> drivers
>>>> to register an interest in a changes to ptes. Whenever Linux changes a
>>>> pte, it will call a notifier to allow the driver to adjust the external
>>>> page table and flush its tlb.
>>>>     
>>> How is this different from http://lwn.net/Articles/133627/? AFAIR the
>>> patch was rejected because there was only one user for it and it was
>>> decided that it would be better to maintain it out of tree for a while.
>>>   
>>
>> Your patch is more complete.
>>
>> There are now at least three users: you, kvm, and newer Infiniband 
>> HCAs.  Care to resurrect the patch?
> 
> We (Cray) also use the ioproc patch.  AFAIK the current maintainer is 
> Dan Blueman at Quadrics.

I should add that the IOPROC patches are maintained internally to 
loosely track mainline kernels; however, we do not generally release [1] 
these until they've passed quite a lot of validation (driven by customer 
demand mostly) on various configurations.

Quite a few large users/groups would benefit from this; the IOPROC 
patches have been stable for quite a while now, so are a good option.

If you have any feedback/suggestions that would help forward progress, 
I'm happy to hear and address them.

Thanks,
   Daniel

--- [1]

http://www.quadrics.com/patches
-- 
Daniel J Blueman
Software Engineer, Quadrics Ltd

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [ofa-general] [PATCH][RFC]: pte notifiers -- support for external page tables
  2007-09-11 10:37       ` Daniel J Blueman
@ 2007-09-11 11:19         ` Gleb Natapov
  0 siblings, 0 replies; 15+ messages in thread
From: Gleb Natapov @ 2007-09-11 11:19 UTC (permalink / raw)
  To: Daniel J Blueman; +Cc: Avi Kivity, Andrew Hastings, linux-mm

On Tue, Sep 11, 2007 at 11:37:50AM +0100, Daniel J Blueman wrote:
> Andrew Hastings wrote:
>> Avi Kivity wrote:
>>> Gleb Natapov wrote:
>>>> On Wed, Sep 05, 2007 at 07:38:48PM +0300, Avi Kivity wrote:
>>>>  
>>>>> This sample patch adds a new mechanism, pte notifiers, that allows 
>>>>> drivers
>>>>> to register an interest in a changes to ptes. Whenever Linux changes a
>>>>> pte, it will call a notifier to allow the driver to adjust the external
>>>>> page table and flush its tlb.
>>>>>     
>>>> How is this different from http://lwn.net/Articles/133627/? AFAIR the
>>>> patch was rejected because there was only one user for it and it was
>>>> decided that it would be better to maintain it out of tree for a while.
>>>>   
>>>
>>> Your patch is more complete.
>>>
>>> There are now at least three users: you, kvm, and newer Infiniband HCAs.  
>>> Care to resurrect the patch?
>> We (Cray) also use the ioproc patch.  AFAIK the current maintainer is Dan 
>> Blueman at Quadrics.
>
> I should add that the IOPROC patches are maintained internally to loosely 
> track mainline kernels; however, we do not generally release [1] these 
> until they've passed quite a lot of validation (driven by customer demand 
> mostly) on various configurations.
>
> Quite a few large users/groups would benefit from this; the IOPROC patches 
> have been stable for quite a while now, so are a good option.
>
> If you have any feedback/suggestions that would help forward progress, I'm 
> happy to hear and address them.
>
Posting the patch against current kernel (-mm or mainline) here would
be certainly helpful.

Thanks,

--
			Gleb.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2007-09-11 11:19 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-09-05 16:38 [PATCH][RFC]: pte notifiers -- support for external page tables Avi Kivity
2007-09-05 19:05 ` Rik van Riel
2007-09-05 19:14   ` Avi Kivity
2007-09-05 19:23     ` Rik van Riel
2007-09-05 19:32       ` Avi Kivity
2007-09-06 11:28         ` Jeremy Fitzhardinge
2007-09-05 20:40 ` Jack Steiner
2007-09-05 20:40   ` Avi Kivity
2007-09-05 20:42   ` Avi Kivity
2007-09-06  6:24 ` [ofa-general] " Gleb Natapov
2007-09-06  8:35   ` Avi Kivity
2007-09-06  8:41     ` Gleb Natapov
2007-09-10 18:17     ` Andrew Hastings
2007-09-11 10:37       ` Daniel J Blueman
2007-09-11 11:19         ` Gleb Natapov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox