From: Carsten Otte <cotte@de.ibm.com>
From: Heiko Carstens <heiko.carstens@de.ibm.com>
From: Christian Borntraeger <borntraeger@de.ibm.com>
To: virtualization@lists.linux-foundation.org,
kvm-devel@lists.sourceforge.net, Avi Kivity <avi@qumranet.com>,
Nick Piggin <npiggin@suse.de>,
Andrew Morton <akpm@linux-foundation.org>,
hugh@veritas.com,
Linux Memory Management List <linux-mm@kvack.org>
Cc: schwidefsky@de.ibm.com, heiko.carstens@de.ibm.com, os@de.ibm.com,
borntraeger@de.ibm.com, hollisb@us.ibm.com, EHRHARDT@de.ibm.com,
jeroney@us.ibm.com, aliguori@us.ibm.com, jblunck@suse.de,
rvdheij@gmail.com, rusty@rustcorp.com.au, arnd@arndb.de, "Zhang,
Xiantao" <xiantao.zhang@intel.com>
Subject: [RFC/PATCH 02/15 v2] preparation: host memory management changes for s390 kvm
Date: Sat, 22 Mar 2008 18:02:39 +0100 [thread overview]
Message-ID: <1206205359.7177.84.camel@cotte.boeblingen.de.ibm.com> (raw)
In-Reply-To: <1206203560.7177.45.camel@cotte.boeblingen.de.ibm.com>
This patch changes the s390 memory management definitions to use the pgste field
for dirty and reference bit tracking of host and guest code. Usually on s390,
dirty and referenced are tracked in storage keys, which belong to the physical
page. This changes with virtualization: The guest and host dirty/reference bits
are defined to be the logical OR of the values for the mapping and the physical
page. This patch implements the necessary changes in pgtable.h for s390.
There is a common code change in mm/rmap.c, the call to page_test_and_clear_young
must be moved. This is a no-op for all architectures but s390. page_referenced
checks the referenced bits for the physical page and for all mappings:
o The physical page is checked with page_test_and_clear_young.
o The mappings are checked with ptep_test_and_clear_young and friends.
Without pgstes (the current implementation on Linux s390) the physical page
check is implemented but the mapping callbacks are no-ops because dirty
and referenced are not tracked in the s390 page tables. The pgstes introduce
guest and host dirty and reference bits for s390 in the host mapping. These
mappings must be checked before page_test_and_clear_young resets the reference
bit.
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
include/asm-s390/pgtable.h | 109 +++++++++++++++++++++++++++++++++++++++++++--
mm/rmap.c | 7 +-
2 files changed, 110 insertions(+), 6 deletions(-)
Index: linux-host/include/asm-s390/pgtable.h
===================================================================
--- linux-host.orig/include/asm-s390/pgtable.h
+++ linux-host/include/asm-s390/pgtable.h
@@ -30,6 +30,7 @@
*/
#ifndef __ASSEMBLY__
#include <linux/mm_types.h>
+#include <asm/atomic.h>
#include <asm/bug.h>
#include <asm/processor.h>
@@ -258,6 +259,13 @@ extern char empty_zero_page[PAGE_SIZE];
* swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid.
*/
+/* Page status extended for virtualization */
+#define _PAGE_RCP_PCL 0x0080000000000000UL
+#define _PAGE_RCP_HR 0x0040000000000000UL
+#define _PAGE_RCP_HC 0x0020000000000000UL
+#define _PAGE_RCP_GR 0x0004000000000000UL
+#define _PAGE_RCP_GC 0x0002000000000000UL
+
#ifndef __s390x__
/* Bits in the segment table address-space-control-element */
@@ -513,6 +521,67 @@ static inline int pte_file(pte_t pte)
#define __HAVE_ARCH_PTE_SAME
#define pte_same(a,b) (pte_val(a) == pte_val(b))
+static inline void rcp_lock(pte_t *ptep)
+{
+#ifdef CONFIG_PGSTE
+ atomic64_t *rcp = (atomic64_t *) (ptep + PTRS_PER_PTE);
+ preempt_disable();
+ atomic64_set_mask(_PAGE_RCP_PCL, rcp);
+#endif
+}
+
+static inline void rcp_unlock(pte_t *ptep)
+{
+#ifdef CONFIG_PGSTE
+ atomic64_t *rcp = (atomic64_t *) (ptep + PTRS_PER_PTE);
+ atomic64_clear_mask(_PAGE_RCP_PCL, rcp);
+ preempt_enable();
+#endif
+}
+
+static inline void rcp_set_bits(pte_t *ptep, unsigned long val)
+{
+#ifdef CONFIG_PGSTE
+ *(unsigned long *) (ptep + PTRS_PER_PTE) |= val;
+#endif
+}
+
+static inline int rcp_test_and_clear_bits(pte_t *ptep, unsigned long val)
+{
+#ifdef CONFIG_PGSTE
+ unsigned long ret;
+
+ ret = *(unsigned long *) (ptep + PTRS_PER_PTE);
+ *(unsigned long *) (ptep + PTRS_PER_PTE) &= ~val;
+ return (ret & val) == val;
+#else
+ return 0;
+#endif
+}
+
+
+/* forward declaration for SetPageUptodate in page-flags.h*/
+static inline void page_clear_dirty(struct page *page);
+#include <linux/page-flags.h>
+
+static inline void ptep_rcp_copy(pte_t *ptep)
+{
+#ifdef CONFIG_PGSTE
+ struct page *page = virt_to_page(pte_val(*ptep));
+ unsigned int skey;
+
+ skey = page_get_storage_key(page_to_phys(page));
+ if (skey & _PAGE_CHANGED)
+ rcp_set_bits(ptep, _PAGE_RCP_GC);
+ if (skey & _PAGE_REFERENCED)
+ rcp_set_bits(ptep, _PAGE_RCP_GR);
+ if (rcp_test_and_clear_bits(ptep, _PAGE_RCP_HC))
+ SetPageDirty(page);
+ if (rcp_test_and_clear_bits(ptep, _PAGE_RCP_HR))
+ SetPageReferenced(page);
+#endif
+}
+
/*
* query functions pte_write/pte_dirty/pte_young only work if
* pte_present() is true. Undefined behaviour if not..
@@ -599,6 +668,8 @@ static inline void pmd_clear(pmd_t *pmd)
static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
+ if (mm->context.pgstes)
+ ptep_rcp_copy(ptep);
pte_val(*ptep) = _PAGE_TYPE_EMPTY;
if (mm->context.noexec)
pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY;
@@ -667,6 +738,22 @@ static inline pte_t pte_mkyoung(pte_t pt
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
+#ifdef CONFIG_PGSTE
+ unsigned long physpage;
+ int young;
+
+ if (!vma->vm_mm->context.pgstes)
+ return 0;
+ physpage = pte_val(*ptep) & PAGE_MASK;
+
+ young = ((page_get_storage_key(physpage) & _PAGE_REFERENCED) != 0);
+ rcp_lock(ptep);
+ if (young)
+ rcp_set_bits(ptep, _PAGE_RCP_GR);
+ young |= rcp_test_and_clear_bits(ptep, _PAGE_RCP_HR);
+ rcp_unlock(ptep);
+ return young;
+#endif
return 0;
}
@@ -674,7 +761,13 @@ static inline int ptep_test_and_clear_yo
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
- /* No need to flush TLB; bits are in storage key */
+ /* No need to flush TLB
+ * On s390 reference bits are in storage key and never in TLB
+ * With virtualization we handle the reference bit, without it
+ * we can simply return */
+#ifdef CONFIG_PGSTE
+ return ptep_test_and_clear_young(vma, address, ptep);
+#endif
return 0;
}
@@ -693,15 +786,25 @@ static inline void __ptep_ipte(unsigned
: "=m" (*ptep) : "m" (*ptep),
"a" (pto), "a" (address));
}
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
}
static inline void ptep_invalidate(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
+ if (mm->context.pgstes) {
+ rcp_lock(ptep);
+ __ptep_ipte(address, ptep);
+ ptep_rcp_copy(ptep);
+ pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ rcp_unlock(ptep);
+ return;
+ }
__ptep_ipte(address, ptep);
- if (mm->context.noexec)
+ pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ if (mm->context.noexec) {
__ptep_ipte(address, ptep + PTRS_PER_PTE);
+ pte_val(*(ptep + PTRS_PER_PTE)) = _PAGE_TYPE_EMPTY;
+ }
}
/*
Index: linux-host/mm/rmap.c
===================================================================
--- linux-host.orig/mm/rmap.c
+++ linux-host/mm/rmap.c
@@ -413,9 +413,6 @@ int page_referenced(struct page *page, i
{
int referenced = 0;
- if (page_test_and_clear_young(page))
- referenced++;
-
if (TestClearPageReferenced(page))
referenced++;
@@ -433,6 +430,10 @@ int page_referenced(struct page *page, i
unlock_page(page);
}
}
+
+ if (page_test_and_clear_young(page))
+ referenced++;
+
return referenced;
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2008-03-22 17:02 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <1206030270.6690.51.camel@cotte.boeblingen.de.ibm.com>
2008-03-22 17:02 ` [RFC/PATCH 00/15 v2] kvm on big iron Carsten Otte
2008-03-25 17:47 ` [RFC/PATCH 00/15 v3] " Carsten Otte
2008-03-27 12:02 ` Avi Kivity
[not found] ` <1206458154.6217.12.camel@cotte.boeblingen.de.ibm.com>
2008-03-25 17:47 ` [RFC/PATCH 01/15 v3] preparation: provide hook to enable pgstes in user pagetable Carsten Otte, Martin Schwidefsky, Carsten Otte
2008-03-25 17:47 ` [RFC/PATCH 02/15 v3] preparation: host memory management changes for s390 kvm Carsten Otte, Heiko Carstens, Christian Borntraeger
[not found] ` <1206203560.7177.45.camel@cotte.boeblingen.de.ibm.com>
2008-03-22 17:02 ` [RFC/PATCH 01/15 v2] preparation: provide hook to enable pgstes in user pagetable Carsten Otte, Martin Schwidefsky
2008-03-24 21:50 ` Andrew Morton
2008-03-22 17:02 ` Carsten Otte, Heiko Carstens, Christian Borntraeger [this message]
2008-03-24 21:52 ` [RFC/PATCH 02/15 v2] preparation: host memory management changes for s390 kvm Andrew Morton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1206205359.7177.84.camel@cotte.boeblingen.de.ibm.com \
--to=cotte@de.ibm.com \
--cc=EHRHARDT@de.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=aliguori@us.ibm.com \
--cc=arnd@arndb.de \
--cc=avi@qumranet.com \
--cc=borntraeger@de.ibm.com \
--cc=heiko.carstens@de.ibm.com \
--cc=hollisb@us.ibm.com \
--cc=hugh@veritas.com \
--cc=jblunck@suse.de \
--cc=jeroney@us.ibm.com \
--cc=kvm-devel@lists.sourceforge.net \
--cc=linux-mm@kvack.org \
--cc=npiggin@suse.de \
--cc=os@de.ibm.com \
--cc=rusty@rustcorp.com.au \
--cc=rvdheij@gmail.com \
--cc=schwidefsky@de.ibm.com \
--cc=virtualization@lists.linux-foundation.org \
--cc=xiantao.zhang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox