From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	x86@kernel.org, Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Arnd Bergmann <arnd@arndb.de>,
	"H. Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <ak@linux.intel.com>,
	Dave Hansen <dave.hansen@intel.com>,
	Andy Lutomirski <luto@amacapital.net>,
	Michal Hocko <mhocko@suse.com>,
	linux-arch@vger.kernel.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Xiong Zhang <xiong.y.zhang@intel.com>
Subject: [PATCH 11/26] x86/xen: convert __xen_pgd_walk() and xen_cleanmfnmap() to support p4d
Date: Mon, 13 Mar 2017 08:50:05 +0300
Message-ID: <20170313055020.69655-12-kirill.shutemov@linux.intel.com>
In-Reply-To: <20170313055020.69655-1-kirill.shutemov@linux.intel.com>

Split these helpers into a few per-level functions and add p4d support.

Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
[kirill.shutemov@linux.intel.com: split off into separate patch]
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
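Note for reviewers skimming the diff: the new xen_pmd_walk()/xen_pud_walk()/
xen_p4d_walk() helpers replace the old goto-based limit checks with a "last"
flag, so a level only clamps its loop to *_index(limit) + 1 when it sits on
the path that contains the limit address. Below is a minimal, self-contained
sketch of that clamping pattern (illustrative only, not kernel code; the
entry counts, shift value and walk_high()/walk_low() names are made up for
the example):

/*
 * Toy model (not kernel code) of the limit-propagation pattern used by
 * the new per-level walkers: each level visits all of its entries unless
 * the caller passes last=true, in which case it clamps its loop to
 * index(limit) + 1.
 */
#include <stdbool.h>
#include <stdio.h>

#define ENTRIES_PER_LEVEL 4		/* stand-in for PTRS_PER_* */
#define LOW_SHIFT 2			/* bits translated by the lower level */

static int low_index(unsigned long addr)  { return addr & (ENTRIES_PER_LEVEL - 1); }
static int high_index(unsigned long addr) { return (addr >> LOW_SHIFT) & (ENTRIES_PER_LEVEL - 1); }

static void walk_low(int hi, bool last, unsigned long limit)
{
	int nr = last ? low_index(limit) + 1 : ENTRIES_PER_LEVEL;
	int i;

	for (i = 0; i < nr; i++)
		printf("visit (%d,%d)\n", hi, i);
}

static void walk_high(unsigned long limit)
{
	int nr = high_index(limit) + 1;
	int i;

	for (i = 0; i < nr; i++)
		/* only the last top-level slot clamps the lower-level loop */
		walk_low(i, i == nr - 1, limit);
}

int main(void)
{
	walk_high(9);	/* limit 9 == (2,1): visits (0,0..3), (1,0..3), (2,0..1) */
	return 0;
}

With limit 9 this visits every entry under the first two top-level slots but
stops after index 1 in the last one, which mirrors how the p4d/pud/pmd
helpers in the patch chain the clamp down the levels.
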
 arch/x86/xen/mmu.c | 245 ++++++++++++++++++++++++++++++++---------------------
 arch/x86/xen/mmu.h |   1 +
 2 files changed, 150 insertions(+), 96 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 37cb5aad71de..c49e165fde60 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -593,6 +593,64 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
 }
 #endif	/* CONFIG_PGTABLE_LEVELS == 4 */
 
+static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
+		int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+		bool last, unsigned long limit)
+{
+	int i, nr, flush = 0;
+
+	nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD;
+	for (i = 0; i < nr; i++) {
+		if (!pmd_none(pmd[i]))
+			flush |= (*func)(mm, pmd_page(pmd[i]), PT_PTE);
+	}
+	return flush;
+}
+
+static int xen_pud_walk(struct mm_struct *mm, pud_t *pud,
+		int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+		bool last, unsigned long limit)
+{
+	int i, nr, flush = 0;
+
+	nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD;
+	for (i = 0; i < nr; i++) {
+		pmd_t *pmd;
+
+		if (pud_none(pud[i]))
+			continue;
+
+		pmd = pmd_offset(&pud[i], 0);
+		if (PTRS_PER_PMD > 1)
+			flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
+		flush |= xen_pmd_walk(mm, pmd, func,
+				last && i == nr - 1, limit);
+	}
+	return flush;
+}
+
+static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
+		int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+		bool last, unsigned long limit)
+{
+	int i, nr, flush = 0;
+
+	nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
+	for (i = 0; i < nr; i++) {
+		pud_t *pud;
+
+		if (p4d_none(p4d[i]))
+			continue;
+
+		pud = pud_offset(&p4d[i], 0);
+		if (PTRS_PER_PUD > 1)
+			flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
+		flush |= xen_pud_walk(mm, pud, func,
+				last && i == nr - 1, limit);
+	}
+	return flush;
+}
+
 /*
  * (Yet another) pagetable walker.  This one is intended for pinning a
  * pagetable.  This means that it walks a pagetable and calls the
@@ -613,10 +671,8 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
 				      enum pt_level),
 			  unsigned long limit)
 {
-	int flush = 0;
+	int i, nr, flush = 0;
 	unsigned hole_low, hole_high;
-	unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
-	unsigned pgdidx, pudidx, pmdidx;
 
 	/* The limit is the last byte to be touched */
 	limit--;
@@ -633,65 +689,22 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
 	hole_low = pgd_index(USER_LIMIT);
 	hole_high = pgd_index(PAGE_OFFSET);
 
-	pgdidx_limit = pgd_index(limit);
-#if PTRS_PER_PUD > 1
-	pudidx_limit = pud_index(limit);
-#else
-	pudidx_limit = 0;
-#endif
-#if PTRS_PER_PMD > 1
-	pmdidx_limit = pmd_index(limit);
-#else
-	pmdidx_limit = 0;
-#endif
-
-	for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
-		pud_t *pud;
+	nr = pgd_index(limit) + 1;
+	for (i = 0; i < nr; i++) {
+		p4d_t *p4d;
 
-		if (pgdidx >= hole_low && pgdidx < hole_high)
+		if (i >= hole_low && i < hole_high)
 			continue;
 
-		if (!pgd_val(pgd[pgdidx]))
+		if (pgd_none(pgd[i]))
 			continue;
 
-		pud = pud_offset(&pgd[pgdidx], 0);
-
-		if (PTRS_PER_PUD > 1) /* not folded */
-			flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
-
-		for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
-			pmd_t *pmd;
-
-			if (pgdidx == pgdidx_limit &&
-			    pudidx > pudidx_limit)
-				goto out;
-
-			if (pud_none(pud[pudidx]))
-				continue;
-
-			pmd = pmd_offset(&pud[pudidx], 0);
-
-			if (PTRS_PER_PMD > 1) /* not folded */
-				flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
-
-			for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
-				struct page *pte;
-
-				if (pgdidx == pgdidx_limit &&
-				    pudidx == pudidx_limit &&
-				    pmdidx > pmdidx_limit)
-					goto out;
-
-				if (pmd_none(pmd[pmdidx]))
-					continue;
-
-				pte = pmd_page(pmd[pmdidx]);
-				flush |= (*func)(mm, pte, PT_PTE);
-			}
-		}
+		p4d = p4d_offset(&pgd[i], 0);
+		if (PTRS_PER_P4D > 1)
+			flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
+		flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
 	}
 
-out:
 	/* Do the top level last, so that the callbacks can use it as
 	   a cue to do final things like tlb flushes. */
 	flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
@@ -1150,57 +1163,97 @@ static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin)
 	xen_free_ro_pages(pa, PAGE_SIZE);
 }
 
+static void __init xen_cleanmfnmap_pmd(pmd_t *pmd, bool unpin)
+{
+	unsigned long pa;
+	pte_t *pte_tbl;
+	int i;
+
+	if (pmd_large(*pmd)) {
+		pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
+		xen_free_ro_pages(pa, PMD_SIZE);
+		return;
+	}
+
+	pte_tbl = pte_offset_kernel(pmd, 0);
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		if (pte_none(pte_tbl[i]))
+			continue;
+		pa = pte_pfn(pte_tbl[i]) << PAGE_SHIFT;
+		xen_free_ro_pages(pa, PAGE_SIZE);
+	}
+	set_pmd(pmd, __pmd(0));
+	xen_cleanmfnmap_free_pgtbl(pte_tbl, unpin);
+}
+
+static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin)
+{
+	unsigned long pa;
+	pmd_t *pmd_tbl;
+	int i;
+
+	if (pud_large(*pud)) {
+		pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
+		xen_free_ro_pages(pa, PUD_SIZE);
+		return;
+	}
+
+	pmd_tbl = pmd_offset(pud, 0);
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		if (pmd_none(pmd_tbl[i]))
+			continue;
+		xen_cleanmfnmap_pmd(pmd_tbl + i, unpin);
+	}
+	set_pud(pud, __pud(0));
+	xen_cleanmfnmap_free_pgtbl(pmd_tbl, unpin);
+}
+
+static void __init xen_cleanmfnmap_p4d(p4d_t *p4d, bool unpin)
+{
+	unsigned long pa;
+	pud_t *pud_tbl;
+	int i;
+
+	if (p4d_large(*p4d)) {
+		pa = p4d_val(*p4d) & PHYSICAL_PAGE_MASK;
+		xen_free_ro_pages(pa, P4D_SIZE);
+		return;
+	}
+
+	pud_tbl = pud_offset(p4d, 0);
+	for (i = 0; i < PTRS_PER_PUD; i++) {
+		if (pud_none(pud_tbl[i]))
+			continue;
+		xen_cleanmfnmap_pud(pud_tbl + i, unpin);
+	}
+	set_p4d(p4d, __p4d(0));
+	xen_cleanmfnmap_free_pgtbl(pud_tbl, unpin);
+}
+
 /*
  * Since it is well isolated we can (and since it is perhaps large we should)
  * also free the page tables mapping the initial P->M table.
  */
 static void __init xen_cleanmfnmap(unsigned long vaddr)
 {
-	unsigned long va = vaddr & PMD_MASK;
-	unsigned long pa;
-	pgd_t *pgd = pgd_offset_k(va);
-	pud_t *pud_page = pud_offset(pgd, 0);
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
+	pgd_t *pgd;
+	p4d_t *p4d;
 	unsigned int i;
 	bool unpin;
 
 	unpin = (vaddr == 2 * PGDIR_SIZE);
-	set_pgd(pgd, __pgd(0));
-	do {
-		pud = pud_page + pud_index(va);
-		if (pud_none(*pud)) {
-			va += PUD_SIZE;
-		} else if (pud_large(*pud)) {
-			pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
-			xen_free_ro_pages(pa, PUD_SIZE);
-			va += PUD_SIZE;
-		} else {
-			pmd = pmd_offset(pud, va);
-			if (pmd_large(*pmd)) {
-				pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
-				xen_free_ro_pages(pa, PMD_SIZE);
-			} else if (!pmd_none(*pmd)) {
-				pte = pte_offset_kernel(pmd, va);
-				set_pmd(pmd, __pmd(0));
-				for (i = 0; i < PTRS_PER_PTE; ++i) {
-					if (pte_none(pte[i]))
-						break;
-					pa = pte_pfn(pte[i]) << PAGE_SHIFT;
-					xen_free_ro_pages(pa, PAGE_SIZE);
-				}
-				xen_cleanmfnmap_free_pgtbl(pte, unpin);
-			}
-			va += PMD_SIZE;
-			if (pmd_index(va))
-				continue;
-			set_pud(pud, __pud(0));
-			xen_cleanmfnmap_free_pgtbl(pmd, unpin);
-		}
-
-	} while (pud_index(va) || pmd_index(va));
-	xen_cleanmfnmap_free_pgtbl(pud_page, unpin);
+	vaddr &= PMD_MASK;
+	pgd = pgd_offset_k(vaddr);
+	p4d = p4d_offset(pgd, 0);
+	for (i = 0; i < PTRS_PER_P4D; i++) {
+		if (p4d_none(p4d[i]))
+			continue;
+		xen_cleanmfnmap_p4d(p4d + i, unpin);
+	}
+	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+		set_pgd(pgd, __pgd(0));
+		xen_cleanmfnmap_free_pgtbl(p4d, unpin);
+	}
 }
 
 static void __init xen_pagetable_p2m_free(void)
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 73809bb951b4..3fe2b3292915 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -5,6 +5,7 @@
 
 enum pt_level {
 	PT_PGD,
+	PT_P4D,
 	PT_PUD,
 	PT_PMD,
 	PT_PTE
-- 
2.11.0


Thread overview: 47+ messages
2017-03-13  5:49 [PATCH 00/26] x86: 5-level paging enabling for v4.12 Kirill A. Shutemov
2017-03-13  5:49 ` [PATCH 01/26] x86: basic changes into headers for 5-level paging Kirill A. Shutemov
2017-03-13  5:49 ` [PATCH 02/26] x86: trivial portion of 5-level paging conversion Kirill A. Shutemov
2017-03-13  5:49 ` [PATCH 03/26] x86/gup: add 5-level paging support Kirill A. Shutemov
2017-03-13  5:49 ` [PATCH 04/26] x86/ident_map: " Kirill A. Shutemov
2017-03-13  5:49 ` [PATCH 05/26] x86/mm: add support of p4d_t in vmalloc_fault() Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 06/26] x86/power: support p4d_t in hibernate code Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 07/26] x86/kexec: support p4d_t Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 08/26] x86/efi: handle p4d in EFI pagetables Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 09/26] x86/mm/pat: handle additional page table Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 10/26] x86/kasan: prepare clear_pgds() to switch to <asm-generic/pgtable-nop4d.h> Kirill A. Shutemov
2017-03-13  5:50 ` Kirill A. Shutemov [this message]
2017-03-13  5:50 ` [PATCH 12/26] x86: convert the rest of the code to support p4d_t Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 13/26] x86: detect 5-level paging support Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 14/26] x86/asm: remove __VIRTUAL_MASK_SHIFT==47 assert Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 15/26] x86/mm: define virtual memory map for 5-level paging Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 16/26] x86/paravirt: make paravirt code support " Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 17/26] x86/mm: basic defines/helpers for CONFIG_X86_5LEVEL Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 18/26] x86/dump_pagetables: support 5-level paging Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 19/26] x86/kasan: extend to " Kirill A. Shutemov
2017-03-13  7:25   ` Dmitry Vyukov
2017-03-13  5:50 ` [PATCH 20/26] x86/espfix: " Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 21/26] x86/mm: add support of additional page table level during early boot Kirill A. Shutemov
2017-03-13  7:18   ` Ingo Molnar
2017-04-05 11:36     ` Kirill A. Shutemov
2017-04-05 15:32       ` Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 22/26] x86/mm: add sync_global_pgds() for configuration with 5-level paging Kirill A. Shutemov
2017-03-13  7:22   ` Ingo Molnar
2017-03-13  5:50 ` [PATCH 23/26] x86/mm: make kernel_physical_mapping_init() support " Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 24/26] x86/mm: add support for 5-level paging for KASLR Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 25/26] x86: enable 5-level paging support Kirill A. Shutemov
2017-03-13  5:50 ` [PATCH 26/26] x86/mm: allow to have userspace mappings above 47-bits Kirill A. Shutemov
2017-03-17 17:53   ` Aneesh Kumar K.V
2017-03-17 17:57     ` Kirill A. Shutemov
2017-03-19  8:24       ` Aneesh Kumar K.V
2017-03-19  8:26         ` Kirill A. Shutemov
2017-03-20 18:08           ` hpa
2017-03-20 18:38             ` Matthew Wilcox
2017-03-24  8:59             ` Kirill A. Shutemov
2017-03-19  8:55         ` Aneesh Kumar K.V
2017-03-24  9:03           ` Kirill A. Shutemov
2017-03-20  9:15         ` Michael Ellerman
2017-03-20  5:10   ` Aneesh Kumar K.V
2017-03-24  9:04     ` Kirill A. Shutemov
2017-03-24  9:14       ` Aneesh Kumar K.V
2017-03-24  9:30         ` Kirill A. Shutemov
2017-03-13  7:49 ` [PATCH 00/26] x86: 5-level paging enabling for v4.12 Ingo Molnar
