linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: Hugh Dickins <hughd@google.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>
Cc: Dave Hansen <dave.hansen@intel.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	Christoph Lameter <cl@gentwo.org>,
	Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>,
	Jerome Marchand <jmarchan@redhat.com>,
	Yang Shi <yang.shi@linaro.org>,
	Sasha Levin <sasha.levin@oracle.com>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-fsdevel@vger.kernel.org,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Subject: [PATCHv4 16/25] thp, mlock: do not mlock PTE-mapped file huge pages
Date: Sat, 12 Mar 2016 01:59:08 +0300	[thread overview]
Message-ID: <1457737157-38573-17-git-send-email-kirill.shutemov@linux.intel.com> (raw)
In-Reply-To: <1457737157-38573-1-git-send-email-kirill.shutemov@linux.intel.com>

As with anon THP, we only mlock file huge pages if we can prove that the
page is not mapped with PTE. This way we can avoid mlock leak into
non-mlocked vma on split.

We rely on PageDoubleMap() under lock_page() to check if the the page
may be PTE mapped. PG_double_map is set by page_add_file_rmap() when the
page mapped with PTEs.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 include/linux/page-flags.h | 13 ++++++++++++-
 mm/huge_memory.c           | 27 ++++++++++++++++++++-------
 mm/mmap.c                  |  6 ++++++
 mm/page_alloc.c            |  2 ++
 mm/rmap.c                  | 16 ++++++++++++++--
 5 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index f4ed4f1b0c77..517707ae8cd1 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -544,6 +544,17 @@ static inline int PageDoubleMap(struct page *page)
 	return PageHead(page) && test_bit(PG_double_map, &page[1].flags);
 }
 
+static inline void SetPageDoubleMap(struct page *page)
+{
+	VM_BUG_ON_PAGE(!PageHead(page), page);
+	set_bit(PG_double_map, &page[1].flags);
+}
+
+static inline void ClearPageDoubleMap(struct page *page)
+{
+	VM_BUG_ON_PAGE(!PageHead(page), page);
+	clear_bit(PG_double_map, &page[1].flags);
+}
 static inline int TestSetPageDoubleMap(struct page *page)
 {
 	VM_BUG_ON_PAGE(!PageHead(page), page);
@@ -560,7 +571,7 @@ static inline int TestClearPageDoubleMap(struct page *page)
 TESTPAGEFLAG_FALSE(TransHuge)
 TESTPAGEFLAG_FALSE(TransCompound)
 TESTPAGEFLAG_FALSE(TransTail)
-TESTPAGEFLAG_FALSE(DoubleMap)
+PAGEFLAG_FALSE(DoubleMap)
 	TESTSETFLAG_FALSE(DoubleMap)
 	TESTCLEARFLAG_FALSE(DoubleMap)
 #endif
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ba5fdf654f27..a2680b2112e4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1427,6 +1427,8 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 		 * We don't mlock() pte-mapped THPs. This way we can avoid
 		 * leaking mlocked pages into non-VM_LOCKED VMAs.
 		 *
+		 * For anon THP:
+		 *
 		 * In most cases the pmd is the only mapping of the page as we
 		 * break COW for the mlock() -- see gup_flags |= FOLL_WRITE for
 		 * writable private mappings in populate_vma_page_range().
@@ -1434,15 +1436,26 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 		 * The only scenario when we have the page shared here is if we
 		 * mlocking read-only mapping shared over fork(). We skip
 		 * mlocking such pages.
+		 *
+		 * For file THP:
+		 *
+		 * We can expect PageDoubleMap() to be stable under page lock:
+		 * for file pages we set it in page_add_file_rmap(), which
+		 * requires page to be locked.
 		 */
-		if (compound_mapcount(page) == 1 && !PageDoubleMap(page) &&
-				page->mapping && trylock_page(page)) {
-			lru_add_drain();
-			if (page->mapping)
-				mlock_vma_page(page);
-			unlock_page(page);
-		}
+
+		if (PageAnon(page) && compound_mapcount(page) != 1)
+			goto skip_mlock;
+		if (PageDoubleMap(page) || !page->mapping)
+			goto skip_mlock;
+		if (!trylock_page(page))
+			goto skip_mlock;
+		lru_add_drain();
+		if (page->mapping && !PageDoubleMap(page))
+			mlock_vma_page(page);
+		unlock_page(page);
 	}
+skip_mlock:
 	page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 	if (flags & FOLL_GET)
diff --git a/mm/mmap.c b/mm/mmap.c
index 94979671b42c..1786d0b0244f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2576,6 +2576,12 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 		/* drop PG_Mlocked flag for over-mapped range */
 		for (tmp = vma; tmp->vm_start >= start + size;
 				tmp = tmp->vm_next) {
+			/*
+			 * Split pmd and munlock page on the border
+			 * of the range.
+			 */
+			vma_adjust_trans_huge(tmp, start, start + size, 0);
+
 			munlock_vma_pages_range(tmp,
 					max(tmp->vm_start, start),
 					min(tmp->vm_end, start + size));
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d6f042673e01..bce8b320fcce 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1009,6 +1009,8 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
 
 	if (PageAnon(page))
 		page->mapping = NULL;
+	if (compound)
+		ClearPageDoubleMap(page);
 	bad += free_pages_check(page);
 	for (i = 1; i < (1 << order); i++) {
 		if (compound)
diff --git a/mm/rmap.c b/mm/rmap.c
index 359ec5cff9b0..f48258a78c8a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1298,6 +1298,12 @@ void page_add_file_rmap(struct page *page, bool compound)
 			goto out;
 		__inc_zone_page_state(page, NR_FILE_THP_MAPPED);
 	} else {
+		if (PageTransCompound(page)) {
+			VM_BUG_ON_PAGE(!PageLocked(page), page);
+			SetPageDoubleMap(compound_head(page));
+			if (PageMlocked(page))
+				clear_page_mlock(compound_head(page));
+		}
 		if (!atomic_inc_and_test(&page->_mapcount))
 			goto out;
 	}
@@ -1472,8 +1478,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	 */
 	if (!(flags & TTU_IGNORE_MLOCK)) {
 		if (vma->vm_flags & VM_LOCKED) {
-			/* Holding pte lock, we do *not* need mmap_sem here */
-			mlock_vma_page(page);
+			/* PTE-mapped THP are never mlocked */
+			if (!PageTransCompound(page)) {
+				/*
+				 * Holding pte lock, we do *not* need
+				 * mmap_sem here
+				 */
+				mlock_vma_page(page);
+			}
 			ret = SWAP_MLOCK;
 			goto out_unmap;
 		}
-- 
2.7.0

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2016-03-11 23:06 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-11 22:58 [PATCHv4 00/25] THP-enabled tmpfs/shmem Kirill A. Shutemov
2016-03-11 22:58 ` [PATCHv4 01/25] mm: do not pass mm_struct into handle_mm_fault Kirill A. Shutemov
2016-03-11 22:58 ` [PATCHv4 02/25] mm: introduce fault_env Kirill A. Shutemov
2016-03-11 22:58 ` [PATCHv4 03/25] mm: postpone page table allocation until we have page to map Kirill A. Shutemov
2016-03-11 22:58 ` [PATCHv4 04/25] rmap: support file thp Kirill A. Shutemov
2016-03-18  9:40   ` Aneesh Kumar K.V
2016-03-19  1:01     ` Kirill A. Shutemov
2016-03-11 22:58 ` [PATCHv4 05/25] mm: introduce do_set_pmd() Kirill A. Shutemov
2016-03-11 22:58 ` [PATCHv4 06/25] mm, rmap: account file thp pages Kirill A. Shutemov
2016-03-15 15:30   ` [PATCHv5 " Kirill A. Shutemov
2016-03-11 22:58 ` [PATCHv4 07/25] thp, vmstats: add counters for huge file pages Kirill A. Shutemov
2016-03-11 22:59 ` [PATCHv4 08/25] thp: support file pages in zap_huge_pmd() Kirill A. Shutemov
2016-03-18 13:53   ` Aneesh Kumar K.V
2016-03-19  1:02     ` Kirill A. Shutemov
2016-03-21  4:33       ` Aneesh Kumar K.V
2016-03-21 14:39         ` Kirill A. Shutemov
2016-03-21 16:42           ` Aneesh Kumar K.V
2016-03-11 22:59 ` [PATCHv4 09/25] thp: handle file pages in split_huge_pmd() Kirill A. Shutemov
2016-03-11 22:59 ` [PATCHv4 10/25] thp: handle file COW faults Kirill A. Shutemov
2016-03-11 22:59 ` [PATCHv4 11/25] thp: handle file pages in mremap() Kirill A. Shutemov
2016-03-11 22:59 ` [PATCHv4 12/25] thp: skip file huge pmd on copy_huge_pmd() Kirill A. Shutemov
2016-03-11 22:59 ` [PATCHv4 13/25] thp: prepare change_huge_pmd() for file thp Kirill A. Shutemov
2016-03-11 22:59 ` [PATCHv4 14/25] thp: run vma_adjust_trans_huge() outside i_mmap_rwsem Kirill A. Shutemov
2016-03-11 22:59 ` [PATCHv4 15/25] thp: file pages support for split_huge_page() Kirill A. Shutemov
2016-03-11 22:59 ` Kirill A. Shutemov [this message]
2016-03-11 22:59 ` [PATCHv4 17/25] vmscan: split file huge pages before paging them out Kirill A. Shutemov
2016-03-11 22:59 ` [PATCHv4 18/25] page-flags: relax policy for PG_mappedtodisk and PG_reclaim Kirill A. Shutemov
2016-03-11 22:59 ` [PATCHv4 19/25] radix-tree: implement radix_tree_maybe_preload_order() Kirill A. Shutemov
2016-03-11 22:59 ` [PATCHv4 20/25] filemap: prepare find and delete operations for huge pages Kirill A. Shutemov
2016-03-11 22:59 ` [PATCHv4 21/25] truncate: handle file thp Kirill A. Shutemov
2016-03-11 22:59 ` [PATCHv4 22/25] shmem: prepare huge= mount option and sysfs knob Kirill A. Shutemov
2016-03-11 22:59 ` [PATCHv4 23/25] shmem: get_unmapped_area align huge page Kirill A. Shutemov
2016-03-11 22:59 ` [PATCHv4 24/25] shmem: add huge pages support Kirill A. Shutemov
2016-03-11 22:59 ` [PATCHv4 25/25] shmem, thp: respect MADV_{NO,}HUGEPAGE for file mappings Kirill A. Shutemov
2016-03-15 15:52 ` [PATCHv5 26/25] thp: update Documentation/vm/transhuge.txt Kirill A. Shutemov
2016-03-23 20:09 ` [PATCHv4 00/25] THP-enabled tmpfs/shmem Hugh Dickins
2016-03-24  9:17   ` Kirill A. Shutemov
2016-03-24 19:08     ` Hugh Dickins
2016-03-25 15:04       ` Kirill A. Shutemov
2016-03-26  0:00         ` Hugh Dickins
2016-03-28 18:00           ` Kirill A. Shutemov
2016-03-28 18:42             ` Hugh Dickins
2016-03-28 12:29   ` Kirill A. Shutemov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1457737157-38573-17-git-send-email-kirill.shutemov@linux.intel.com \
    --to=kirill.shutemov@linux.intel.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=cl@gentwo.org \
    --cc=dave.hansen@intel.com \
    --cc=hughd@google.com \
    --cc=jmarchan@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=n-horiguchi@ah.jp.nec.com \
    --cc=sasha.levin@oracle.com \
    --cc=vbabka@suse.cz \
    --cc=yang.shi@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox