From: Ning Qu <quning@google.com>
To: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>,
Andrew Morton <akpm@linux-foundation.org>,
Hugh Dickins <hughd@google.com>,
Al Viro <viro@zeniv.linux.org.uk>,
Wu Fengguang <fengguang.wu@intel.com>, Jan Kara <jack@suse.cz>,
Mel Gorman <mgorman@suse.de>,
linux-mm@kvack.org, Andi Kleen <ak@linux.intel.com>,
Matthew Wilcox <willy@linux.intel.com>,
Hillf Danton <dhillf@gmail.com>, Dave Hansen <dave@sr71.net>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH 04/12] mm, thp, tmpfs: split huge page when moving from page cache to swap
Date: Fri, 18 Oct 2013 11:16:20 -0700 [thread overview]
Message-ID: <20131018181620.GA6970@hippobay.mtv.corp.google.com> (raw)
In-Reply-To: <CACz4_2eoRoyUU1G3veS=veWTi1HtPrgLQK0tyXONXcQj1Xi4EQ@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 7499 bytes --]
New patch below, which handles all of the pages after the huge page is split.
---
include/linux/huge_mm.h | 2 ++
mm/shmem.c | 79 ++++++++++++++++++++++++++++++++++++-------------
2 files changed, 61 insertions(+), 20 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 65f90db..58b0208 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -64,6 +64,7 @@ extern pmd_t *page_check_address_pmd(struct page *page,
#define HPAGE_PMD_SHIFT PMD_SHIFT
#define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT)
#define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1))
+#define HPAGE_NR_PAGES HPAGE_PMD_NR
extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
@@ -207,6 +208,7 @@ extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vm
#define THP_READ_ALLOC_FAILED ({ BUILD_BUG(); 0; })
#define hpage_nr_pages(x) 1
+#define HPAGE_NR_PAGES 1
#define transparent_hugepage_enabled(__vma) 0
#define transparent_hugepage_defrag(__vma) 0
diff --git a/mm/shmem.c b/mm/shmem.c
index 5bde8d0..b80ace7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -862,14 +862,16 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
struct shmem_inode_info *info;
struct address_space *mapping;
struct inode *inode;
- swp_entry_t swap;
+ swp_entry_t swap[HPAGE_NR_PAGES];
pgoff_t index;
+ int nr = 1;
+ int i;
BUG_ON(!PageLocked(page));
mapping = page->mapping;
- index = page->index;
inode = mapping->host;
info = SHMEM_I(inode);
+
if (info->flags & VM_LOCKED)
goto redirty;
if (!total_swap_pages)
@@ -887,6 +889,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
goto redirty;
}
+ index = page->index;
/*
* This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
* value into swapfile.c, the only way we can correctly account for a
@@ -906,21 +909,35 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
if (shmem_falloc &&
index >= shmem_falloc->start &&
index < shmem_falloc->next)
- shmem_falloc->nr_unswapped++;
+ shmem_falloc->nr_unswapped +=
+ hpagecache_nr_pages(page);
else
shmem_falloc = NULL;
spin_unlock(&inode->i_lock);
if (shmem_falloc)
goto redirty;
}
- clear_highpage(page);
+ clear_pagecache_page(page);
flush_dcache_page(page);
SetPageUptodate(page);
}
- swap = get_swap_page();
- if (!swap.val)
- goto redirty;
+ /* We can only have nr correct after huge page splitted,
+ * otherwise, it will fail the redirty logic
+ */
+ nr = hpagecache_nr_pages(page);
+ /* We have to break the huge page at this point,
+ * since we have no idea how to swap a huge page.
+ */
+ if (PageTransHugeCache(page))
+ split_huge_page(compound_trans_head(page));
+
+ /* Pre-allocate all the swap pages */
+ for (i = 0; i < nr; i++) {
+ swap[i] = get_swap_page();
+ if (!swap[i].val)
+ goto undo_alloc_swap;
+ }
/*
* Add inode to shmem_unuse()'s list of swapped-out inodes,
@@ -934,25 +951,47 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
if (list_empty(&info->swaplist))
list_add_tail(&info->swaplist, &shmem_swaplist);
- if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
- swap_shmem_alloc(swap);
- shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
+ for (i = 0; i < nr; i++) {
+ if (add_to_swap_cache(page + i, swap[i], GFP_ATOMIC))
+ goto undo_add_to_swap_cache;
+ }
- spin_lock(&info->lock);
- info->swapped++;
- shmem_recalc_inode(inode);
- spin_unlock(&info->lock);
+ /* We make sure everything is correct before moving further */
+ for (i = 0; i < nr; i++) {
+ swap_shmem_alloc(swap[i]);
+ shmem_delete_from_page_cache(page + i,
+ swp_to_radix_entry(swap[i]));
+ }
- mutex_unlock(&shmem_swaplist_mutex);
- BUG_ON(page_mapped(page));
- swap_writepage(page, wbc);
- return 0;
+ spin_lock(&info->lock);
+ info->swapped += nr;
+ shmem_recalc_inode(inode);
+ spin_unlock(&info->lock);
+
+ mutex_unlock(&shmem_swaplist_mutex);
+
+ for (i = 0; i < nr; i++) {
+ BUG_ON(page_mapped(page + i));
+ swap_writepage(page + i, wbc);
}
+ return 0;
+
+undo_add_to_swap_cache:
+ while (i) {
+ i--;
+ __delete_from_swap_cache(page + i);
+ }
mutex_unlock(&shmem_swaplist_mutex);
- swapcache_free(swap, NULL);
+ i = nr;
+undo_alloc_swap:
+ while (i) {
+ i--;
+ swapcache_free(swap[i], NULL);
+ }
redirty:
- set_page_dirty(page);
+ for (i = 0; i < nr; i++)
+ set_page_dirty(page + i);
if (wbc->for_reclaim)
return AOP_WRITEPAGE_ACTIVATE; /* Return with page locked */
unlock_page(page);
--
Best wishes,
--
Ning Qu (曲宁) | Software Engineer | quning@google.com | +1-408-418-6066
On Tue, Oct 15, 2013 at 12:00 PM, Ning Qu <quning@google.com> wrote:
> Let me take another look at that logic. Thanks!
> Best wishes,
> --
> Ning Qu (曲宁) | Software Engineer | quning@google.com | +1-408-418-6066
>
>
> On Tue, Oct 15, 2013 at 3:33 AM, Kirill A. Shutemov
> <kirill.shutemov@linux.intel.com> wrote:
> > Ning Qu wrote:
> >> in shmem_writepage, we have to split the huge page when moving pages
> >> from page cache to swap because we don't support huge page in swap
> >> yet.
> >>
> >> Signed-off-by: Ning Qu <quning@gmail.com>
> >> ---
> >> mm/shmem.c | 9 ++++++++-
> >> 1 file changed, 8 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/mm/shmem.c b/mm/shmem.c
> >> index 8fe17dd..68a0e1d 100644
> >> --- a/mm/shmem.c
> >> +++ b/mm/shmem.c
> >> @@ -898,6 +898,13 @@ static int shmem_writepage(struct page *page,
> struct writeback_control *wbc)
> >> swp_entry_t swap;
> >> pgoff_t index;
> >>
> >> + /* TODO: we have to break the huge page at this point,
> >> + * since we have no idea how to recover a huge page from
> >> + * swap.
> >> + */
> >> + if (PageTransCompound(page))
> >> + split_huge_page(compound_trans_head(page));
> >> +
> >
> > After the split you handle here only first small page of the huge page.
> > Is it what we want to do? Should we swap out all small pages of the huge
> > page?
> >
> > --
> > Kirill A. Shutemov
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>
[-- Attachment #2: Type: text/html, Size: 4834 bytes --]
prev parent reply other threads:[~2013-10-18 18:16 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-10-15 0:12 Ning Qu
2013-10-15 10:33 ` Kirill A. Shutemov
2013-10-15 19:00 ` Ning Qu
2013-10-18 18:16 ` Ning Qu [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20131018181620.GA6970@hippobay.mtv.corp.google.com \
--to=quning@google.com \
--cc=aarcange@redhat.com \
--cc=ak@linux.intel.com \
--cc=akpm@linux-foundation.org \
--cc=alexander.shishkin@linux.intel.com \
--cc=dave@sr71.net \
--cc=dhillf@gmail.com \
--cc=fengguang.wu@intel.com \
--cc=hughd@google.com \
--cc=jack@suse.cz \
--cc=kirill.shutemov@linux.intel.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@suse.de \
--cc=viro@zeniv.linux.org.uk \
--cc=willy@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox