From: Hugh Dickins <hughd@google.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
Andrea Arcangeli <aarcange@redhat.com>,
Andres Lagar-Cavilla <andreslc@google.com>,
Yang Shi <yang.shi@linaro.org>, Ning Qu <quning@gmail.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Michal Hocko <mhocko@kernel.org>,
linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH 18/31] huge tmpfs: mem_cgroup move charge on shmem huge pages
Date: Tue, 5 Apr 2016 14:44:07 -0700 (PDT)
Message-ID: <alpine.LSU.2.11.1604051441190.5965@eggly.anvils>
In-Reply-To: <alpine.LSU.2.11.1604051403210.5965@eggly.anvils>

Early on, for simplicity, we disabled moving huge tmpfs pages from
one memcg to another (nowadays only required when moving a task into a
memcg having move_charge_at_immigrate exceptionally set).  We're about
to add a couple of memcg stats for huge tmpfs, and will need to confront
how to handle moving those stats, so we had better enable moving the
pages now.

Although the pages are discovered by the pmd's get_mctgt_type_thp(),
they have to be considered page by page, in what is usually the pte
scan: the common case is for every member of the team to be owned by
the same memcg, but nowhere is that enforced - perhaps one day we shall
need to enforce such a limitation, but not so far.
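
In outline, both walkers end up sharing the same per-page loop: when
get_mctgt_type_thp() reports MC_TARGET_TEAM it hands back the team's
starting pfn, and with no pte table to walk, a pte value is synthesized
for each successive pfn instead.  A condensed sketch of the precharge
side (simplified, not the exact lines of the diff below):

	/*
	 * Condensed sketch of the shared per-page loop (precharge side):
	 * pte is NULL when we arrived here via a huge pmd mapping a team,
	 * in which case a pte value is synthesized from the next pfn of
	 * the huge page; otherwise the ordinary pte table is scanned.
	 */
	for (; addr != end; addr += PAGE_SIZE) {
		pte_t ptent = pte ? *(pte++) :
				    pfn_pte(pfn++, vma->vm_page_prot);

		if (get_mctgt_type(vma, addr, ptent, NULL))
			mc.precharge++;	/* increment precharge temporarily */
	}

The move_charge side follows the same pattern, but may run out of
precharge part way through a team: it then drops the lock, precharges
more and retries, re-offsetting the pfn by the now unaligned addr.
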
Signed-off-by: Hugh Dickins <hughd@google.com>
---
mm/memcontrol.c | 103 +++++++++++++++++++++++++---------------------
1 file changed, 58 insertions(+), 45 deletions(-)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4332,6 +4332,7 @@ static int mem_cgroup_do_precharge(unsig
* 2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
* target for charge migration. if @target is not NULL, the entry is stored
* in target->ent.
+ * 3(MC_TARGET_TEAM): if pmd entry is not an anon THP: check it page by page
*
* Called with pte lock held.
*/
@@ -4344,6 +4345,7 @@ enum mc_target_type {
MC_TARGET_NONE = 0,
MC_TARGET_PAGE,
MC_TARGET_SWAP,
+ MC_TARGET_TEAM,
};
static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
@@ -4565,19 +4567,22 @@ static enum mc_target_type get_mctgt_typ
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
- * We don't consider swapping or file mapped pages because THP does not
- * support them for now.
* Caller should make sure that pmd_trans_huge(pmd) is true.
*/
-static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
- unsigned long addr, pmd_t pmd, union mc_target *target)
+static enum mc_target_type get_mctgt_type_thp(pmd_t pmd,
+ union mc_target *target, unsigned long *pfn)
{
- struct page *page = NULL;
+ struct page *page;
enum mc_target_type ret = MC_TARGET_NONE;
page = pmd_page(pmd);
- /* Don't attempt to move huge tmpfs pages yet: can be enabled later */
- if (!(mc.flags & MOVE_ANON) || !PageAnon(page))
+ if (!PageAnon(page)) {
+ if (!(mc.flags & MOVE_FILE))
+ return ret;
+ *pfn = page_to_pfn(page);
+ return MC_TARGET_TEAM;
+ }
+ if (!(mc.flags & MOVE_ANON))
return ret;
if (page->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
@@ -4589,8 +4594,8 @@ static enum mc_target_type get_mctgt_typ
return ret;
}
#else
-static inline enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
- unsigned long addr, pmd_t pmd, union mc_target *target)
+static inline enum mc_target_type get_mctgt_type_thp(pmd_t pmd,
+ union mc_target *target, unsigned long *pfn)
{
return MC_TARGET_NONE;
}
@@ -4601,24 +4606,33 @@ static int mem_cgroup_count_precharge_pt
struct mm_walk *walk)
{
struct vm_area_struct *vma = walk->vma;
- pte_t *pte;
+ enum mc_target_type target_type;
+ unsigned long uninitialized_var(pfn);
+ pte_t ptent;
+ pte_t *pte = NULL;
spinlock_t *ptl;
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
- if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
+ target_type = get_mctgt_type_thp(*pmd, NULL, &pfn);
+ if (target_type == MC_TARGET_PAGE)
mc.precharge += HPAGE_PMD_NR;
- spin_unlock(ptl);
- return 0;
+ if (target_type != MC_TARGET_TEAM)
+ goto unlock;
+ } else {
+ if (pmd_trans_unstable(pmd))
+ return 0;
+ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
}
-
- if (pmd_trans_unstable(pmd))
- return 0;
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
- for (; addr != end; pte++, addr += PAGE_SIZE)
- if (get_mctgt_type(vma, addr, *pte, NULL))
+ for (; addr != end; addr += PAGE_SIZE) {
+ ptent = pte ? *(pte++) : pfn_pte(pfn++, vma->vm_page_prot);
+ if (get_mctgt_type(vma, addr, ptent, NULL))
mc.precharge++; /* increment precharge temporarily */
- pte_unmap_unlock(pte - 1, ptl);
+ }
+ if (pte)
+ pte_unmap(pte - 1);
+unlock:
+ spin_unlock(ptl);
cond_resched();
return 0;
@@ -4787,22 +4801,21 @@ static int mem_cgroup_move_charge_pte_ra
{
int ret = 0;
struct vm_area_struct *vma = walk->vma;
- pte_t *pte;
+ unsigned long uninitialized_var(pfn);
+ pte_t ptent;
+ pte_t *pte = NULL;
spinlock_t *ptl;
enum mc_target_type target_type;
union mc_target target;
struct page *page;
-
+retry:
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
- if (mc.precharge < HPAGE_PMD_NR) {
- spin_unlock(ptl);
- return 0;
- }
- target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
+ target_type = get_mctgt_type_thp(*pmd, &target, &pfn);
if (target_type == MC_TARGET_PAGE) {
page = target.page;
- if (!isolate_lru_page(page)) {
+ if (mc.precharge >= HPAGE_PMD_NR &&
+ !isolate_lru_page(page)) {
if (!mem_cgroup_move_account(page, true,
mc.from, mc.to)) {
mc.precharge -= HPAGE_PMD_NR;
@@ -4811,22 +4824,19 @@ static int mem_cgroup_move_charge_pte_ra
putback_lru_page(page);
}
put_page(page);
+ addr = end;
}
- spin_unlock(ptl);
- return 0;
+ if (target_type != MC_TARGET_TEAM)
+ goto unlock;
+ /* addr is not aligned when retrying after precharge ran out */
+ pfn += (addr & (HPAGE_PMD_SIZE-1)) >> PAGE_SHIFT;
+ } else {
+ if (pmd_trans_unstable(pmd))
+ return 0;
+ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
}
-
- if (pmd_trans_unstable(pmd))
- return 0;
-retry:
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
- for (; addr != end; addr += PAGE_SIZE) {
- pte_t ptent = *(pte++);
- swp_entry_t ent;
-
- if (!mc.precharge)
- break;
-
+ for (; addr != end && mc.precharge; addr += PAGE_SIZE) {
+ ptent = pte ? *(pte++) : pfn_pte(pfn++, vma->vm_page_prot);
switch (get_mctgt_type(vma, addr, ptent, &target)) {
case MC_TARGET_PAGE:
page = target.page;
@@ -4851,8 +4861,8 @@ put: /* get_mctgt_type() gets the page
put_page(page);
break;
case MC_TARGET_SWAP:
- ent = target.ent;
- if (!mem_cgroup_move_swap_account(ent, mc.from, mc.to)) {
+ if (!mem_cgroup_move_swap_account(target.ent,
+ mc.from, mc.to)) {
mc.precharge--;
/* we fixup refcnts and charges later. */
mc.moved_swap++;
@@ -4862,7 +4872,10 @@ put: /* get_mctgt_type() gets the page
break;
}
}
- pte_unmap_unlock(pte - 1, ptl);
+ if (pte)
+ pte_unmap(pte - 1);
+unlock:
+ spin_unlock(ptl);
cond_resched();
if (addr != end) {
--