From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from psmtp.com (na3sys010amx151.postini.com [74.125.245.151]) by kanga.kvack.org (Postfix) with SMTP id 089276B0031 for ; Thu, 5 Sep 2013 17:53:45 -0400 (EDT) Received: by mail-vb0-f48.google.com with SMTP id w16so1607476vbf.35 for ; Thu, 05 Sep 2013 14:53:45 -0700 (PDT) MIME-Version: 1.0 In-Reply-To: <1375582645-29274-4-git-send-email-kirill.shutemov@linux.intel.com> References: <1375582645-29274-1-git-send-email-kirill.shutemov@linux.intel.com> <1375582645-29274-4-git-send-email-kirill.shutemov@linux.intel.com> From: Ning Qu Date: Thu, 5 Sep 2013 14:53:24 -0700 Message-ID: Subject: Re: [PATCH 03/23] thp: compile-time and sysfs knob for thp pagecache Content-Type: multipart/alternative; boundary=001a11c2c158ba3eed04e5a9f455 Sender: owner-linux-mm@kvack.org List-ID: To: "Kirill A. Shutemov" Cc: Andrea Arcangeli , Andrew Morton , Al Viro , Hugh Dickins , Wu Fengguang , Jan Kara , Mel Gorman , linux-mm@kvack.org, Andi Kleen , Matthew Wilcox , "Kirill A. Shutemov" , Hillf Danton , Dave Hansen , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org --001a11c2c158ba3eed04e5a9f455 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable One minor question inline. Best wishes, --=20 Ning Qu (=E6=9B=B2=E5=AE=81) | Software Engineer | quning@google.com | +1-4= 08-418-6066 On Sat, Aug 3, 2013 at 7:17 PM, Kirill A. Shutemov < kirill.shutemov@linux.intel.com> wrote: > From: "Kirill A. Shutemov" > > For now, TRANSPARENT_HUGEPAGE_PAGECACHE is only implemented for x86_64. > > Radix tree perload overhead can be significant on BASE_SMALL systems, so > let's add dependency on !BASE_SMALL. > > /sys/kernel/mm/transparent_hugepage/page_cache is runtime knob for the > feature. It's enabled by default if TRANSPARENT_HUGEPAGE_PAGECACHE is > enabled. > > Signed-off-by: Kirill A. 
Shutemov > --- > Documentation/vm/transhuge.txt | 9 +++++++++ > include/linux/huge_mm.h | 9 +++++++++ > mm/Kconfig | 12 ++++++++++++ > mm/huge_memory.c | 23 +++++++++++++++++++++++ > 4 files changed, 53 insertions(+) > > diff --git a/Documentation/vm/transhuge.txt > b/Documentation/vm/transhuge.txt > index 4a63953..4cc15c4 100644 > --- a/Documentation/vm/transhuge.txt > +++ b/Documentation/vm/transhuge.txt > @@ -103,6 +103,15 @@ echo always > >/sys/kernel/mm/transparent_hugepage/enabled > echo madvise >/sys/kernel/mm/transparent_hugepage/enabled > echo never >/sys/kernel/mm/transparent_hugepage/enabled > > +If TRANSPARENT_HUGEPAGE_PAGECACHE is enabled kernel will use huge pages = in > +page cache if possible. It can be disable and re-enabled via sysfs: > + > +echo 0 >/sys/kernel/mm/transparent_hugepage/page_cache > +echo 1 >/sys/kernel/mm/transparent_hugepage/page_cache > + > +If it's disabled kernel will not add new huge pages to page cache and > +split them on mapping, but already mapped pages will stay intakt. 
> + > It's also possible to limit defrag efforts in the VM to generate > hugepages in case they're not immediately free to madvise regions or > to never try to defrag memory and simply fallback to regular pages > diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h > index 3935428..1534e1e 100644 > --- a/include/linux/huge_mm.h > +++ b/include/linux/huge_mm.h > @@ -40,6 +40,7 @@ enum transparent_hugepage_flag { > TRANSPARENT_HUGEPAGE_DEFRAG_FLAG, > TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, > TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, > + TRANSPARENT_HUGEPAGE_PAGECACHE, > TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG, > #ifdef CONFIG_DEBUG_VM > TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG, > @@ -229,4 +230,12 @@ static inline int do_huge_pmd_numa_page(struct > mm_struct *mm, struct vm_area_str > > #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ > > +static inline bool transparent_hugepage_pagecache(void) > +{ > + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE)) > + return false; > + if (!(transparent_hugepage_flags & (1< Here, I suppose we should test the TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG as well? E.g. if (!(transparent_hugepage_flags & ((1< + return transparent_hugepage_flags & > (1< +} > #endif /* _LINUX_HUGE_MM_H */ > diff --git a/mm/Kconfig b/mm/Kconfig > index 256bfd0..1e30ee8 100644 > --- a/mm/Kconfig > +++ b/mm/Kconfig > @@ -420,6 +420,18 @@ choice > benefit. > endchoice > > +config TRANSPARENT_HUGEPAGE_PAGECACHE > + bool "Transparent Hugepage Support for page cache" > + depends on X86_64 && TRANSPARENT_HUGEPAGE > + # avoid radix tree preload overhead > + depends on !BASE_SMALL > + default y > + help > + Enabling the option adds support hugepages for file-backed > + mappings. It requires transparent hugepage support from > + filesystem side. For now, the only filesystem which supports > + hugepages is ramfs. 
> + > config CROSS_MEMORY_ATTACH > bool "Cross Memory Support" > depends on MMU > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index d96d921..523946c 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -42,6 +42,9 @@ unsigned long transparent_hugepage_flags __read_mostly = =3D > #endif > (1< (1< +#ifdef CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE > + (1< +#endif > (1< > /* default scan 8*512 pte (or vmas) every 30 second */ > @@ -362,6 +365,23 @@ static ssize_t defrag_store(struct kobject *kobj, > static struct kobj_attribute defrag_attr =3D > __ATTR(defrag, 0644, defrag_show, defrag_store); > > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE > +static ssize_t page_cache_show(struct kobject *kobj, > + struct kobj_attribute *attr, char *buf) > +{ > + return single_flag_show(kobj, attr, buf, > + TRANSPARENT_HUGEPAGE_PAGECACHE); > +} > +static ssize_t page_cache_store(struct kobject *kobj, > + struct kobj_attribute *attr, const char *buf, size_t coun= t) > +{ > + return single_flag_store(kobj, attr, buf, count, > + TRANSPARENT_HUGEPAGE_PAGECACHE); > +} > +static struct kobj_attribute page_cache_attr =3D > + __ATTR(page_cache, 0644, page_cache_show, page_cache_store); > +#endif > + > static ssize_t use_zero_page_show(struct kobject *kobj, > struct kobj_attribute *attr, char *buf) > { > @@ -397,6 +417,9 @@ static struct kobj_attribute debug_cow_attr =3D > static struct attribute *hugepage_attr[] =3D { > &enabled_attr.attr, > &defrag_attr.attr, > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE > + &page_cache_attr.attr, > +#endif > &use_zero_page_attr.attr, > #ifdef CONFIG_DEBUG_VM > &debug_cow_attr.attr, > -- > 1.8.3.2 > > --001a11c2c158ba3eed04e5a9f455 Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: quoted-printable
One minor question inline.
<= br clear=3D"all">
Best wishes,
--=C2= =A0
Ning Qu (=E6=9B=B2= =E5=AE=81)=C2=A0|=C2=A0Software Engineer |=C2=A0quning@google.com=C2=A0|=C2=A0+1-408-418-6066


On Sat, Aug 3, 2013 at 7:17 PM, Kirill A. Shutemov <kirill.shutemov@linux.intel.com> wrote:
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
For now, TRANSPARENT_HUGEPAGE_PAGECACHE is only implemented for x86_64.

Radix tree preload overhead can be significant on BASE_SMALL systems, so let's add dependency on !BASE_SMALL.

/sys/kernel/mm/transparent_hugepage/page_cache is runtime knob for the
feature. It's enabled by default if TRANSPARENT_HUGEPAGE_PAGECACHE is enabled.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
=C2=A0Documentation/vm/transhuge.txt | =C2=A09 +++++++++
=C2=A0include/linux/huge_mm.h =C2=A0 =C2=A0 =C2=A0 =C2=A0| =C2=A09 ++++++++= +
=C2=A0mm/Kconfig =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 = =C2=A0 =C2=A0 | 12 ++++++++++++
=C2=A0mm/huge_memory.c =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 | 2= 3 +++++++++++++++++++++++
=C2=A04 files changed, 53 insertions(+)

diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.tx= t
index 4a63953..4cc15c4 100644
--- a/Documentation/vm/transhuge.txt
+++ b/Documentation/vm/transhuge.txt
@@ -103,6 +103,15 @@ echo always >/sys/kernel/mm/transparent_hugepage/en= abled
=C2=A0echo madvise >/sys/kernel/mm/transparent_hugepage/enabled
=C2=A0echo never >/sys/kernel/mm/transparent_hugepage/enabled

+If TRANSPARENT_HUGEPAGE_PAGECACHE is enabled kernel will use huge pages in=
+page cache if possible. It can be disabled and re-enabled via sysfs:
+
+echo 0 >/sys/kernel/mm/transparent_hugepage/page_cache
+echo 1 >/sys/kernel/mm/transparent_hugepage/page_cache
+
+If it's disabled kernel will not add new huge pages to page cache and
+split them on mapping, but already mapped pages will stay intact.
+
=C2=A0It's also possible to limit defrag efforts in the VM to generate<= br> =C2=A0hugepages in case they're not immediately free to madvise regions= or
=C2=A0to never try to defrag memory and simply fallback to regular pages diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 3935428..1534e1e 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -40,6 +40,7 @@ enum transparent_hugepage_flag {
=C2=A0 =C2=A0 =C2=A0 =C2=A0 TRANSPARENT_HUGEPAGE_DEFRAG_FLAG,
=C2=A0 =C2=A0 =C2=A0 =C2=A0 TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
=C2=A0 =C2=A0 =C2=A0 =C2=A0 TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, + =C2=A0 =C2=A0 =C2=A0 TRANSPARENT_HUGEPAGE_PAGECACHE,
=C2=A0 =C2=A0 =C2=A0 =C2=A0 TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG,
=C2=A0#ifdef CONFIG_DEBUG_VM
=C2=A0 =C2=A0 =C2=A0 =C2=A0 TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG,
@@ -229,4 +230,12 @@ static inline int do_huge_pmd_numa_page(struct mm_stru= ct *mm, struct vm_area_str

=C2=A0#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

+static inline bool transparent_hugepage_pagecache(void)
+{
+ =C2=A0 =C2=A0 =C2=A0 if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE_PAGECACH= E))
+ =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 return false;
+ =C2=A0 =C2=A0 =C2=A0 if (!(transparent_hugepage_flags & (1<<TRA= NSPARENT_HUGEPAGE_FLAG)))

Here, I suppose we should test the TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG as well? E.g.
=C2=A0 =C2=A0 =C2=A0 =C2=A0 if (!(transparent_hugepage_flags &
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 ((1<<TRANSPARENT_H= UGEPAGE_FLAG) |
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 = =C2=A0(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))))

=
+ =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 return false;
+ =C2=A0 =C2=A0 =C2=A0 return transparent_hugepage_flags & (1<<TR= ANSPARENT_HUGEPAGE_PAGECACHE);
+}
=C2=A0#endif /* _LINUX_HUGE_MM_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index 256bfd0..1e30ee8 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -420,6 +420,18 @@ choice
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 benefit.
=C2=A0endchoice

+config TRANSPARENT_HUGEPAGE_PAGECACHE
+ =C2=A0 =C2=A0 =C2=A0 bool "Transparent Hugepage Support for page cac= he"
+ =C2=A0 =C2=A0 =C2=A0 depends on X86_64 && TRANSPARENT_HUGEPAGE + =C2=A0 =C2=A0 =C2=A0 # avoid radix tree preload overhead
+ =C2=A0 =C2=A0 =C2=A0 depends on !BASE_SMALL
+ =C2=A0 =C2=A0 =C2=A0 default y
+ =C2=A0 =C2=A0 =C2=A0 help
+ =C2=A0 =C2=A0 =C2=A0 =C2=A0 Enabling the option adds support hugepages fo= r file-backed
+ =C2=A0 =C2=A0 =C2=A0 =C2=A0 mappings. It requires transparent hugepage su= pport from
+ =C2=A0 =C2=A0 =C2=A0 =C2=A0 filesystem side. For now, the only filesystem= which supports
+ =C2=A0 =C2=A0 =C2=A0 =C2=A0 hugepages is ramfs.
+
=C2=A0config CROSS_MEMORY_ATTACH
=C2=A0 =C2=A0 =C2=A0 =C2=A0 bool "Cross Memory Support"
=C2=A0 =C2=A0 =C2=A0 =C2=A0 depends on MMU
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d96d921..523946c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -42,6 +42,9 @@ unsigned long transparent_hugepage_flags __read_mostly = =3D
=C2=A0#endif
=C2=A0 =C2=A0 =C2=A0 =C2=A0 (1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)| =C2=A0 =C2=A0 =C2=A0 =C2=A0 (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGE= D_FLAG)|
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE
+ =C2=A0 =C2=A0 =C2=A0 (1<<TRANSPARENT_HUGEPAGE_PAGECACHE)|
+#endif
=C2=A0 =C2=A0 =C2=A0 =C2=A0 (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FL= AG);

=C2=A0/* default scan 8*512 pte (or vmas) every 30 second */
@@ -362,6 +365,23 @@ static ssize_t defrag_store(struct kobject *kobj,
=C2=A0static struct kobj_attribute defrag_attr =3D
=C2=A0 =C2=A0 =C2=A0 =C2=A0 __ATTR(defrag, 0644, defrag_show, defrag_store)= ;

+#ifdef CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE
+static ssize_t page_cache_show(struct kobject *kobj,
+ =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 struct kobj_attribute *a= ttr, char *buf)
+{
+ =C2=A0 =C2=A0 =C2=A0 return single_flag_show(kobj, attr, buf,
+ =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 TRANSPARENT_HUGEPAGE_PAGECACHE);
+}
+static ssize_t page_cache_store(struct kobject *kobj,
+ =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 struct kobj_attribute *a= ttr, const char *buf, size_t count)
+{
+ =C2=A0 =C2=A0 =C2=A0 return single_flag_store(kobj, attr, buf, count,
+ =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0TRANSPARENT_HUGEPAGE_PAGECACHE);
+}
+static struct kobj_attribute page_cache_attr =3D
+ =C2=A0 =C2=A0 =C2=A0 __ATTR(page_cache, 0644, page_cache_show, page_cache= _store);
+#endif
+
=C2=A0static ssize_t use_zero_page_show(struct kobject *kobj,
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 struct kobj_attribu= te *attr, char *buf)
=C2=A0{
@@ -397,6 +417,9 @@ static struct kobj_attribute debug_cow_attr =3D
=C2=A0static struct attribute *hugepage_attr[] =3D {
=C2=A0 =C2=A0 =C2=A0 =C2=A0 &enabled_attr.attr,
=C2=A0 =C2=A0 =C2=A0 =C2=A0 &defrag_attr.attr,
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE
+ =C2=A0 =C2=A0 =C2=A0 &page_cache_attr.attr,
+#endif
=C2=A0 =C2=A0 =C2=A0 =C2=A0 &use_zero_page_attr.attr,
=C2=A0#ifdef CONFIG_DEBUG_VM
=C2=A0 =C2=A0 =C2=A0 =C2=A0 &debug_cow_attr.attr,
--
1.8.3.2


--001a11c2c158ba3eed04e5a9f455-- -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org