From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from westrelay02.boulder.ibm.com (westrelay02.boulder.ibm.com [9.17.195.11]) by e31.co.us.ibm.com (8.12.10/8.12.9) with ESMTP id j6BHaHeL216420 for ; Mon, 11 Jul 2005 13:36:17 -0400 Received: from d03av03.boulder.ibm.com (d03av03.boulder.ibm.com [9.17.195.169]) by westrelay02.boulder.ibm.com (8.12.10/NCO/VER6.6) with ESMTP id j6BHaHio394696 for ; Mon, 11 Jul 2005 11:36:17 -0600 Received: from d03av03.boulder.ibm.com (loopback [127.0.0.1]) by d03av03.boulder.ibm.com (8.12.11/8.13.3) with ESMTP id j6BHaGoi013053 for ; Mon, 11 Jul 2005 11:36:16 -0600 Message-ID: <42D2AE0F.8020809@austin.ibm.com> Date: Mon, 11 Jul 2005 12:36:15 -0500 From: Joel Schopp MIME-Version: 1.0 Subject: Re: [Fwd: [PATCH 2/4] cpusets new __GFP_HARDWALL flag] References: <1121101013.15095.19.camel@localhost> In-Reply-To: <1121101013.15095.19.camel@localhost> Content-Type: multipart/mixed; boundary="------------040303020509030602090006" Sender: owner-linux-mm@kvack.org Return-Path: To: Paul Jackson Cc: Dave Hansen , linux-mm@kvack.org, Mel Gorman List-ID: This is a multi-part message in MIME format. --------------040303020509030602090006 Content-Type: text/plain; charset=us-ascii; format=flowed Content-Transfer-Encoding: 7bit Dave Hansen brought this to my attention. I've attached the bit of the memory fragmentation avoidance you conflict with (I'm working with Mel on his patches). I think we share similar goals, and I wouldn't mind changing __GFP_USERRCLM to __GFP_USERALLOC or some neutral name we could share. Anything to increase the chances of fragmentation avoidance getting merged is good in my book. -Joel >>GFP_USER allocations, and distinguish them from GFP_KERNEL allocations. >> >>Allocations (such as GFP_USER) marked GFP_HARDWALL are constrainted to >>the current tasks cpuset. Other allocations (such as GFP_KERNEL) can >>steal from the possibly larger nearest mem_exclusive cpuset ancestor, >>if memory is tight on every node in the current cpuset. >> >>This patch collides with Mel Gorman's patch to reduce fragmentation >>in the standard buddy allocator, which adds two GFP flags. At first >>glance, it seems that his added __GFP_USERRCLM flag could be used in >>place of the following __GFP_HARDWALL, as they both seem to be set >>the same way - for GFP_USER and GFP_HIGHUSER. Perhaps we should call >>this flag __GFP_USER, rather than some name dependent on its use(s). > > > Does this make sense to integrate into your patches? > > Index: linux-2.6-mem_exclusive/include/linux/gfp.h > =================================================================== > --- linux-2.6-mem_exclusive.orig/include/linux/gfp.h 2005-07-02 17:42:02.000000000 -0700 > +++ linux-2.6-mem_exclusive/include/linux/gfp.h 2005-07-02 17:43:00.000000000 -0700 > @@ -40,6 +40,7 @@ struct vm_area_struct; > #define __GFP_ZERO 0x8000u /* Return zeroed page on success */ > #define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */ > #define __GFP_NORECLAIM 0x20000u /* No zone reclaim during page_cache_alloc */ > +#define __GFP_HARDWALL 0x40000u /* Enforce hardwall cpuset memory allocs */ > > #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ > #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1) > @@ -48,14 +49,15 @@ struct vm_area_struct; > #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ > __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ > __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ > - __GFP_NOMEMALLOC|__GFP_NORECLAIM) > + __GFP_NOMEMALLOC|__GFP_NORECLAIM|__GFP_HARDWALL) > > #define GFP_ATOMIC (__GFP_HIGH) > #define GFP_NOIO (__GFP_WAIT) > #define GFP_NOFS (__GFP_WAIT | __GFP_IO) > #define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) > -#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS) > -#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HIGHMEM) > +#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) > +#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \ > + __GFP_HIGHMEM) > > /* Flag - indicates that the buffer will be suitable for DMA. Ignored on some > platforms, used as appropriate on others */ --------------040303020509030602090006 Content-Type: text/plain; name="patch-defrag-flags" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="patch-defrag-flags" Index: 2.6.13-rc1-mhp1/fs/buffer.c =================================================================== --- 2.6.13-rc1-mhp1.orig/fs/buffer.c 2005-06-29 15:11:40.%N -0500 +++ 2.6.13-rc1-mhp1/fs/buffer.c 2005-07-06 12:30:55.%N -0500 @@ -1110,7 +1110,8 @@ grow_dev_page(struct block_device *bdev, struct page *page; struct buffer_head *bh; - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); + page = find_or_create_page(inode->i_mapping, index, + GFP_NOFS | __GFP_USERRCLM); if (!page) return NULL; @@ -3079,7 +3080,8 @@ static void recalc_bh_state(void) struct buffer_head *alloc_buffer_head(unsigned int __nocast gfp_flags) { - struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); + struct buffer_head *ret = kmem_cache_alloc(bh_cachep, + gfp_flags|__GFP_KERNRCLM); if (ret) { preempt_disable(); __get_cpu_var(bh_accounting).nr++; Index: 2.6.13-rc1-mhp1/fs/dcache.c =================================================================== --- 2.6.13-rc1-mhp1.orig/fs/dcache.c 2005-06-29 15:11:18.%N -0500 +++ 2.6.13-rc1-mhp1/fs/dcache.c 2005-07-06 12:32:00.%N -0500 @@ -719,7 +719,7 @@ struct dentry *d_alloc(struct dentry * p struct dentry *dentry; char *dname; - dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); + dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL|__GFP_KERNRCLM); if (!dentry) return NULL; Index: 2.6.13-rc1-mhp1/fs/ext2/super.c =================================================================== --- 2.6.13-rc1-mhp1.orig/fs/ext2/super.c 2005-06-29 15:11:18.%N -0500 +++ 2.6.13-rc1-mhp1/fs/ext2/super.c 2005-07-06 12:34:16.%N -0500 @@ -138,7 +138,8 @@ static kmem_cache_t * ext2_inode_cachep; static struct inode *ext2_alloc_inode(struct super_block *sb) { struct ext2_inode_info *ei; - ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, SLAB_KERNEL); + ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, + SLAB_KERNEL|__GFP_KERNRCLM); if (!ei) return NULL; #ifdef CONFIG_EXT2_FS_POSIX_ACL Index: 2.6.13-rc1-mhp1/fs/ext3/super.c =================================================================== --- 2.6.13-rc1-mhp1.orig/fs/ext3/super.c 2005-06-29 15:11:18.%N -0500 +++ 2.6.13-rc1-mhp1/fs/ext3/super.c 2005-06-29 16:02:25.%N -0500 @@ -440,7 +440,7 @@ static struct inode *ext3_alloc_inode(st { struct ext3_inode_info *ei; - ei = kmem_cache_alloc(ext3_inode_cachep, SLAB_NOFS); + ei = kmem_cache_alloc(ext3_inode_cachep, SLAB_NOFS|__GFP_KERNRCLM); if (!ei) return NULL; #ifdef CONFIG_EXT3_FS_POSIX_ACL Index: 2.6.13-rc1-mhp1/fs/ntfs/inode.c =================================================================== --- 2.6.13-rc1-mhp1.orig/fs/ntfs/inode.c 2005-06-29 15:11:18.%N -0500 +++ 2.6.13-rc1-mhp1/fs/ntfs/inode.c 2005-07-06 13:10:49.%N -0500 @@ -317,8 +317,8 @@ struct inode *ntfs_alloc_big_inode(struc ntfs_inode *ni; ntfs_debug("Entering."); - ni = (ntfs_inode *)kmem_cache_alloc(ntfs_big_inode_cache, - SLAB_NOFS); + ni = (ntfs_inode *)kmem_cache_alloc(ntfs_big_inode_cache, + SLAB_NOFS|__GFP_KERNRCLM); if (likely(ni != NULL)) { ni->state = 0; return VFS_I(ni); @@ -343,7 +343,8 @@ static inline ntfs_inode *ntfs_alloc_ext ntfs_inode *ni; ntfs_debug("Entering."); - ni = (ntfs_inode *)kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS); + ni = (ntfs_inode *)kmem_cache_alloc(ntfs_inode_cache, + SLAB_NOFS|__GFP_KERNRCLM); if (likely(ni != NULL)) { ni->state = 0; return ni; Index: 2.6.13-rc1-mhp1/include/linux/gfp.h =================================================================== --- 2.6.13-rc1-mhp1.orig/include/linux/gfp.h 2005-06-29 15:11:35.%N -0500 +++ 2.6.13-rc1-mhp1/include/linux/gfp.h 2005-07-06 12:39:56.%N -0500 @@ -40,22 +40,26 @@ struct vm_area_struct; #define __GFP_ZERO 0x8000u /* Return zeroed page on success */ #define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */ #define __GFP_NORECLAIM 0x20000u /* No realy zone reclaim during allocation */ +#define __GFP_KERNRCLM 0x40000u /* Kernel page that is easily reclaimable */ +#define __GFP_USERRCLM 0x80000u /* User is a userspace user */ -#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 22 /* Room for 22 __GFP_FOO bits */ #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1) /* if you forget to add the bitmask here kernel will crash, period */ #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ - __GFP_NOMEMALLOC|__GFP_NORECLAIM) + __GFP_NOMEMALLOC|__GFP_NORECLAIM| \ + __GFP_USERRCLM|__GFP_KERNRCLM) #define GFP_ATOMIC (__GFP_HIGH) #define GFP_NOIO (__GFP_WAIT) #define GFP_NOFS (__GFP_WAIT | __GFP_IO) #define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) -#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS) -#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HIGHMEM) +#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_USERRCLM) +#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HIGHMEM | \ + __GFP_USERRCLM) /* Flag - indicates that the buffer will be suitable for DMA. Ignored on some platforms, used as appropriate on others */ --------------040303020509030602090006-- -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: aart@kvack.org