tmpfs: add an mpol= mount option for the default NUMA memory policy

Give each tmpfs instance a default NUMA memory allocation policy, chosen
with mpol=interleave or mpol=default at mount or remount time.  The value
is kept in shmem_sb_info.policy and handed to mpol_shared_policy_init()
whenever shmem_get_inode() sets up a new inode, so a remount only affects
inodes created afterwards.

mpol_shared_policy_init() gains a policy argument and moves out of line
into mm/mempolicy.c.  For anything other than MPOL_DEFAULT it installs a
policy of that mode, built from node_online_map, over the whole file.
hugetlbfs passes MPOL_DEFAULT, for which the function does exactly what
the old inline version did.

Review fix folded in: mpol_shared_policy_init() now calls mpol_free() on
the policy it obtained from mpol_new() once it has been handed to
mpol_set_shared_policy(); previously that reference was never dropped.

--- tmpfs1/Documentation/filesystems/tmpfs.txt	2004-11-11 19:24:14.438121120 +0000
+++ tmpfs2/Documentation/filesystems/tmpfs.txt	2004-11-11 19:24:32.173424944 +0000
@@ -78,6 +78,12 @@ use up all the memory on the machine; bu
 that instance in a system with many cpus making intensive use of it.
 
 
+tmpfs has a mount option to set the NUMA memory allocation policy for
+all files in that instance:
+mpol=interleave		prefers to allocate memory from each node in turn
+mpol=default		prefers to allocate memory from the local node
+
+
 To specify the initial root directory you can use the following mount
 options:
 
--- tmpfs1/fs/hugetlbfs/inode.c	2004-11-11 12:39:59.000000000 +0000
+++ tmpfs2/fs/hugetlbfs/inode.c	2004-11-11 19:24:32.174424792 +0000
@@ -400,7 +400,7 @@ static struct inode *hugetlbfs_get_inode
 		inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		info = HUGETLBFS_I(inode);
-		mpol_shared_policy_init(&info->policy);
+		mpol_shared_policy_init(&info->policy, MPOL_DEFAULT);
 		switch (mode & S_IFMT) {
 		default:
 			init_special_inode(inode, mode, dev);
--- tmpfs1/include/linux/mempolicy.h	2004-11-11 12:40:09.000000000 +0000
+++ tmpfs2/include/linux/mempolicy.h	2004-11-11 19:24:32.175424640 +0000
@@ -137,12 +137,7 @@ struct shared_policy {
 	spinlock_t lock;
 };
 
-static inline void mpol_shared_policy_init(struct shared_policy *info)
-{
-	info->root = RB_ROOT;
-	spin_lock_init(&info->lock);
-}
-
+void mpol_shared_policy_init(struct shared_policy *info, int policy);
 int mpol_set_shared_policy(struct shared_policy *info,
 				struct vm_area_struct *vma,
 				struct mempolicy *new);
@@ -198,7 +193,8 @@ static inline int mpol_set_shared_policy
 	return -EINVAL;
 }
 
-static inline void mpol_shared_policy_init(struct shared_policy *info)
+static inline void mpol_shared_policy_init(struct shared_policy *info,
+					int policy)
 {
 }
 
--- tmpfs1/include/linux/shmem_fs.h	2004-10-18 22:56:50.000000000 +0100
+++ tmpfs2/include/linux/shmem_fs.h	2004-11-11 19:24:32.175424640 +0000
@@ -26,6 +26,7 @@ struct shmem_sb_info {
 	unsigned long free_blocks;  /* How many are left for allocation */
 	unsigned long max_inodes;   /* How many inodes are allowed */
 	unsigned long free_inodes;  /* How many are left for allocation */
+	int policy;		    /* Default NUMA memory alloc policy */
 	spinlock_t    stat_lock;
 };
 
--- tmpfs1/mm/mempolicy.c	2004-11-11 12:40:12.000000000 +0000
+++ tmpfs2/mm/mempolicy.c	2004-11-11 19:24:32.176424488 +0000
@@ -1060,6 +1060,39 @@ restart:
 	return 0;
 }
 
+/*
+ * Initialise the shared policy tree of a file.  For any @policy other
+ * than MPOL_DEFAULT, a policy of that mode built from node_online_map is
+ * installed over the whole file.  Errors are not reported: the tree is
+ * then left empty, i.e. the file falls back to MPOL_DEFAULT.
+ */
+void mpol_shared_policy_init(struct shared_policy *info, int policy)
+{
+	info->root = RB_ROOT;
+	spin_lock_init(&info->lock);
+
+	if (policy != MPOL_DEFAULT) {
+		struct mempolicy *newpol;
+
+		/* Falls back to MPOL_DEFAULT on any error */
+		newpol = mpol_new(policy, nodes_addr(node_online_map));
+		if (!IS_ERR(newpol)) {
+			/* Create pseudo-vma that contains just the policy */
+			struct vm_area_struct pvma;
+
+			memset(&pvma, 0, sizeof(struct vm_area_struct));
+			/* Policy covers entire file */
+			pvma.vm_end = ~0UL;
+			mpol_set_shared_policy(info, &pvma, newpol);
+			/*
+			 * Drop the reference returned by mpol_new(); the
+			 * tree takes its own on insertion (see sp_alloc()).
+			 */
+			mpol_free(newpol);
+		}
+	}
+}
+
 int mpol_set_shared_policy(struct shared_policy *info,
 			struct vm_area_struct *vma, struct mempolicy *npol)
 {
--- tmpfs1/mm/shmem.c	2004-11-11 19:24:14.441120664 +0000
+++ tmpfs2/mm/shmem.c	2004-11-11 19:24:32.178424184 +0000
@@ -1292,7 +1292,7 @@ shmem_get_inode(struct super_block *sb, 
 		info = SHMEM_I(inode);
 		memset(info, 0, (char *)inode - (char *)info);
 		spin_lock_init(&info->lock);
-		mpol_shared_policy_init(&info->policy);
+		mpol_shared_policy_init(&info->policy, sbinfo->policy);
 		INIT_LIST_HEAD(&info->swaplist);
 
 		switch (mode & S_IFMT) {
@@ -1817,7 +1817,7 @@ static struct inode_operations shmem_sym
 #endif
 };
 
-static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes)
+static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes, int *policy)
 {
 	char *this_char, *value, *rest;
 
@@ -1871,6 +1871,13 @@ static int shmem_parse_options(char *opt
 			*gid = simple_strtoul(value,&rest,0);
 			if (*rest)
 				goto bad_val;
+		} else if (!strcmp(this_char,"mpol")) {
+			if (!strcmp(value,"interleave"))
+				*policy = MPOL_INTERLEAVE;
+			else if (!strcmp(value,"default"))
+				*policy = MPOL_DEFAULT;
+			else
+				goto bad_val;
 		} else {
 			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
 			       this_char);
@@ -1891,12 +1898,13 @@ static int shmem_remount_fs(struct super
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 	unsigned long max_blocks = sbinfo->max_blocks;
 	unsigned long max_inodes = sbinfo->max_inodes;
+	int policy = sbinfo->policy;
 	unsigned long blocks;
 	unsigned long inodes;
 	int error = -EINVAL;
 
 	if (shmem_parse_options(data, NULL, NULL, NULL,
-				&max_blocks, &max_inodes))
+				&max_blocks, &max_inodes, &policy))
 		return error;
 
 	spin_lock(&sbinfo->stat_lock);
@@ -1922,6 +1930,7 @@ static int shmem_remount_fs(struct super
 	sbinfo->free_blocks = max_blocks - blocks;
 	sbinfo->max_inodes  = max_inodes;
 	sbinfo->free_inodes = max_inodes - inodes;
+	sbinfo->policy = policy;
 out:
 	spin_unlock(&sbinfo->stat_lock);
 	return error;
@@ -1952,6 +1961,7 @@ static int shmem_fill_super(struct super
 	struct shmem_sb_info *sbinfo;
 	unsigned long blocks = 0;
 	unsigned long inodes = 0;
+	int policy = MPOL_DEFAULT;
 
 #ifdef CONFIG_TMPFS
 	/*
@@ -1965,7 +1975,7 @@ static int shmem_fill_super(struct super
 		if (inodes > blocks)
 			inodes = blocks;
 		if (shmem_parse_options(data, &mode, &uid, &gid,
-					&blocks, &inodes))
+					&blocks, &inodes, &policy))
 			return -EINVAL;
 	}
 #else
@@ -1983,6 +1993,7 @@ static int shmem_fill_super(struct super
 	sbinfo->free_blocks = blocks;
 	sbinfo->max_inodes = inodes;
 	sbinfo->free_inodes = inodes;
+	sbinfo->policy = policy;
 
 	sb->s_fs_info = sbinfo;
 	sb->s_maxbytes = SHMEM_MAX_BYTES;