Re: [PATCH RFC] shmem: add hugetlbfs sealing

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Mike Kravetz <mike.kravetz@oracle.com>
To: marcandre.lureau@redhat.com, linux-mm@kvack.org
Cc: hughd@google.com, nyc@holomorphy.com
Subject: Re: [PATCH RFC] shmem: add hugetlbfs sealing
Date: Fri, 27 Oct 2017 11:37:16 -0700	[thread overview]
Message-ID: <4eef58c6-62aa-21f1-4acc-9fce7f32952a@oracle.com> (raw)
In-Reply-To: <20171026132052.28504-1-marcandre.lureau@redhat.com>

On 10/26/2017 06:20 AM, marcandre.lureau@redhat.com wrote:
> From: Marc-AndrA(C) Lureau <marcandre.lureau@redhat.com>
> 
> Hi,
> 
> Recently, Mike Kravetz added hugetlbfs support to memfd. However, he
> didn't need sealing support. One of the reasons to use memfd is to
> have shared memory sealing when doing IPC. Qemu uses shared memory &
> hugetables with vhost-user (used by dpdk). I am adding memfd support
> in qemu, for convenience and security reasons. It would be great if
> hugetlbfs had sealing support. I don't have much experience with
> kernel & mm, but I came up with the following patch. It works with a
> slightly modified memfd_test.c, enabling most tests again (only
> pwrite() has to be skipped). I would like to have some early feedback
> before I send a cleaned up series on the lkml.
> 

This is the first time I have looked closely at the sealing code, so
I may have missed something.  However, in general the changes seem
reasonable and follow what was done for sealing in existing code.

Just a few nits below.

> Thanks!
> 
> Signed-off-by: Marc-AndrA(C) Lureau <marcandre.lureau@redhat.com>
> ---
>  fs/hugetlbfs/inode.c     | 36 ++++++++++++++++++++++++----------
>  include/linux/hugetlb.h  | 11 +++++++++++
>  include/linux/shmem_fs.h |  2 --
>  mm/shmem.c               | 51 +++++++++++++++++++++++++-----------------------
>  4 files changed, 64 insertions(+), 36 deletions(-)
> 
> diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
> index 59073e9f01a4..a39c315e6d0c 100644
> --- a/fs/hugetlbfs/inode.c
> +++ b/fs/hugetlbfs/inode.c
> @@ -55,16 +55,6 @@ struct hugetlbfs_config {
>  	umode_t			mode;
>  };
>  
> -struct hugetlbfs_inode_info {
> -	struct shared_policy policy;
> -	struct inode vfs_inode;
> -};
> -
> -static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
> -{
> -	return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
> -}
> -
>  int sysctl_hugetlb_shm_group;
>  
>  enum {
> @@ -520,8 +510,16 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>  
>  	if (hole_end > hole_start) {
>  		struct address_space *mapping = inode->i_mapping;
> +		struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
>  
>  		inode_lock(inode);
> +
> +		/* protected by i_mutex */
> +		if (info->seals & F_SEAL_WRITE) {
> +			inode_unlock(inode);
> +			return -EPERM;
> +		}
> +
>  		i_mmap_lock_write(mapping);
>  		if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
>  			hugetlb_vmdelete_list(&mapping->i_mmap,
> @@ -539,6 +537,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
>  				loff_t len)
>  {
>  	struct inode *inode = file_inode(file);
> +	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
>  	struct address_space *mapping = inode->i_mapping;
>  	struct hstate *h = hstate_inode(inode);
>  	struct vm_area_struct pseudo_vma;
> @@ -570,6 +569,12 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
>  	if (error)
>  		goto out;
>  
> +	/* do we need this if FL_KEEP_SIZE is mandatory? let's be careful */

Not sure I understand "FL_KEEP_SIZE is mandatory".  It is not mandatory.
So, the code/check is required.

> +	if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
> +		error = -EPERM;
> +		goto out;
> +	}
> +
>  	/*
>  	 * Initialize a pseudo vma as this is required by the huge page
>  	 * allocation routines.  If NUMA is configured, use page index
> @@ -660,6 +665,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
>  	struct hstate *h = hstate_inode(inode);
>  	int error;
>  	unsigned int ia_valid = attr->ia_valid;
> +	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
>  
>  	BUG_ON(!inode);
>  
> @@ -668,9 +674,16 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
>  		return error;
>  
>  	if (ia_valid & ATTR_SIZE) {
> +		loff_t oldsize = inode->i_size;
> +		loff_t newsize = attr->ia_size;
> +
>  		error = -EINVAL;
>  		if (attr->ia_size & ~huge_page_mask(h))

Might want to change 'attr->ia_size' to newsize here.

>  			return -EINVAL;
> +		/* protected by i_mutex */
> +		if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
> +		    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
> +			return -EPERM;
>  		error = hugetlb_vmtruncate(inode, attr->ia_size);

and here as well.

>  		if (error)
>  			return error;
> @@ -723,6 +736,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
>  
>  	inode = new_inode(sb);
>  	if (inode) {
> +		struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
> +
>  		inode->i_ino = get_next_ino();
>  		inode_init_owner(inode, dir, mode);
>  		lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
> @@ -730,6 +745,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
>  		inode->i_mapping->a_ops = &hugetlbfs_aops;
>  		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
>  		inode->i_mapping->private_data = resv_map;
> +		info->seals = F_SEAL_SEAL;
>  		switch (mode & S_IFMT) {
>  		default:
>  			init_special_inode(inode, mode, dev);
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 8bbbd37ab105..42945aa3bcc1 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -44,6 +44,17 @@ extern int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
>  #include <linux/shm.h>
>  #include <asm/tlbflush.h>
>  
> +struct hugetlbfs_inode_info {
> +    struct shared_policy policy;
> +    struct inode vfs_inode;
> +    unsigned int seals;
> +};
> +
> +static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
> +{
> +    return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
> +}
> +

This header file is a mess (not your fault!).   I would suggest putting
this after:

#ifdef CONFIG_HUGETLBFS
<and super block information>

>  struct hugepage_subpool {
>  	spinlock_t lock;
>  	long count;
> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
> index b6c3540e07bc..557d0c3b6eca 100644
> --- a/include/linux/shmem_fs.h
> +++ b/include/linux/shmem_fs.h
> @@ -109,8 +109,6 @@ extern void shmem_uncharge(struct inode *inode, long pages);
>  
>  #ifdef CONFIG_TMPFS
>  
> -extern int shmem_add_seals(struct file *file, unsigned int seals);
> -extern int shmem_get_seals(struct file *file);
>  extern long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
>  
>  #else
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 07a1d22807be..6ceb5f43cb10 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -2722,10 +2722,10 @@ static int shmem_wait_for_pins(struct address_space *mapping)
>  		     F_SEAL_GROW | \
>  		     F_SEAL_WRITE)
>  
> -int shmem_add_seals(struct file *file, unsigned int seals)
> +static int memfd_add_seals(struct file *file, unsigned int seals)
>  {
>  	struct inode *inode = file_inode(file);
> -	struct shmem_inode_info *info = SHMEM_I(inode);
> +	unsigned int *info_seals;
>  	int error;
>  
>  	/*
> @@ -2758,8 +2758,13 @@ int shmem_add_seals(struct file *file, unsigned int seals)
>  	 * other file types.
>  	 */
>  
> -	if (file->f_op != &shmem_file_operations)
> +	if (file->f_op == &shmem_file_operations)
> +		info_seals = &SHMEM_I(inode)->seals;
> +	else if (file->f_op == &hugetlbfs_file_operations)
> +		info_seals = &HUGETLBFS_I(inode)->seals;
> +	else
>  		return -EINVAL;
> +
>  	if (!(file->f_mode & FMODE_WRITE))
>  		return -EPERM;
>  	if (seals & ~(unsigned int)F_ALL_SEALS)
> @@ -2767,12 +2772,12 @@ int shmem_add_seals(struct file *file, unsigned int seals)
>  
>  	inode_lock(inode);
>  
> -	if (info->seals & F_SEAL_SEAL) {
> +	if (*info_seals & F_SEAL_SEAL) {
>  		error = -EPERM;
>  		goto unlock;
>  	}
>  
> -	if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
> +	if ((seals & F_SEAL_WRITE) && !(*info_seals & F_SEAL_WRITE)) {
>  		error = mapping_deny_writable(file->f_mapping);
>  		if (error)
>  			goto unlock;
> @@ -2784,23 +2789,24 @@ int shmem_add_seals(struct file *file, unsigned int seals)
>  		}
>  	}
>  
> -	info->seals |= seals;
> +	*info_seals |= seals;
>  	error = 0;
>  
>  unlock:
>  	inode_unlock(inode);
>  	return error;
>  }
> -EXPORT_SYMBOL_GPL(shmem_add_seals);
>  
> -int shmem_get_seals(struct file *file)
> +static int memfd_get_seals(struct file *file)
>  {
> -	if (file->f_op != &shmem_file_operations)
> -		return -EINVAL;
> +	if (file->f_op == &shmem_file_operations)
> +		return SHMEM_I(file_inode(file))->seals;
> +
> +	if (file->f_op == &hugetlbfs_file_operations)
> +		return HUGETLBFS_I(file_inode(file))->seals;
>  
> -	return SHMEM_I(file_inode(file))->seals;
> +	return -EINVAL;
>  }
> -EXPORT_SYMBOL_GPL(shmem_get_seals);

I can see why the routine name change makes sense.  However, I have no
idea why shmem_add/get_seals are extern and exported today.  Perhaps
Hugh or someone else can comment?

-- 
Mike Kravetz

>  
>  long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
>  {
> @@ -2812,10 +2818,10 @@ long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
>  		if (arg > UINT_MAX)
>  			return -EINVAL;
>  
> -		error = shmem_add_seals(file, arg);
> +		error = memfd_add_seals(file, arg);
>  		break;
>  	case F_GET_SEALS:
> -		error = shmem_get_seals(file);
> +		error = memfd_get_seals(file);
>  		break;
>  	default:
>  		error = -EINVAL;
> @@ -3659,19 +3665,16 @@ SYSCALL_DEFINE2(memfd_create,
>  		const char __user *, uname,
>  		unsigned int, flags)
>  {
> -	struct shmem_inode_info *info;
>  	struct file *file;
>  	int fd, error;
>  	char *name;
>  	long len;
> +	unsigned int *info_seals;
>  
>  	if (!(flags & MFD_HUGETLB)) {
>  		if (flags & ~(unsigned int)MFD_ALL_FLAGS)
>  			return -EINVAL;
>  	} else {
> -		/* Sealing not supported in hugetlbfs (MFD_HUGETLB) */
> -		if (flags & MFD_ALLOW_SEALING)
> -			return -EINVAL;
>  		/* Allow huge page size encoding in flags. */
>  		if (flags & ~(unsigned int)(MFD_ALL_FLAGS |
>  				(MFD_HUGE_MASK << MFD_HUGE_SHIFT)))
> @@ -3723,13 +3726,13 @@ SYSCALL_DEFINE2(memfd_create,
>  	file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
>  	file->f_flags |= O_RDWR | O_LARGEFILE;
>  
> +	if (flags & MFD_HUGETLB)
> +		info_seals = &HUGETLBFS_I(file_inode(file))->seals;
> +	else
> +		info_seals = &SHMEM_I(file_inode(file))->seals;
> +
>  	if (flags & MFD_ALLOW_SEALING) {
> -		/*
> -		 * flags check at beginning of function ensures
> -		 * this is not a hugetlbfs (MFD_HUGETLB) file.
> -		 */
> -		info = SHMEM_I(file_inode(file));
> -		info->seals &= ~F_SEAL_SEAL;
> +		*info_seals &= ~F_SEAL_SEAL;
>  	}
>  
>  	fd_install(fd, file);
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

     prev parent reply	other threads:[~2017-10-27 18:37 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-26 13:20 marcandre.lureau
2017-10-27 18:37 ` Mike Kravetz [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4eef58c6-62aa-21f1-4acc-9fce7f32952a@oracle.com \
    --to=mike.kravetz@oracle.com \
    --cc=hughd@google.com \
    --cc=linux-mm@kvack.org \
    --cc=marcandre.lureau@redhat.com \
    --cc=nyc@holomorphy.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox