linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Gabriel Krisman Bertazi <gabriel@krisman.be>
To: "André Almeida" <andrealmeid@igalia.com>
Cc: Hugh Dickins <hughd@google.com>,
	 Andrew Morton <akpm@linux-foundation.org>,
	 Alexander Viro <viro@zeniv.linux.org.uk>,
	Christian Brauner <brauner@kernel.org>,  Jan Kara <jack@suse.cz>,
	linux-mm@kvack.org,  linux-kernel@vger.kernel.org,
	linux-fsdevel@vger.kernel.org,  kernel-dev@igalia.com,
	 Daniel Rosenberg <drosen@google.com>,
	 smcv@collabora.com,  Christoph Hellwig <hch@lst.de>,
	Theodore Ts'o <tytso@mit.edu>
Subject: Re: [PATCH v4 07/10] tmpfs: Add casefold lookup support
Date: Thu, 12 Sep 2024 15:04:27 -0400	[thread overview]
Message-ID: <87ed5olmmc.fsf@mailhost.krisman.be> (raw)
In-Reply-To: <20240911144502.115260-8-andrealmeid@igalia.com> (=?utf-8?Q?=22Andr=C3=A9?= Almeida"'s message of "Wed, 11 Sep 2024 11:44:59 -0300")

André Almeida <andrealmeid@igalia.com> writes:

> Enable casefold lookup in tmpfs, based on the encoding defined by
> userspace. That means that instead of comparing byte per byte a file
> name, it compares to a case-insensitive equivalent of the Unicode
> string.

I think this looks much better.

A few things left. First, I'd suggest you merge patch 5 and this
one. There is not much sense in keeping them separate; patch 5 is a
dependency that won't crash the build, but might cause runtime errors if anyone
cherry-picks the feature but forgets to pull it.

> diff --git a/mm/shmem.c b/mm/shmem.c
> index 5a77acf6ac6a..4fde63596ab3 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -40,6 +40,8 @@
>  #include <linux/fs_parser.h>
>  #include <linux/swapfile.h>
>  #include <linux/iversion.h>
> +#include <linux/unicode.h>

> +#include <linux/parser.h>

I think you don't need this header anymore

>  #include "swap.h"
>  
>  static struct vfsmount *shm_mnt __ro_after_init;
> @@ -123,6 +125,8 @@ struct shmem_options {
>  	bool noswap;
>  	unsigned short quota_types;
>  	struct shmem_quota_limits qlimits;
> +	struct unicode_map *encoding;
> +	bool strict_encoding;
>  #define SHMEM_SEEN_BLOCKS 1
>  #define SHMEM_SEEN_INODES 2
>  #define SHMEM_SEEN_HUGE 4
> @@ -3427,6 +3431,10 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
>  	if (IS_ERR(inode))
>  		return PTR_ERR(inode);
>  
> +	if (IS_ENABLED(CONFIG_UNICODE) &&
> +	    !generic_ci_validate_strict_name(dir, &dentry->d_name))
> +		return -EINVAL;
> +

Commenting here, but this is about generic_ci_validate_strict_name.  Can
you make it an inline function in the header file instead? This way you
can fold the IS_ENABLED into it and also avoid the function call.

>  	error = simple_acl_create(dir, inode);
>  	if (error)
>  		goto out_iput;
> @@ -3442,7 +3450,12 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
>  	dir->i_size += BOGO_DIRENT_SIZE;
>  	inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
>  	inode_inc_iversion(dir);
> -	d_instantiate(dentry, inode);
> +
> +	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
> +		d_add(dentry, inode);
> +	else
> +		d_instantiate(dentry, inode);
> +
>  	dget(dentry); /* Extra count - pin the dentry in core */
>  	return error;
>  
> @@ -3533,7 +3546,10 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir,
>  	inc_nlink(inode);
>  	ihold(inode);	/* New dentry reference */
>  	dget(dentry);	/* Extra pinning count for the created dentry */
> -	d_instantiate(dentry, inode);
> +	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
> +		d_add(dentry, inode);
> +	else
> +		d_instantiate(dentry, inode);
>  out:
>  	return ret;
>  }
> @@ -3553,6 +3569,14 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
>  	inode_inc_iversion(dir);
>  	drop_nlink(inode);
>  	dput(dentry);	/* Undo the count from "create" - does all the work */
> +
> +	/*
> +	 * For now, VFS can't deal with case-insensitive negative dentries, so
> +	 * we invalidate them
> +	 */
> +	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
> +		d_invalidate(dentry);
> +
>  	return 0;
>  }
>  
> @@ -3697,7 +3721,10 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
>  	dir->i_size += BOGO_DIRENT_SIZE;
>  	inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
>  	inode_inc_iversion(dir);
> -	d_instantiate(dentry, inode);
> +	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
> +		d_add(dentry, inode);
> +	else
> +		d_instantiate(dentry, inode);
>  	dget(dentry);
>  	return 0;
>  
> @@ -4050,6 +4077,9 @@ enum shmem_param {
>  	Opt_usrquota_inode_hardlimit,
>  	Opt_grpquota_block_hardlimit,
>  	Opt_grpquota_inode_hardlimit,
> +	Opt_casefold_version,
> +	Opt_casefold,
> +	Opt_strict_encoding,
>  };
>  
>  static const struct constant_table shmem_param_enums_huge[] = {
> @@ -4081,9 +4111,53 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
>  	fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
>  	fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
>  #endif
> +	fsparam_string("casefold",	Opt_casefold_version),
> +	fsparam_flag  ("casefold",	Opt_casefold),
> +	fsparam_flag  ("strict_encoding", Opt_strict_encoding),
>  	{}
>  };
>  
> +#if IS_ENABLED(CONFIG_UNICODE)
> +static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param,
> +				    bool latest_version)
> +{
> +	struct shmem_options *ctx = fc->fs_private;
> +	unsigned int version = UTF8_LATEST;
> +	struct unicode_map *encoding;
> +	char *version_str = param->string + 5;
> +
> +	if (!latest_version) {
> +		if (strncmp(param->string, "utf8-", 5))
> +			return invalfc(fc, "Only UTF-8 encodings are supported "
> +				       "in the format: utf8-<version number>");
> +
> +		version = utf8_parse_version(version_str);
> +		if (version < 0)
> +			return invalfc(fc, "Invalid UTF-8 version: %s", version_str);
> +	}
> +
> +	encoding = utf8_load(version);
> +
> +	if (IS_ERR(encoding)) {
> +		return invalfc(fc, "Failed loading UTF-8 version: utf8-%u.%u.%u\n",
> +		unicode_major(version), unicode_minor(version), unicode_rev(version));

bad indentation?

> +	}
> +
> +	pr_info("tmpfs: Using encoding : utf8-%u.%u.%u\n",
> +		unicode_major(version), unicode_minor(version), unicode_rev(version));
> +
> +	ctx->encoding = encoding;
> +
> +	return 0;
> +}
> +#else
> +static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param,
> +				    bool latest_version)
> +{
> +	return invalfc(fc, "tmpfs: Kernel not built with CONFIG_UNICODE\n");
> +}
> +#endif
> +
>  static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
>  {
>  	struct shmem_options *ctx = fc->fs_private;
> @@ -4242,6 +4316,13 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
>  				       "Group quota inode hardlimit too large.");
>  		ctx->qlimits.grpquota_ihardlimit = size;
>  		break;
> +	case Opt_casefold_version:
> +		return shmem_parse_opt_casefold(fc, param, false);
> +	case Opt_casefold:
> +		return shmem_parse_opt_casefold(fc, param, true);
> +	case Opt_strict_encoding:
> +		ctx->strict_encoding = true;

Using strict_encoding without encoding should be explicitly forbidden.

> +		break;
>  	}
>  	return 0;
>  
> @@ -4471,6 +4552,11 @@ static void shmem_put_super(struct super_block *sb)
>  {
>  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
>  
> +#if IS_ENABLED(CONFIG_UNICODE)
> +	if (sb->s_encoding)
> +		utf8_unload(sb->s_encoding);
> +#endif
> +
>  #ifdef CONFIG_TMPFS_QUOTA
>  	shmem_disable_quotas(sb);
>  #endif
> @@ -4481,6 +4567,17 @@ static void shmem_put_super(struct super_block *sb)
>  	sb->s_fs_info = NULL;
>  }
>  
> +#if IS_ENABLED(CONFIG_UNICODE)
> +static const struct dentry_operations shmem_ci_dentry_ops = {
> +	.d_hash = generic_ci_d_hash,
> +	.d_compare = generic_ci_d_compare,
> +#ifdef CONFIG_FS_ENCRYPTION
> +	.d_revalidate = fscrypt_d_revalidate,
> +#endif
> +	.d_delete = always_delete_dentry,
> +};
> +#endif
> +
>  static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
>  {
>  	struct shmem_options *ctx = fc->fs_private;
> @@ -4515,9 +4612,21 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
>  	}
>  	sb->s_export_op = &shmem_export_ops;
>  	sb->s_flags |= SB_NOSEC | SB_I_VERSION;
> +
> +#if IS_ENABLED(CONFIG_UNICODE)
> +	if (ctx->encoding) {
> +		sb->s_encoding = ctx->encoding;
> +		sb->s_d_op = &shmem_ci_dentry_ops;
> +		if (ctx->strict_encoding)
> +			sb->s_encoding_flags = SB_ENC_STRICT_MODE_FL;
> +	}
>  #else
> -	sb->s_flags |= SB_NOUSER;
> +	sb->s_d_op = &simple_dentry_operations;

Moving simple_dentry_operations to be set at s_d_op should be a separate
patch.

It is a change that has non-obvious side effects (i.e. the way we
treat the root dentry) so it needs proper review by itself.  It is
also not related to the rest of the case-insensitive patch.

Also, why is it done only for CONFIG_UNICODE=n?

>  #endif
> +
> +#else
> +	sb->s_flags |= SB_NOUSER;
> +#endif /* CONFIG_TMPFS */
>  	sbinfo->max_blocks = ctx->blocks;
>  	sbinfo->max_inodes = ctx->inodes;
>  	sbinfo->free_ispace = sbinfo->max_inodes * BOGO_INODE_SIZE;
> @@ -4791,6 +4900,8 @@ int shmem_init_fs_context(struct fs_context *fc)
>  	ctx->uid = current_fsuid();
>  	ctx->gid = current_fsgid();
>  
> +	ctx->encoding = NULL;
> +
>  	fc->fs_private = ctx;
>  	fc->ops = &shmem_fs_context_ops;
>  	return 0;

-- 
Gabriel Krisman Bertazi


  reply	other threads:[~2024-09-12 19:04 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-11 14:44 [PATCH v4 00/10] tmpfs: Add case-insensitive support for tmpfs André Almeida
2024-09-11 14:44 ` [PATCH v4 01/10] libfs: Create the helper function generic_ci_validate_strict_name() André Almeida
2024-09-12 19:16   ` Gabriel Krisman Bertazi
2024-09-11 14:44 ` [PATCH v4 02/10] ext4: Use generic_ci_validate_strict_name helper André Almeida
2024-09-12 19:14   ` Gabriel Krisman Bertazi
2024-09-11 14:44 ` [PATCH v4 03/10] unicode: Recreate utf8_parse_version() André Almeida
2024-09-12 19:14   ` Gabriel Krisman Bertazi
2024-09-11 14:44 ` [PATCH v4 04/10] unicode: Export latest available UTF-8 version number André Almeida
2024-09-11 14:44 ` [PATCH v4 05/10] libfs: Check for casefold dirs on simple_lookup() André Almeida
2024-09-11 14:44 ` [PATCH v4 06/10] libfs: Export generic_ci_ dentry functions André Almeida
2024-09-12 19:13   ` Gabriel Krisman Bertazi
2024-09-11 14:44 ` [PATCH v4 07/10] tmpfs: Add casefold lookup support André Almeida
2024-09-12 19:04   ` Gabriel Krisman Bertazi [this message]
2024-10-02  1:40     ` André Almeida
2024-10-02 21:29       ` Gabriel Krisman Bertazi
2024-09-13 16:28   ` kernel test robot
2024-09-13 18:25   ` kernel test robot
2024-09-11 14:45 ` [PATCH v4 08/10] tmpfs: Add flag FS_CASEFOLD_FL support for tmpfs dirs André Almeida
2024-09-12 19:10   ` Gabriel Krisman Bertazi
2024-09-11 14:45 ` [PATCH v4 09/10] tmpfs: Expose filesystem features via sysfs André Almeida
2024-09-12 19:07   ` Gabriel Krisman Bertazi
2024-09-11 14:45 ` [PATCH v4 10/10] docs: tmpfs: Add casefold options André Almeida
2024-09-12 19:07   ` Gabriel Krisman Bertazi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87ed5olmmc.fsf@mailhost.krisman.be \
    --to=gabriel@krisman.be \
    --cc=akpm@linux-foundation.org \
    --cc=andrealmeid@igalia.com \
    --cc=brauner@kernel.org \
    --cc=drosen@google.com \
    --cc=hch@lst.de \
    --cc=hughd@google.com \
    --cc=jack@suse.cz \
    --cc=kernel-dev@igalia.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=smcv@collabora.com \
    --cc=tytso@mit.edu \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox