linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Pasha Tatashin <pasha.tatashin@soleen.com>
To: Christian Brauner <brauner@kernel.org>
Cc: luca.boccassi@gmail.com, kexec@lists.infradead.org,
	linux-mm@kvack.org,  graf@amazon.com, rppt@kernel.org,
	pasha.tatashin@soleen.com, pratyush@kernel.org,
	 linux-kernel@vger.kernel.org
Subject: Re: [PATCH v8 3/6] liveupdate: add LUO_SESSION_MAGIC magic inode type
Date: Mon, 20 Apr 2026 14:55:56 +0000	[thread overview]
Message-ID: <xpi24rt5ek2m2wkhv6celu75hgtgte2x4jhfkjar4gi22r7bdh@ilbxbhfxmm2v> (raw)
In-Reply-To: <20260420-unbeeindruckt-besprach-910fd241c32e@brauner>

On 04-20 14:26, Christian Brauner wrote:
> On Sat, Apr 18, 2026 at 05:28:20PM +0100, luca.boccassi@gmail.com wrote:
> > From: Luca Boccassi <luca.boccassi@gmail.com>
> > 
> > In userspace when managing LUO sessions we want to be able to identify
> > a FD as a LUO session, in order to be able to do the special handling
> > that they require in order to function as intended on kexec.
> > 
> > Currently this requires scraping procfs and doing string matching on
> > the prefix of the dname, which is not an ideal interface.
> > 
> > Add a singleton inode type with a magic value, so that we can
> > programmatically identify a fd as a LUO session via fstatfs().
> > 
> > Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
> > Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> > ---
> >  include/uapi/linux/magic.h       |  1 +
> >  kernel/liveupdate/luo_core.c     | 10 +++-
> >  kernel/liveupdate/luo_internal.h |  2 +
> >  kernel/liveupdate/luo_session.c  | 89 ++++++++++++++++++++++++++++++--
> >  4 files changed, 96 insertions(+), 6 deletions(-)
> > 
> > diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
> > index 4f2da935a76c..4f51005522ff 100644
> > --- a/include/uapi/linux/magic.h
> > +++ b/include/uapi/linux/magic.h
> > @@ -105,5 +105,6 @@
> >  #define PID_FS_MAGIC		0x50494446	/* "PIDF" */
> >  #define GUEST_MEMFD_MAGIC	0x474d454d	/* "GMEM" */
> >  #define NULL_FS_MAGIC		0x4E554C4C	/* "NULL" */
> > +#define LUO_SESSION_MAGIC	0x4c554f53	/* "LUOS" */
> >  
> >  #endif /* __LINUX_MAGIC_H__ */
> > diff --git a/kernel/liveupdate/luo_core.c b/kernel/liveupdate/luo_core.c
> > index dda7bb57d421..f1a63ebe4fa4 100644
> > --- a/kernel/liveupdate/luo_core.c
> > +++ b/kernel/liveupdate/luo_core.c
> > @@ -197,9 +197,17 @@ static int __init luo_late_startup(void)
> >  	if (!liveupdate_enabled())
> >  		return 0;
> >  
> > +	err = luo_session_fs_init();
> > +	if (err) {
> > +		luo_global.enabled = false;
> > +		return err;
> > +	}
> > +
> >  	err = luo_fdt_setup();
> > -	if (err)
> > +	if (err) {
> > +		luo_session_fs_cleanup();
> >  		luo_global.enabled = false;
> > +	}
> >  
> >  	return err;
> >  }
> > diff --git a/kernel/liveupdate/luo_internal.h b/kernel/liveupdate/luo_internal.h
> > index 8083d8739b09..d4ac7b4c5882 100644
> > --- a/kernel/liveupdate/luo_internal.h
> > +++ b/kernel/liveupdate/luo_internal.h
> > @@ -79,6 +79,8 @@ struct luo_session {
> >  
> >  int luo_session_create(const char *name, struct file **filep);
> >  int luo_session_retrieve(const char *name, struct file **filep);
> > +int __init luo_session_fs_init(void);
> > +void __init luo_session_fs_cleanup(void);
> >  int __init luo_session_setup_outgoing(void *fdt);
> >  int __init luo_session_setup_incoming(void *fdt);
> >  int luo_session_serialize(void);
> > diff --git a/kernel/liveupdate/luo_session.c b/kernel/liveupdate/luo_session.c
> > index 5e316a4c5d71..21cbe99fc819 100644
> > --- a/kernel/liveupdate/luo_session.c
> > +++ b/kernel/liveupdate/luo_session.c
> > @@ -50,7 +50,6 @@
> >  
> >  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> >  
> > -#include <linux/anon_inodes.h>
> >  #include <linux/cleanup.h>
> >  #include <linux/err.h>
> >  #include <linux/errno.h>
> > @@ -62,7 +61,10 @@
> >  #include <linux/libfdt.h>
> >  #include <linux/list.h>
> >  #include <linux/liveupdate.h>
> > +#include <linux/magic.h>
> > +#include <linux/mount.h>
> >  #include <linux/mutex.h>
> > +#include <linux/pseudo_fs.h>
> >  #include <linux/rwsem.h>
> >  #include <linux/slab.h>
> >  #include <linux/unaligned.h>
> > @@ -363,18 +365,73 @@ static const struct file_operations luo_session_fops = {
> >  	.unlocked_ioctl = luo_session_ioctl,
> >  };
> >  
> > +static struct vfsmount *luo_session_mnt __ro_after_init;
> > +static struct inode *luo_session_inode __ro_after_init;
> > +
> > +/*
> > + * Reject all attribute changes on the singleton session inode.
> > + * Without this the VFS falls back to simple_setattr(), allowing
> > + * fchmod()/fchown() to modify the shared inode.
> > + */
> > +static int luo_session_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
> > +			       struct iattr *attr)
> 
> Don't duplicate, please. Use the generic helper instead:
> 
> int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
> 		       struct iattr *attr)
> 
> > +{
> > +	return -EOPNOTSUPP;
> 
> 
> 
> > +}
> > +
> > +static const struct inode_operations luo_session_inode_operations = {
> > +	.setattr	= luo_session_setattr,
> > +};
> > +
> > +static char *luo_session_dname(struct dentry *dentry, char *buffer, int buflen)
> > +{
> > +	return dynamic_dname(buffer, buflen, "luo_session:%s",
> > +			     dentry->d_name.name);
> 
> Use the luo_session:[%s] which is the canonical format for this
> (ignoring historcal abberations).
> 
> > +}
> > +
> > +static const struct dentry_operations luo_session_dentry_operations = {
> > +	.d_dname	= luo_session_dname,
> > +};
> > +
> > +static int luo_session_init_fs_context(struct fs_context *fc)
> > +{
> > +	struct pseudo_fs_context *ctx;
> > +
> > +	ctx = init_pseudo(fc, LUO_SESSION_MAGIC);
> 
> I'd just call that LUO_FS_MAGIC.
> 
> > +	if (!ctx)
> > +		return -ENOMEM;
> > +
> > +	fc->s_iflags |= SB_I_NOEXEC;
> > +	fc->s_iflags |= SB_I_NODEV;
> 
>         ctx->s_d_flags |= DCACHE_DONTCACHE;
> 
> static const struct super_operations luo_session_sops = {
>         .drop_inode     = inode_just_drop,
>         .statfs         = simple_statfs,
> };
> 
> 
> > +	ctx->dops = &luo_session_dentry_operations;
> 
>         ctx->ops = &luo_session_sops;
> 
> > +	return 0;
> > +}
> > +
> > +static struct file_system_type luo_session_fs_type = {
> > +	.name = "luo_session",
> > +	.init_fs_context = luo_session_init_fs_context,
> > +	.kill_sb = kill_anon_super,
> > +};
> > +
> >  /* Create a "struct file" for session */
> >  static int luo_session_getfile(struct luo_session *session, struct file **filep)
> 
> Luo is going full anti-pattern here. This whole return via a function
> argument completely messes up the later codepths. We don't do manual
> get_unused_fd_flags() flags and then file in new code, and then fail
> in-between:
> 
>         argp->fd = get_unused_fd_flags(O_CLOEXEC);
>         if (argp->fd < 0)
>                 return argp->fd;
> 
>         err = luo_session_create(argp->name, &file);
>         if (err)
>                 goto err_put_fd;
> 
>         err = luo_ucmd_respond(ucmd, sizeof(*argp));
>         if (err)
>                 goto err_put_file;
> 
>         fd_install(argp->fd, file);
> 
> Restructure the code so it just becomes:
> 
> struct file *luo_session_create(argp->name);
> 
> static int luo_ioctl_create_session(struct luo_ucmd *ucmd)
> {
>         struct liveupdate_ioctl_create_session *argp = ucmd->cmd;
> 
>         return FD_ADD(O_CLOEXEC, luo_session_create(argp->name));
> }
> 
> and get rid of all this state and error handling. Please fix this.

We cannot do it this way because we must use copy_to_user() to return fd 
via ioctl(), and since copy_to_user() may fail, we must do it prior to 
fd_install().

Unless there is a specific VFS macro you'd prefer for this 
delayed-install pattern, I do not see any other way to do this but 
maintain the get_unused_fd_flags() -> copy_to_user() -> fd_install() to 
prevent the fd being leaked into the process's table.

> 
> >  {
> > -	char name_buf[128];
> > +	char name_buf[LIVEUPDATE_SESSION_NAME_LENGTH + 1];
> >  	struct file *file;
> >  
> >  	lockdep_assert_held(&session->mutex);
> > -	snprintf(name_buf, sizeof(name_buf), "[luo_session] %s", session->name);
> > -	file = anon_inode_getfile(name_buf, &luo_session_fops, session, O_RDWR);
> > -	if (IS_ERR(file))
> > +
> > +	ihold(luo_session_inode);
> 
> Right, you're now sharing the same inode among all luo sessions. So
> you've gained the ability to recognize luo inodes via fstatfs() but you
> still can't compare two luo session file descriptors for equality using
> stat() which is a major win and if you're doing this work anyway, let's

Luca, is there a specific use case in userspace where we need to compare 
LUO sessions for equality?

Christian's proposed solution of using unique inodes provides a standard 
VFS interface, but it introduces some memory overhead and, more 
importantly, a performance overhead due to the extra metadata 
allocations required during the performance-critical kexec blackout 
window.

I do not mind adding it, but I want to verify if this equality check is 
actually needed for your userspace agents, once we extend this, there is 
no way back.

> just go the extra step. You can mostly mirror pidfs and it's not
> difficult:
> 
> // unique inode numbers for the lifetime of the system instead of using
> // the shared ino allocator that can overflow
> 
> DEFINE_COOKIE(luo_session_cookie);
> 
> static u64 luo_alloc_ino(void)
> {
>         u64 ino;
> 
>         preempt_disable();
>         ino = gen_cookie_next(&luo_ino_cookie);
>         preempt_enable();
> 
>         VFS_WARN_ON_ONCE(ino < 1);
>         return ino;
> }
> 
> static struct inode *luo_new_inode()
> {
> 	inode = new_inode_pseudo(sb);
> 
> 	inode->i_flags	|= S_IMMUTABLE;
> 	inode->i_mode	|= S_IRWXU;
> 	inode->i_flags	|= S_PRIVATE | S_ANON_INODE;
> 	simple_inode_init_ts(inode);
> 
> 	inode->i_op	= &luo_session_inode_operations;
> 	// Make sure to leave inode->i_fop unset so it luo fds cannot be reopened via procfs
> 
> 	// Unique 64-bit inode. On 32-bit this will obviously be
> 	// truncated but a) LUO doesn't support 32-bit b) let's declare
> 	// anyone that wants LUO on 32-bit insane c) who uses more than
> 	// 32-bit worth of luo sessions d) if that's really something
> 	// you want to address you can do so later doing the same thing
> 	// I did for pidfs (see there for context).
>         inode->i_ino	= luo_alloc_ino();
>         inode->i_generation = pidfs_gen(pid->ino);
> 
> 	simple_inode_init_ts(inode);
> 	return inode;
> }
> 
> Now you can do
> 
> stat(fd_luo1, &st1);
> stat(fd_luo2, &st2);
> 
> if (st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino)
> 		return true; // same luo session
> 
> Sooner or later you will want this. Let's do it correctly, right now.
> 
> A bigger design question for the LUO people:
> 
> How do the lifetimes of the luo session and the luo fd relate to each
> other? The way the code is structured it looks like the luo session
> thing that you currently store in the file:
> 
>         file->private_data = session;
> 
> outlives the file? Why?
> 
> Imho, two things should change in addition to the switch to the tiny fs:
> 
> (1) The lifetime of the luo session object should be tied to the
>     lifetime of the session file descriptor so that when the last file
>     descriptor for that luo session is closed the @session thing gets
>     cleaned up. I assume that's the case anyway?
> 
> (2) Instead of stashing the session in file->private_data store it in
>     inode->private_data. This has various advantages:
> 
>     * You can stash other information in file->private_data.
>     * You can at some point have multiple files referring to the same
>       inode/luo session.
> 
> > +
> > +	snprintf(name_buf, sizeof(name_buf), "%s", session->name);
> > +	file = alloc_file_pseudo(luo_session_inode, luo_session_mnt, name_buf,
> > +				 O_RDWR, &luo_session_fops);
> > +	if (IS_ERR(file)) {
> > +		iput(luo_session_inode);
> >  		return PTR_ERR(file);
> > +	}
> >  
> > +	file->private_data = session;
> >  	*filep = file;
> >  
> >  	return 0;
> > @@ -653,3 +710,25 @@ void luo_session_resume(void)
> >  	up_write(&luo_session_global.outgoing.rwsem);
> >  	up_write(&luo_session_global.incoming.rwsem);
> >  }
> > +
> > +int __init luo_session_fs_init(void)
> > +{
> > +	luo_session_mnt = kern_mount(&luo_session_fs_type);
> > +	if (IS_ERR(luo_session_mnt))
> 
> panic()
> 
> > +		return PTR_ERR(luo_session_mnt);
> > +
> > +	luo_session_inode = alloc_anon_inode(luo_session_mnt->mnt_sb);
> > +	if (IS_ERR(luo_session_inode)) {
> > +		kern_unmount(luo_session_mnt);
> > +		return PTR_ERR(luo_session_inode);
> > +	}
> > +	luo_session_inode->i_op = &luo_session_inode_operations;
> 
> Kill all of this now that you allocate inodes on-demand.
> 
> > +
> > +	return 0;
> > +}
> > +
> > +void __init luo_session_fs_cleanup(void)
> > +{
> > +	iput(luo_session_inode);
> > +	kern_unmount(luo_session_mnt);
> > +}
> > -- 
> > 2.47.3
> > 


  parent reply	other threads:[~2026-04-20 14:56 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-18 16:28 [PATCH v8 0/6] liveupdate: new ioctl, change session inode type, bug fixes luca.boccassi
2026-04-18 16:28 ` [PATCH v8 1/6] liveupdate: reject LIVEUPDATE_IOCTL_CREATE_SESSION with invalid name length luca.boccassi
2026-04-19 15:06   ` Pasha Tatashin
2026-04-18 16:28 ` [PATCH v8 2/6] selftests/liveupdate: add test cases for LIVEUPDATE_IOCTL_CREATE_SESSION calls with invalid length luca.boccassi
2026-04-19 15:11   ` Pasha Tatashin
2026-04-18 16:28 ` [PATCH v8 3/6] liveupdate: add LUO_SESSION_MAGIC magic inode type luca.boccassi
2026-04-20 12:26   ` Christian Brauner
2026-04-20 14:22     ` Luca Boccassi
2026-04-20 14:57       ` Pasha Tatashin
2026-04-20 15:05       ` Christian Brauner
2026-04-20 14:55     ` Pasha Tatashin [this message]
2026-04-20 14:59       ` Luca Boccassi
2026-04-20 15:28       ` Christian Brauner
2026-04-20 15:57         ` Pasha Tatashin
2026-04-20 16:39         ` Pasha Tatashin
2026-04-18 16:28 ` [PATCH v8 4/6] selftests/liveupdate: add test case for LUO_SESSION_MAGIC luca.boccassi
2026-04-18 16:28 ` [PATCH v8 5/6] liveupdate: add LIVEUPDATE_SESSION_GET_NAME ioctl luca.boccassi
2026-04-18 16:28 ` [PATCH v8 6/6] selftests/liveupdate: add test cases for LIVEUPDATE_SESSION_GET_NAME luca.boccassi
2026-04-19 15:12   ` Pasha Tatashin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=xpi24rt5ek2m2wkhv6celu75hgtgte2x4jhfkjar4gi22r7bdh@ilbxbhfxmm2v \
    --to=pasha.tatashin@soleen.com \
    --cc=brauner@kernel.org \
    --cc=graf@amazon.com \
    --cc=kexec@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luca.boccassi@gmail.com \
    --cc=pratyush@kernel.org \
    --cc=rppt@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox