From: Pasha Tatashin <pasha.tatashin@soleen.com>
To: Christian Brauner <brauner@kernel.org>
Cc: luca.boccassi@gmail.com, kexec@lists.infradead.org,
linux-mm@kvack.org, graf@amazon.com, rppt@kernel.org,
pasha.tatashin@soleen.com, pratyush@kernel.org,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH v8 3/6] liveupdate: add LUO_SESSION_MAGIC magic inode type
Date: Mon, 20 Apr 2026 14:55:56 +0000 [thread overview]
Message-ID: <xpi24rt5ek2m2wkhv6celu75hgtgte2x4jhfkjar4gi22r7bdh@ilbxbhfxmm2v> (raw)
In-Reply-To: <20260420-unbeeindruckt-besprach-910fd241c32e@brauner>
On 04-20 14:26, Christian Brauner wrote:
> On Sat, Apr 18, 2026 at 05:28:20PM +0100, luca.boccassi@gmail.com wrote:
> > From: Luca Boccassi <luca.boccassi@gmail.com>
> >
> > In userspace when managing LUO sessions we want to be able to identify
> > a FD as a LUO session, in order to be able to do the special handling
> > that they require in order to function as intended on kexec.
> >
> > Currently this requires scraping procfs and doing string matching on
> > the prefix of the dname, which is not an ideal interface.
> >
> > Add a singleton inode type with a magic value, so that we can
> > programmatically identify a fd as a LUO session via fstatfs().
> >
> > Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
> > Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> > ---
> > include/uapi/linux/magic.h | 1 +
> > kernel/liveupdate/luo_core.c | 10 +++-
> > kernel/liveupdate/luo_internal.h | 2 +
> > kernel/liveupdate/luo_session.c | 89 ++++++++++++++++++++++++++++++--
> > 4 files changed, 96 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
> > index 4f2da935a76c..4f51005522ff 100644
> > --- a/include/uapi/linux/magic.h
> > +++ b/include/uapi/linux/magic.h
> > @@ -105,5 +105,6 @@
> > #define PID_FS_MAGIC 0x50494446 /* "PIDF" */
> > #define GUEST_MEMFD_MAGIC 0x474d454d /* "GMEM" */
> > #define NULL_FS_MAGIC 0x4E554C4C /* "NULL" */
> > +#define LUO_SESSION_MAGIC 0x4c554f53 /* "LUOS" */
> >
> > #endif /* __LINUX_MAGIC_H__ */
> > diff --git a/kernel/liveupdate/luo_core.c b/kernel/liveupdate/luo_core.c
> > index dda7bb57d421..f1a63ebe4fa4 100644
> > --- a/kernel/liveupdate/luo_core.c
> > +++ b/kernel/liveupdate/luo_core.c
> > @@ -197,9 +197,17 @@ static int __init luo_late_startup(void)
> > if (!liveupdate_enabled())
> > return 0;
> >
> > + err = luo_session_fs_init();
> > + if (err) {
> > + luo_global.enabled = false;
> > + return err;
> > + }
> > +
> > err = luo_fdt_setup();
> > - if (err)
> > + if (err) {
> > + luo_session_fs_cleanup();
> > luo_global.enabled = false;
> > + }
> >
> > return err;
> > }
> > diff --git a/kernel/liveupdate/luo_internal.h b/kernel/liveupdate/luo_internal.h
> > index 8083d8739b09..d4ac7b4c5882 100644
> > --- a/kernel/liveupdate/luo_internal.h
> > +++ b/kernel/liveupdate/luo_internal.h
> > @@ -79,6 +79,8 @@ struct luo_session {
> >
> > int luo_session_create(const char *name, struct file **filep);
> > int luo_session_retrieve(const char *name, struct file **filep);
> > +int __init luo_session_fs_init(void);
> > +void __init luo_session_fs_cleanup(void);
> > int __init luo_session_setup_outgoing(void *fdt);
> > int __init luo_session_setup_incoming(void *fdt);
> > int luo_session_serialize(void);
> > diff --git a/kernel/liveupdate/luo_session.c b/kernel/liveupdate/luo_session.c
> > index 5e316a4c5d71..21cbe99fc819 100644
> > --- a/kernel/liveupdate/luo_session.c
> > +++ b/kernel/liveupdate/luo_session.c
> > @@ -50,7 +50,6 @@
> >
> > #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> >
> > -#include <linux/anon_inodes.h>
> > #include <linux/cleanup.h>
> > #include <linux/err.h>
> > #include <linux/errno.h>
> > @@ -62,7 +61,10 @@
> > #include <linux/libfdt.h>
> > #include <linux/list.h>
> > #include <linux/liveupdate.h>
> > +#include <linux/magic.h>
> > +#include <linux/mount.h>
> > #include <linux/mutex.h>
> > +#include <linux/pseudo_fs.h>
> > #include <linux/rwsem.h>
> > #include <linux/slab.h>
> > #include <linux/unaligned.h>
> > @@ -363,18 +365,73 @@ static const struct file_operations luo_session_fops = {
> > .unlocked_ioctl = luo_session_ioctl,
> > };
> >
> > +static struct vfsmount *luo_session_mnt __ro_after_init;
> > +static struct inode *luo_session_inode __ro_after_init;
> > +
> > +/*
> > + * Reject all attribute changes on the singleton session inode.
> > + * Without this the VFS falls back to simple_setattr(), allowing
> > + * fchmod()/fchown() to modify the shared inode.
> > + */
> > +static int luo_session_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
> > + struct iattr *attr)
>
> Don't duplicate, please. Use the generic helper instead:
>
> int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
> struct iattr *attr)
>
> > +{
> > + return -EOPNOTSUPP;
>
>
>
> > +}
> > +
> > +static const struct inode_operations luo_session_inode_operations = {
> > + .setattr = luo_session_setattr,
> > +};
> > +
> > +static char *luo_session_dname(struct dentry *dentry, char *buffer, int buflen)
> > +{
> > + return dynamic_dname(buffer, buflen, "luo_session:%s",
> > + dentry->d_name.name);
>
> Use the luo_session:[%s] which is the canonical format for this
> (ignoring historcal abberations).
>
> > +}
> > +
> > +static const struct dentry_operations luo_session_dentry_operations = {
> > + .d_dname = luo_session_dname,
> > +};
> > +
> > +static int luo_session_init_fs_context(struct fs_context *fc)
> > +{
> > + struct pseudo_fs_context *ctx;
> > +
> > + ctx = init_pseudo(fc, LUO_SESSION_MAGIC);
>
> I'd just call that LUO_FS_MAGIC.
>
> > + if (!ctx)
> > + return -ENOMEM;
> > +
> > + fc->s_iflags |= SB_I_NOEXEC;
> > + fc->s_iflags |= SB_I_NODEV;
>
> ctx->s_d_flags |= DCACHE_DONTCACHE;
>
> static const struct super_operations luo_session_sops = {
> .drop_inode = inode_just_drop,
> .statfs = simple_statfs,
> };
>
>
> > + ctx->dops = &luo_session_dentry_operations;
>
> ctx->ops = &luo_session_sops;
>
> > + return 0;
> > +}
> > +
> > +static struct file_system_type luo_session_fs_type = {
> > + .name = "luo_session",
> > + .init_fs_context = luo_session_init_fs_context,
> > + .kill_sb = kill_anon_super,
> > +};
> > +
> > /* Create a "struct file" for session */
> > static int luo_session_getfile(struct luo_session *session, struct file **filep)
>
> Luo is going full anti-pattern here. This whole return via a function
> argument completely messes up the later codepths. We don't do manual
> get_unused_fd_flags() flags and then file in new code, and then fail
> in-between:
>
> argp->fd = get_unused_fd_flags(O_CLOEXEC);
> if (argp->fd < 0)
> return argp->fd;
>
> err = luo_session_create(argp->name, &file);
> if (err)
> goto err_put_fd;
>
> err = luo_ucmd_respond(ucmd, sizeof(*argp));
> if (err)
> goto err_put_file;
>
> fd_install(argp->fd, file);
>
> Restructure the code so it just becomes:
>
> struct file *luo_session_create(argp->name);
>
> static int luo_ioctl_create_session(struct luo_ucmd *ucmd)
> {
> struct liveupdate_ioctl_create_session *argp = ucmd->cmd;
>
> return FD_ADD(O_CLOEXEC, luo_session_create(argp->name));
> }
>
> and get rid of all this state and error handling. Please fix this.
We cannot do it this way because we must use copy_to_user() to return fd
via ioctl(), and since copy_to_user() may fail, we must do it prior to
fd_install().
Unless there is a specific VFS macro you'd prefer for this
delayed-install pattern, I do not see any other way to do this but
maintain the get_unused_fd_flags() -> copy_to_user() -> fd_install() to
prevent the fd being leaked into the process's table.
>
> > {
> > - char name_buf[128];
> > + char name_buf[LIVEUPDATE_SESSION_NAME_LENGTH + 1];
> > struct file *file;
> >
> > lockdep_assert_held(&session->mutex);
> > - snprintf(name_buf, sizeof(name_buf), "[luo_session] %s", session->name);
> > - file = anon_inode_getfile(name_buf, &luo_session_fops, session, O_RDWR);
> > - if (IS_ERR(file))
> > +
> > + ihold(luo_session_inode);
>
> Right, you're now sharing the same inode among all luo sessions. So
> you've gained the ability to recognize luo inodes via fstatfs() but you
> still can't compare two luo session file descriptors for equality using
> stat() which is a major win and if you're doing this work anyway, let's
Luca, is there a specific use case in userspace where we need to compare
LUO sessions for equality?
Christian's proposed solution of using unique inodes provides a standard
VFS interface, but it introduces some memory overhead and, more
importantly, a performance overhead due to the extra metadata
allocations required during the performance-critical kexec blackout
window.
I do not mind adding it, but I want to verify if this equality check is
actually needed for your userspace agents, once we extend this, there is
no way back.
> just go the extra step. You can mostly mirror pidfs and it's not
> difficult:
>
> // unique inode numbers for the lifetime of the system instead of using
> // the shared ino allocator that can overflow
>
> DEFINE_COOKIE(luo_session_cookie);
>
> static u64 luo_alloc_ino(void)
> {
> u64 ino;
>
> preempt_disable();
> ino = gen_cookie_next(&luo_ino_cookie);
> preempt_enable();
>
> VFS_WARN_ON_ONCE(ino < 1);
> return ino;
> }
>
> static struct inode *luo_new_inode()
> {
> inode = new_inode_pseudo(sb);
>
> inode->i_flags |= S_IMMUTABLE;
> inode->i_mode |= S_IRWXU;
> inode->i_flags |= S_PRIVATE | S_ANON_INODE;
> simple_inode_init_ts(inode);
>
> inode->i_op = &luo_session_inode_operations;
> // Make sure to leave inode->i_fop unset so it luo fds cannot be reopened via procfs
>
> // Unique 64-bit inode. On 32-bit this will obviously be
> // truncated but a) LUO doesn't support 32-bit b) let's declare
> // anyone that wants LUO on 32-bit insane c) who uses more than
> // 32-bit worth of luo sessions d) if that's really something
> // you want to address you can do so later doing the same thing
> // I did for pidfs (see there for context).
> inode->i_ino = luo_alloc_ino();
> inode->i_generation = pidfs_gen(pid->ino);
>
> simple_inode_init_ts(inode);
> return inode;
> }
>
> Now you can do
>
> stat(fd_luo1, &st1);
> stat(fd_luo2, &st2);
>
> if (st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino)
> return true; // same luo session
>
> Sooner or later you will want this. Let's do it correctly, right now.
>
> A bigger design question for the LUO people:
>
> How do the lifetimes of the luo session and the luo fd relate to each
> other? The way the code is structured it looks like the luo session
> thing that you currently store in the file:
>
> file->private_data = session;
>
> outlives the file? Why?
>
> Imho, two things should change in addition to the switch to the tiny fs:
>
> (1) The lifetime of the luo session object should be tied to the
> lifetime of the session file descriptor so that when the last file
> descriptor for that luo session is closed the @session thing gets
> cleaned up. I assume that's the case anyway?
>
> (2) Instead of stashing the session in file->private_data store it in
> inode->private_data. This has various advantages:
>
> * You can stash other information in file->private_data.
> * You can at some point have multiple files referring to the same
> inode/luo session.
>
> > +
> > + snprintf(name_buf, sizeof(name_buf), "%s", session->name);
> > + file = alloc_file_pseudo(luo_session_inode, luo_session_mnt, name_buf,
> > + O_RDWR, &luo_session_fops);
> > + if (IS_ERR(file)) {
> > + iput(luo_session_inode);
> > return PTR_ERR(file);
> > + }
> >
> > + file->private_data = session;
> > *filep = file;
> >
> > return 0;
> > @@ -653,3 +710,25 @@ void luo_session_resume(void)
> > up_write(&luo_session_global.outgoing.rwsem);
> > up_write(&luo_session_global.incoming.rwsem);
> > }
> > +
> > +int __init luo_session_fs_init(void)
> > +{
> > + luo_session_mnt = kern_mount(&luo_session_fs_type);
> > + if (IS_ERR(luo_session_mnt))
>
> panic()
>
> > + return PTR_ERR(luo_session_mnt);
> > +
> > + luo_session_inode = alloc_anon_inode(luo_session_mnt->mnt_sb);
> > + if (IS_ERR(luo_session_inode)) {
> > + kern_unmount(luo_session_mnt);
> > + return PTR_ERR(luo_session_inode);
> > + }
> > + luo_session_inode->i_op = &luo_session_inode_operations;
>
> Kill all of this now that you allocate inodes on-demand.
>
> > +
> > + return 0;
> > +}
> > +
> > +void __init luo_session_fs_cleanup(void)
> > +{
> > + iput(luo_session_inode);
> > + kern_unmount(luo_session_mnt);
> > +}
> > --
> > 2.47.3
> >
next prev parent reply other threads:[~2026-04-20 14:56 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-18 16:28 [PATCH v8 0/6] liveupdate: new ioctl, change session inode type, bug fixes luca.boccassi
2026-04-18 16:28 ` [PATCH v8 1/6] liveupdate: reject LIVEUPDATE_IOCTL_CREATE_SESSION with invalid name length luca.boccassi
2026-04-19 15:06 ` Pasha Tatashin
2026-04-18 16:28 ` [PATCH v8 2/6] selftests/liveupdate: add test cases for LIVEUPDATE_IOCTL_CREATE_SESSION calls with invalid length luca.boccassi
2026-04-19 15:11 ` Pasha Tatashin
2026-04-18 16:28 ` [PATCH v8 3/6] liveupdate: add LUO_SESSION_MAGIC magic inode type luca.boccassi
2026-04-20 12:26 ` Christian Brauner
2026-04-20 14:22 ` Luca Boccassi
2026-04-20 14:57 ` Pasha Tatashin
2026-04-20 15:05 ` Christian Brauner
2026-04-20 14:55 ` Pasha Tatashin [this message]
2026-04-20 14:59 ` Luca Boccassi
2026-04-20 15:28 ` Christian Brauner
2026-04-20 15:57 ` Pasha Tatashin
2026-04-20 16:39 ` Pasha Tatashin
2026-04-18 16:28 ` [PATCH v8 4/6] selftests/liveupdate: add test case for LUO_SESSION_MAGIC luca.boccassi
2026-04-18 16:28 ` [PATCH v8 5/6] liveupdate: add LIVEUPDATE_SESSION_GET_NAME ioctl luca.boccassi
2026-04-18 16:28 ` [PATCH v8 6/6] selftests/liveupdate: add test cases for LIVEUPDATE_SESSION_GET_NAME luca.boccassi
2026-04-19 15:12 ` Pasha Tatashin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=xpi24rt5ek2m2wkhv6celu75hgtgte2x4jhfkjar4gi22r7bdh@ilbxbhfxmm2v \
--to=pasha.tatashin@soleen.com \
--cc=brauner@kernel.org \
--cc=graf@amazon.com \
--cc=kexec@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=luca.boccassi@gmail.com \
--cc=pratyush@kernel.org \
--cc=rppt@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox