linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Song Liu <song@kernel.org>
To: Aaron Lu <aaron.lu@intel.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	 akpm@linux-foundation.org, x86@kernel.org, peterz@infradead.org,
	hch@lst.de,  kernel-team@fb.com, rick.p.edgecombe@intel.com,
	dave.hansen@intel.com,  urezki@gmail.com
Subject: Re: [RFC v2 3/4] modules, x86: use vmalloc_exec for module core
Date: Thu, 13 Oct 2022 23:07:49 -0700	[thread overview]
Message-ID: <CAPhsuW6qhyuo_M60_rgmK54eBQsestca5dz4wM+rRqpGkKBLAw@mail.gmail.com> (raw)
In-Reply-To: <Y0jcGjzjIYeFzNhv@ziqianlu-desk>

On Thu, Oct 13, 2022 at 8:49 PM Aaron Lu <aaron.lu@intel.com> wrote:
>
> On Fri, Oct 07, 2022 at 04:43:14PM -0700, Song Liu wrote:
> > This is a prototype that allows modules to share 2MB text pages with other
> > modules and BPF programs.
> >
> > Current version only covers core_layout.
> > ---
> >  arch/x86/Kconfig              |  1 +
> >  arch/x86/kernel/alternative.c | 30 ++++++++++++++++++++++++------
> >  arch/x86/kernel/module.c      |  1 +
> >  kernel/module/main.c          | 23 +++++++++++++----------
> >  kernel/module/strict_rwx.c    |  3 ---
> >  kernel/trace/ftrace.c         |  3 ++-
> >  6 files changed, 41 insertions(+), 20 deletions(-)
> >
> > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > index f9920f1341c8..0b1ea05a1da6 100644
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -91,6 +91,7 @@ config X86
> >       select ARCH_HAS_SET_DIRECT_MAP
> >       select ARCH_HAS_STRICT_KERNEL_RWX
> >       select ARCH_HAS_STRICT_MODULE_RWX
> > +     select ARCH_WANTS_MODULES_DATA_IN_VMALLOC       if X86_64
> >       select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
> >       select ARCH_HAS_SYSCALL_WRAPPER
> >       select ARCH_HAS_UBSAN_SANITIZE_ALL
> > diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> > index 4f3204364caa..0e47a558c5bc 100644
> > --- a/arch/x86/kernel/alternative.c
> > +++ b/arch/x86/kernel/alternative.c
> > @@ -332,7 +332,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
> >
> >               DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
> >
> > -             text_poke_early(instr, insn_buff, insn_buff_sz);
> > +             if (system_state < SYSTEM_RUNNING) {
> > +                     text_poke_early(instr, insn_buff, insn_buff_sz);
> > +             } else {
> > +                     mutex_lock(&text_mutex);
> > +                     text_poke(instr, insn_buff, insn_buff_sz);
> > +                     mutex_unlock(&text_mutex);
> > +             }
> >
> >  next:
> >               optimize_nops(instr, a->instrlen);
> > @@ -503,7 +509,13 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
> >                       optimize_nops(bytes, len);
> >                       DUMP_BYTES(((u8*)addr),  len, "%px: orig: ", addr);
> >                       DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
> > -                     text_poke_early(addr, bytes, len);
> > +                     if (system_state == SYSTEM_BOOTING) {
> > +                             text_poke_early(addr, bytes, len);
> > +                     } else {
> > +                             mutex_lock(&text_mutex);
> > +                             text_poke(addr, bytes, len);
> > +                             mutex_unlock(&text_mutex);
> > +                     }
> >               }
> >       }
> >  }
> > @@ -568,7 +580,13 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
> >               if (len == insn.length) {
> >                       DUMP_BYTES(((u8*)addr),  len, "%px: orig: ", addr);
> >                       DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
> > -                     text_poke_early(addr, bytes, len);
> > +                     if (unlikely(system_state == SYSTEM_BOOTING)) {
> > +                             text_poke_early(addr, bytes, len);
> > +                     } else {
> > +                             mutex_lock(&text_mutex);
> > +                             text_poke(addr, bytes, len);
> > +                             mutex_unlock(&text_mutex);
> > +                     }
> >               }
> >       }
> >  }
> > @@ -609,7 +627,7 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end)
> >                */
> >               DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr);
> >               DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr);
> > -             text_poke_early(addr, &poison, 4);
> > +             text_poke(addr, &poison, 4);
> >       }
> >  }
> >
> > @@ -791,7 +809,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
> >
> >               /* Pad the rest with nops */
> >               add_nops(insn_buff + used, p->len - used);
> > -             text_poke_early(p->instr, insn_buff, p->len);
> > +             text_poke(p->instr, insn_buff, p->len);
>
> Got below warning when booting a VM:
>
> [    0.190098] ------------[ cut here ]------------
> [    0.190377] WARNING: CPU: 0 PID: 0 at /home/aaron/linux/src/arch/x86/kernel/alternative.c:1224 text_poke+0x53/0x60
> [    0.191083] Modules linked in:
> [    0.191269] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.0.0-00004-gc49d19177d78 #5
> [    0.191721] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
> [    0.192083] RIP: 0010:text_poke+0x53/0x60
> [    0.192326] Code: c7 c7 20 e7 02 81 5b 5d e9 2a f8 ff ff be ff ff ff ff 48 c7 c7 b0 6d 06 83 48 89 14 24 e8 75 fd bf 00 85 c0 48 8b 14 24 75 c8 <0f> 0b eb c4 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 57 41 56
> [    0.193083] RSP: 0000:ffffffff83003d60 EFLAGS: 00010246
> [    0.194083] RAX: 0000000000000000 RBX: ffffffff810295b7 RCX: 0000000000000001
> [    0.194506] RDX: 0000000000000006 RSI: ffffffff828b01c5 RDI: ffffffff8293898e
> [    0.195083] RBP: ffffffff83003d82 R08: ffffffff82206520 R09: 0000000000000001
> [    0.195506] R10: 0000000000000000 R11: 0000000000000001 R12: ffffffff8a9949c0
> [    0.195929] R13: ffffffff8a95f400 R14: 00000000ffffffff R15: 00000000ffffffff
> [    0.196083] FS:  0000000000000000(0000) GS:ffff88842de00000(0000) knlGS:0000000000000000
> [    0.196562] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [    0.197083] CR2: ffff88843ffff000 CR3: 0000000003012001 CR4: 0000000000770ef0
> [    0.197508] PKRU: 55555554
> [    0.197673] Call Trace:
> [    0.197822]  <TASK>
> [    0.198084]  apply_paravirt+0xaf/0x150
> [    0.198313]  ? __might_resched+0x3f/0x280
> [    0.198557]  ? synchronize_rcu+0xe0/0x1c0
> [    0.198799]  ? lock_release+0x230/0x450
> [    0.199030]  ? _raw_spin_unlock_irqrestore+0x30/0x60
> [    0.199083]  ? lockdep_hardirqs_on+0x79/0x100
> [    0.199345]  ? _raw_spin_unlock_irqrestore+0x3b/0x60
> [    0.199643]  ? atomic_notifier_chain_unregister+0x51/0x80
> [    0.200084]  alternative_instructions+0x27/0xfa
> [    0.200357]  check_bugs+0xe08/0xe82
> [    0.200570]  start_kernel+0x692/0x6cc
> [    0.200797]  secondary_startup_64_no_verify+0xe0/0xeb
> [    0.201088]  </TASK>
> [    0.201223] irq event stamp: 13575
> [    0.201428] hardirqs last  enabled at (13583): [<ffffffff811193c2>] __up_console_sem+0x52/0x60
> [    0.202083] hardirqs last disabled at (13592): [<ffffffff811193a7>] __up_console_sem+0x37/0x60
> [    0.202594] softirqs last  enabled at (12762): [<ffffffff8117e169>] cgroup_idr_alloc.constprop.60+0x59/0x100
> [    0.203083] softirqs last disabled at (12750): [<ffffffff8117e13d>] cgroup_idr_alloc.constprop.60+0x2d/0x100
> [    0.203665] ---[ end trace 0000000000000000 ]---
>
> Looks like it is also necessary to differentiate system_state in
> apply_paravirt() like you did in the other apply_XXX() functions.

Thanks for the report! Somehow I didn't see this in my qemu vm.

Song


  reply	other threads:[~2022-10-14  6:08 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-07 23:43 [RFC v2 0/4] vmalloc_exec for modules and BPF programs Song Liu
2022-10-07 23:43 ` [RFC v2 1/4] vmalloc: introduce vmalloc_exec and vfree_exec Song Liu
2022-10-10 18:13   ` Edgecombe, Rick P
2022-10-10 19:04     ` Song Liu
2022-10-10 19:59       ` Edgecombe, Rick P
2022-10-07 23:43 ` [RFC v2 2/4] bpf: use vmalloc_exec Song Liu
2022-10-07 23:43 ` [RFC v2 3/4] modules, x86: use vmalloc_exec for module core Song Liu
2022-10-14  3:48   ` Aaron Lu
2022-10-14  6:07     ` Song Liu [this message]
     [not found]   ` <fb7a38faa52ce0f35061473c9c8b56394a726e59.camel@intel.com>
2022-10-14 18:26     ` Song Liu
2022-10-07 23:43 ` [RFC v2 4/4] vmalloc_exec: share a huge page with kernel text Song Liu
2022-10-10 18:32   ` Edgecombe, Rick P
2022-10-10 19:08     ` Song Liu
2022-10-10 20:09       ` Edgecombe, Rick P
     [not found]         ` <2B66E2E7-7D32-418C-9DFD-1E17180300B4@fb.com>
2022-10-11 20:40           ` Edgecombe, Rick P
2022-10-12  5:37             ` Song Liu
2022-10-12 18:38               ` Edgecombe, Rick P
2022-10-12 19:01                 ` Song Liu
2022-10-08  0:17 ` [RFC v2 0/4] vmalloc_exec for modules and BPF programs Song Liu
2022-10-12 19:03 ` Song Liu
2022-10-17  7:26 ` Christoph Hellwig
2022-10-17 16:23   ` Song Liu
2022-10-18 14:50     ` Christoph Hellwig
2022-10-18 15:05       ` Song Liu
2022-10-18 15:40         ` Christoph Hellwig
2022-10-18 15:40           ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAPhsuW6qhyuo_M60_rgmK54eBQsestca5dz4wM+rRqpGkKBLAw@mail.gmail.com \
    --to=song@kernel.org \
    --cc=aaron.lu@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=dave.hansen@intel.com \
    --cc=hch@lst.de \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=peterz@infradead.org \
    --cc=rick.p.edgecombe@intel.com \
    --cc=urezki@gmail.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox