Greeting, FYI, we noticed the following commit (built with gcc-9): commit: c895b784c699224d690c7dfbdcff309df82366e3 ("loop: don't hold lo_mutex during __loop_clr_fd()") https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master in testcase: stress-ng version: stress-ng-x86_64-0.11-06_20211126 with following parameters: nr_threads: 100% disk: 1HDD testtime: 60s class: device test: loop cpufreq_governor: performance ucode: 0xb000280 on test machine: 96 threads 2 sockets Ice Lake with 256G memory caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace): If you fix the issue, kindly add following tag Reported-by: kernel test robot [ 194.222111][ C71] rcu: INFO: rcu_sched self-detected stall on CPU [ 194.222114][ C71] rcu: 71-....: (14353 ticks this GP) idle=dfb/1/0x4000000000000000 softirq=9274/9274 fqs=21431 [ 194.222119][ C71] (t=100001 jiffies g=12029 q=232127) [ 194.222123][ C71] NMI backtrace for cpu 71 [ 194.222125][ C71] CPU: 71 PID: 5857 Comm: stress-ng Not tainted 5.16.0-rc2-00042-gc895b784c699 #1 [ 194.222127][ C71] Call Trace: [ 194.222128][ C71] [ 194.222129][ C71] dump_stack_lvl (lib/dump_stack.c:107) [ 194.222137][ C71] nmi_cpu_backtrace.cold (lib/nmi_backtrace.c:113) [ 194.222139][ C71] ? lapic_can_unplug_cpu (arch/x86/kernel/apic/hw_nmi.c:33) [ 194.222143][ C71] nmi_trigger_cpumask_backtrace (lib/nmi_backtrace.c:62) [ 194.222147][ C71] rcu_dump_cpu_stacks (kernel/rcu/tree_stall.h:339 (discriminator 5)) [ 194.222151][ C71] rcu_sched_clock_irq.cold (kernel/rcu/tree_stall.h:629 kernel/rcu/tree_stall.h:711 kernel/rcu/tree.c:3878 kernel/rcu/tree.c:2597) [ 194.222153][ C71] update_process_times (arch/x86/include/asm/preempt.h:27 kernel/time/timer.c:1787) [ 194.222158][ C71] tick_sched_handle+0x1f/0x80 [ 194.222162][ C71] tick_sched_timer (kernel/time/tick-sched.c:1426) [ 194.222163][ C71] ? tick_sched_do_timer (kernel/time/tick-sched.c:1408) [ 194.222164][ C71] __hrtimer_run_queues (kernel/time/hrtimer.c:1685 kernel/time/hrtimer.c:1749) [ 194.222166][ C71] hrtimer_interrupt (kernel/time/hrtimer.c:1814) [ 194.222168][ C71] __sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1086 arch/x86/kernel/apic/apic.c:1103) [ 194.222172][ C71] sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1097 (discriminator 14)) [ 194.222176][ C71] [ 194.222177][ C71] [ 194.222178][ C71] asm_sysvec_apic_timer_interrupt (arch/x86/include/asm/idtentry.h:638) [ 194.222183][ C71] RIP: 0010:console_unlock (kernel/printk/printk.c:2719) [ 194.222187][ C71] Code: 00 44 89 e6 48 c7 c7 40 72 c3 82 e8 99 3c 01 00 65 ff 0d d2 f9 eb 7e e9 dd fd ff ff e8 88 1b 00 00 4d 85 ed 0f 85 e3 00 00 00 <8b> 44 24 0c 85 c0 0f 84 e4 fc ff ff 31 d2 be a0 0a 00 00 48 c7 c7 All code ======== 0: 00 44 89 e6 add %al,-0x1a(%rcx,%rcx,4) 4: 48 c7 c7 40 72 c3 82 mov $0xffffffff82c37240,%rdi b: e8 99 3c 01 00 callq 0x13ca9 10: 65 ff 0d d2 f9 eb 7e decl %gs:0x7eebf9d2(%rip) # 0x7eebf9e9 17: e9 dd fd ff ff jmpq 0xfffffffffffffdf9 1c: e8 88 1b 00 00 callq 0x1ba9 21: 4d 85 ed test %r13,%r13 24: 0f 85 e3 00 00 00 jne 0x10d 2a:* 8b 44 24 0c mov 0xc(%rsp),%eax <-- trapping instruction 2e: 85 c0 test %eax,%eax 30: 0f 84 e4 fc ff ff je 0xfffffffffffffd1a 36: 31 d2 xor %edx,%edx 38: be a0 0a 00 00 mov $0xaa0,%esi 3d: 48 rex.W 3e: c7 .byte 0xc7 3f: c7 .byte 0xc7 Code starting with the faulting instruction =========================================== 0: 8b 44 24 0c mov 0xc(%rsp),%eax 4: 85 c0 test %eax,%eax 6: 0f 84 e4 fc ff ff je 0xfffffffffffffcf0 c: 31 d2 xor %edx,%edx e: be a0 0a 00 00 mov $0xaa0,%esi 13: 48 rex.W 14: c7 .byte 0xc7 15: c7 .byte 0xc7 [ 194.222189][ C71] RSP: 0018:ffa00000222dbb50 EFLAGS: 00000206 [ 194.222191][ C71] RAX: 0000000000000000 RBX: ffffffff8357d8b8 RCX: 0000000000000008 [ 194.222192][ C71] RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffff8357d8b8 [ 194.222193][ C71] RBP: 000000000000004d R08: 0000000000000300 R09: 0000000000000000 [ 194.222193][ C71] R10: 0000000000000001 R11: ffffffffc0598080 R12: 0000000000000000 [ 194.222194][ C71] R13: 0000000000000200 R14: 0000000000000000 R15: 0000000000000000 [ 194.222197][ C71] vprintk_emit (arch/x86/include/asm/preempt.h:85 kernel/printk/printk.c:2246) [ 194.222200][ C71] ? kernfs_add_one (fs/kernfs/dir.c:766) [ 194.222202][ C71] _printk (kernel/printk/printk.c:2270) [ 194.222204][ C71] set_capacity_and_notify.cold (block/genhd.c:96) [ 194.222207][ C71] loop_set_size+0x11/0x40 loop [ 194.222211][ C71] loop_configure (drivers/block/loop.c:1057) loop [ 194.222213][ C71] lo_ioctl (drivers/block/loop.c:1546) loop [ 194.222216][ C71] blkdev_ioctl (block/ioctl.c:588) [ 194.222221][ C71] ? do_sys_openat2 (fs/open.c:1222) [ 194.222225][ C71] __x64_sys_ioctl (fs/ioctl.c:51 fs/ioctl.c:874 fs/ioctl.c:860 fs/ioctl.c:860) [ 194.222230][ C71] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) [ 194.222231][ C71] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:113) [ 194.222233][ C71] RIP: 0033:0x7f40e173b427 [ 194.222235][ C71] Code: 00 00 90 48 8b 05 69 aa 0c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 39 aa 0c 00 f7 d8 64 89 01 48 All code ======== 0: 00 00 add %al,(%rax) 2: 90 nop 3: 48 8b 05 69 aa 0c 00 mov 0xcaa69(%rip),%rax # 0xcaa73 a: 64 c7 00 26 00 00 00 movl $0x26,%fs:(%rax) 11: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax 18: c3 retq 19: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 20: 00 00 00 23: b8 10 00 00 00 mov $0x10,%eax 28: 0f 05 syscall 2a:* 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax <-- trapping instruction 30: 73 01 jae 0x33 32: c3 retq 33: 48 8b 0d 39 aa 0c 00 mov 0xcaa39(%rip),%rcx # 0xcaa73 3a: f7 d8 neg %eax 3c: 64 89 01 mov %eax,%fs:(%rcx) 3f: 48 rex.W Code starting with the faulting instruction =========================================== 0: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax 6: 73 01 jae 0x9 8: c3 retq 9: 48 8b 0d 39 aa 0c 00 mov 0xcaa39(%rip),%rcx # 0xcaa49 10: f7 d8 neg %eax 12: 64 89 01 mov %eax,%fs:(%rcx) 15: 48 rex.W [ 194.222236][ C71] RSP: 002b:00007ffdaea025a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 194.222238][ C71] RAX: ffffffffffffffda RBX: 0000000000000028 RCX: 00007f40e173b427 [ 194.222238][ C71] RDX: 0000000000000005 RSI: 0000000000004c00 RDI: 0000000000000007 [ 194.222239][ C71] RBP: 0000000000000006 R08: 0000000000000000 R09: 00007ffdaea02316 [ 194.222240][ C71] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000007 [ 194.222240][ C71] R13: 00000000000003e8 R14: 00007ffdaea06980 R15: 00007ffdaea038b0 [ 194.222242][ C71] To reproduce: git clone https://github.com/intel/lkp-tests.git cd lkp-tests sudo bin/lkp install job.yaml # job file is attached in this email bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run sudo bin/lkp run generated-yaml-file # if come across any failure that blocks the test, # please remove ~/.lkp and /lkp dir to run from a clean state. --- 0DAY/LKP+ Test Infrastructure Open Source Technology Center https://lists.01.org/hyperkitty/list/lkp@lists.01.org Intel Corporation Thanks, Oliver Sang