linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [BUG] Regression in 7.0-rc2: WARNING in maple_tree during __mmap_region error path
@ 2026-03-11  0:24 song yang
  0 siblings, 0 replies; only message in thread
From: song yang @ 2026-03-11  0:24 UTC (permalink / raw)
  To: liamsg; +Cc: linux-mm, linux-kernel


[-- Attachment #1.1: Type: text/plain, Size: 2656 bytes --]

*1. Summary*

A regression was identified in the *7.0.0-rc2* kernel where a WARNING is
triggered at include/linux/maple_tree.h:749 during an error rollback in
__mmap_region. The issue occurs when fault injection (failslab) is used to
simulate an allocation failure during a mmap operation that involves a VMA
split. This suggests the Maple Tree iterator enters an inconsistent state
during the error handling path.
*2. Environment Information*

   - *Kernel Version:* 7.0.0-rc2-g5ee8dbf54602.
   - *Architecture:* x86_64.
   - *Config:* CONFIG_FAULT_INJECTION=y, CONFIG_DEBUG_VM_MAPLE_TREE=y.
   - *Hardware:* QEMU Ubuntu 24.04 PC v2.

*3. Detailed Description*

The WARNING occurs in __mas_set_range when the iterator is active and a new
range is set where the start address exceeds the current recorded end of
the range.

In the provided logs, the register state shows:

   - *RSI (start):* 00002000001e3000
   - *R13 (mas->last):* 00002000001e2fff

Because start > mas->last while the iterator is active, the condition
checked by MAS_WARN_ON(mas, mas_is_active(mas) && (mas->index > start ||
mas->last < start)) evaluates to true and the warning fires. This happens
during the rollback of a failed mmap call initiated via do_mmap and
mmap_region.
*4. Call Trace*
Plaintext

[   26.423091] WARNING: ./include/linux/maple_tree.h:749 at
__mmap_region+0x1a82/0x2650
...
[   26.427870] RIP: 0010:__mmap_region+0x1a82/0x2650
...
[   26.439165] Call Trace:
[   26.439499]  <TASK>
[   26.443459]  ? mm_get_unmapped_area_vmflags+0xd7/0x130
[   26.444142]  mmap_region+0x19c/0x360
[   26.444653]  do_mmap+0xcc7/0x11f0
[   26.446925]  vm_mmap_pgoff+0x20a/0x380
[   26.449541]  ksys_mmap_pgoff+0xdb/0x5a0
[   26.452189]  __x64_sys_mmap+0x125/0x190
[   26.452704]  do_syscall_64+0xf1/0x530

*(Full log provided in attachment)*
*5. Reproducer*

The issue can be reproduced consistently by running the attached C program
under a fault-injection-enabled kernel. The program targets address
0x2000001e3000 with MAP_FIXED and utilizes /proc/thread-self/fail-nth to
trigger a slab allocation failure.

*Steps to reproduce:*

   1. Compile the reproducer: gcc -static repro.c -o repro -lpthread.
   2. Enable fault injection: echo 1 > /sys/kernel/debug/failslab/cache-filter.
   3. Run the binary as root: ./repro.

*6. Potential Root Cause*

The transition to the new VMA management logic in 7.0 seems to have
introduced an edge case in __mmap_region. When a helper like anon_vma_clone
fails, the error path attempts to reconfigure the VMA iterator for cleanup
without resetting its state (e.g., via mas_reset), leading to a range
violation if the iterator was positioned at the boundary of the previous
slot.

[-- Attachment #1.2: Type: text/html, Size: 4720 bytes --]

[-- Attachment #2: repro.c --]
[-- Type: application/octet-stream, Size: 13928 bytes --]

// autogenerated by syzkaller (https://github.com/google/syzkaller)

#define _GNU_SOURCE

#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#include <linux/capability.h>
#include <linux/futex.h>

// Suspend the calling thread for `ms` milliseconds (converted to the
// microsecond count that usleep() takes).
static void sleep_ms(uint64_t ms)
{
  const uint64_t usec = ms * 1000;
  usleep(usec);
}

// Return the CLOCK_MONOTONIC reading in whole milliseconds.
// Terminates the process with status 1 if the clock cannot be read.
static uint64_t current_time_ms(void)
{
  struct timespec now;
  if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
    exit(1);
  const uint64_t from_seconds = (uint64_t)now.tv_sec * 1000;
  const uint64_t from_nanos = (uint64_t)now.tv_nsec / 1000000;
  return from_seconds + from_nanos;
}

// Start a detached-from-caller worker thread running fn(arg) on a 128 KiB
// stack. The pthread_t handle is not returned to the caller.
//
// pthread_create() reports failure through its return value (it is not
// required to set errno), so the retry decision is made on that value:
// EAGAIN is retried after a 50 us pause, up to 100 attempts in total.
// Any other error, or running out of attempts, exits the process with
// status 1.
static void thread_start(void* (*fn)(void*), void* arg)
{
  pthread_t th;
  pthread_attr_t attr;
  pthread_attr_init(&attr);
  pthread_attr_setstacksize(&attr, 128 << 10);
  for (int attempt = 0; attempt < 100; attempt++) {
    int rc = pthread_create(&th, &attr, fn, arg);
    if (rc == 0) {
      pthread_attr_destroy(&attr);
      return;
    }
    if (rc != EAGAIN)
      break;
    // Transient resource shortage: back off briefly and try again.
    usleep(50);
  }
  exit(1);
}

// Minimal event flag: `state` is 0 while the event is unset and 1 once it
// has been set. The address of `state` is also what the event_* helpers
// pass to the futex syscall.
typedef struct {
  int state;
} event_t;

// Put a freshly declared event into the "not set" state.
static void event_init(event_t* ev)
{
  (*ev).state = 0;
}

// Clear the event so that it can be set again (plain, non-atomic store).
static void event_reset(event_t* ev)
{
  (*ev).state = 0;
}

// Mark the event as set and wake futex waiters blocked on it.
// Setting an event that is already set is treated as a fatal error
// (the process exits with status 1).
static void event_set(event_t* ev)
{
  if (ev->state != 0)
    exit(1);
  __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
  const int wake_op = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;
  syscall(SYS_futex, &ev->state, wake_op, 1000000);
}

// Block until the event has been set. The futex wait is issued against the
// value 0, and the state is re-checked after every return from the syscall.
static void event_wait(event_t* ev)
{
  const int wait_op = FUTEX_WAIT | FUTEX_PRIVATE_FLAG;
  for (;;) {
    if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
      return;
    syscall(SYS_futex, &ev->state, wait_op, 0, 0);
  }
}

// Non-blocking query: returns the current state word (nonzero once set),
// read with acquire ordering.
static int event_isset(event_t* ev)
{
  int current = __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
  return current;
}

// Wait for the event for at most `timeout` milliseconds.
// Returns 1 as soon as the event is observed set, or 0 once more than
// `timeout` ms have elapsed without it being set. Each futex wait is given
// only the time still remaining in the budget.
static int event_timedwait(event_t* ev, uint64_t timeout)
{
  const uint64_t begin = current_time_ms();
  uint64_t elapsed = 0;
  do {
    const uint64_t left = timeout - elapsed;
    struct timespec ts;
    ts.tv_sec = left / 1000;
    ts.tv_nsec = (left % 1000) * 1000 * 1000;
    syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
    if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
      return 1;
    elapsed = current_time_ms() - begin;
  } while (elapsed <= timeout);
  return 0;
}

// Format a printf-style message (truncated to fit a 1024-byte buffer) and
// write it to an already existing file in a single write() call.
// Returns true only if the file could be opened and the whole message was
// written. On a failed write, errno is preserved across the close().
static bool write_file(const char* file, const char* what, ...)
{
  char text[1024];
  va_list ap;
  va_start(ap, what);
  vsnprintf(text, sizeof(text), what, ap);
  va_end(ap);
  text[sizeof(text) - 1] = 0;
  int text_len = strlen(text);

  int fd = open(file, O_WRONLY | O_CLOEXEC);
  if (fd == -1)
    return false;

  bool complete = write(fd, text, text_len) == text_len;
  int saved_errno = errno;
  close(fd);
  if (!complete)
    errno = saved_errno;
  return complete;
}

// Forward declarations: these mount helpers are defined further down in the
// file but are called at the end of sandbox_common_mount_tmpfs().
static void setup_gadgetfs();
static void setup_binderfs();
static void setup_fusectl();
// Build a private root filesystem on a tmpfs under ./syz-tmp and switch the
// process into it.
//
// Steps, in order: mount a tmpfs, create ./syz-tmp/newroot and populate it
// with bind mounts of /dev, /sys and several optional directories plus a
// fresh proc mount, try pivot_root() into the tmpfs, chroot() into newroot,
// and finally run the gadgetfs/binderfs/fusectl setup helpers.
// Any mandatory step that fails terminates the process with exit(1).
static void sandbox_common_mount_tmpfs(void)
{
  // Best effort: the result of this write is not checked.
  write_file("/proc/sys/fs/mount-max", "100000");
  if (mkdir("./syz-tmp", 0777))
    exit(1);
  if (mount("", "./syz-tmp", "tmpfs", 0, NULL))
    exit(1);
  if (mkdir("./syz-tmp/newroot", 0777))
    exit(1);
  if (mkdir("./syz-tmp/newroot/dev", 0700))
    exit(1);
  // Flags shared by all the bind mounts below.
  unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE;
  if (mount("/dev", "./syz-tmp/newroot/dev", NULL, bind_mount_flags, NULL))
    exit(1);
  if (mkdir("./syz-tmp/newroot/proc", 0700))
    exit(1);
  if (mount("syz-proc", "./syz-tmp/newroot/proc", "proc", 0, NULL))
    exit(1);
  if (mkdir("./syz-tmp/newroot/selinux", 0700))
    exit(1);
  const char* selinux_path = "./syz-tmp/newroot/selinux";
  // Try /selinux first and fall back to /sys/fs/selinux; ENOENT from either
  // attempt is tolerated, any other error is fatal.
  if (mount("/selinux", selinux_path, NULL, bind_mount_flags, NULL)) {
    if (errno != ENOENT)
      exit(1);
    if (mount("/sys/fs/selinux", selinux_path, NULL, bind_mount_flags, NULL) &&
        errno != ENOENT)
      exit(1);
  }
  if (mkdir("./syz-tmp/newroot/sys", 0700))
    exit(1);
  if (mount("/sys", "./syz-tmp/newroot/sys", 0, bind_mount_flags, NULL))
    exit(1);
  // The following bind mounts are optional: a failure with ENOENT is
  // ignored, any other failure exits.
  if (mount("/sys/kernel/debug", "./syz-tmp/newroot/sys/kernel/debug", NULL,
            bind_mount_flags, NULL) &&
      errno != ENOENT)
    exit(1);
  if (mount("/sys/fs/smackfs", "./syz-tmp/newroot/sys/fs/smackfs", NULL,
            bind_mount_flags, NULL) &&
      errno != ENOENT)
    exit(1);
  if (mount("/proc/sys/fs/binfmt_misc",
            "./syz-tmp/newroot/proc/sys/fs/binfmt_misc", NULL, bind_mount_flags,
            NULL) &&
      errno != ENOENT)
    exit(1);
  if (mkdir("./syz-tmp/newroot/syz-inputs", 0700))
    exit(1);
  // /syz-inputs is additionally bind-mounted with MS_RDONLY.
  if (mount("/syz-inputs", "./syz-tmp/newroot/syz-inputs", NULL,
            bind_mount_flags | MS_RDONLY, NULL) &&
      errno != ENOENT)
    exit(1);
  if (mkdir("./syz-tmp/pivot", 0777))
    exit(1);
  // If pivot_root() fails, just change into ./syz-tmp; if it succeeds,
  // change to the new "/" and lazily detach the old root now at ./pivot.
  if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) {
    if (chdir("./syz-tmp"))
      exit(1);
  } else {
    if (chdir("/"))
      exit(1);
    if (umount2("./pivot", MNT_DETACH))
      exit(1);
  }
  // In both cases ./newroot is now relative to the tmpfs.
  if (chroot("./newroot"))
    exit(1);
  if (chdir("/"))
    exit(1);
  setup_gadgetfs();
  setup_binderfs();
  setup_fusectl();
}

// Best-effort creation and mount of /dev/gadgetfs. Both results are
// captured and deliberately discarded; failures are not reported.
static void setup_gadgetfs()
{
  int rc = mkdir("/dev/gadgetfs", 0777);
  rc = mount("gadgetfs", "/dev/gadgetfs", "gadgetfs", 0, NULL);
  (void)rc;
}

// Best-effort mount of the fusectl filesystem at /sys/fs/fuse/connections.
// The mount result is deliberately discarded.
static void setup_fusectl()
{
  int rc = mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0);
  (void)rc;
}

// Best-effort binderfs setup: create /dev/binderfs, mount a "binder"
// filesystem on it, and add a ./binderfs symlink pointing at it.
// Every result is deliberately discarded.
static void setup_binderfs()
{
  int rc = mkdir("/dev/binderfs", 0777);
  rc = mount("binder", "/dev/binderfs", "binder", 0, NULL);
  rc = symlink("/dev/binderfs", "./binderfs");
  (void)rc;
}

// Forward declaration: loop() is defined later in the file and is called
// from do_sandbox_none().
static void loop();

// Common sandbox preparation for the child process:
//  - request SIGKILL when the parent dies, and bail out if the parent is
//    already pid 1;
//  - apply a fixed set of resource limits (soft == hard);
//  - best-effort unshare of several namespaces, making "/" recursively
//    private right after the mount namespace is unshared;
//  - best-effort write of a fixed list of SysV IPC sysctls.
// Only the getppid() check is fatal; every other result is ignored.
static void sandbox_common()
{
  prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  if (getppid() == 1)
    exit(1);

  static const struct {
    int resource;
    int bytes;
  } limits[] = {
      {RLIMIT_AS, 200 << 20},   {RLIMIT_MEMLOCK, 32 << 20},
      {RLIMIT_FSIZE, 136 << 20}, {RLIMIT_STACK, 1 << 20},
      {RLIMIT_CORE, 128 << 20}, {RLIMIT_NOFILE, 256},
  };
  for (unsigned k = 0; k < sizeof(limits) / sizeof(limits[0]); k++) {
    struct rlimit rlim;
    rlim.rlim_cur = rlim.rlim_max = limits[k].bytes;
    setrlimit(limits[k].resource, &rlim);
  }

  // Results below are captured only to be discarded.
  int ignored = unshare(CLONE_NEWNS);
  ignored = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL);
  static const int more_namespaces[] = {
      CLONE_NEWIPC,
      0x02000000,
      CLONE_NEWUTS,
      CLONE_SYSVSEM,
  };
  for (unsigned k = 0; k < sizeof(more_namespaces) / sizeof(more_namespaces[0]);
       k++)
    ignored = unshare(more_namespaces[k]);
  (void)ignored;

  static const struct {
    const char* name;
    const char* value;
  } sysctls[] = {
      {"/proc/sys/kernel/shmmax", "16777216"},
      {"/proc/sys/kernel/shmall", "536870912"},
      {"/proc/sys/kernel/shmmni", "1024"},
      {"/proc/sys/kernel/msgmax", "8192"},
      {"/proc/sys/kernel/msgmni", "1024"},
      {"/proc/sys/kernel/msgmnb", "1024"},
      {"/proc/sys/kernel/sem", "1024 1048576 500 1024"},
  };
  for (unsigned k = 0; k < sizeof(sysctls) / sizeof(sysctls[0]); k++)
    write_file(sysctls[k].name, sysctls[k].value);
}

// Reap children until the one with process id `pid` has been collected, then
// return WEXITSTATUS of its wait status. A negative `pid` (failed fork) exits
// the process with status 1.
static int wait_for_loop(int pid)
{
  if (pid < 0)
    exit(1);
  int status = 0;
  for (;;) {
    // Other children that happen to exit first are reaped and ignored.
    if (waitpid(-1, &status, __WALL) == pid)
      break;
  }
  return WEXITSTATUS(status);
}

// Remove CAP_SYS_PTRACE and CAP_SYS_NICE from the effective, permitted and
// inheritable sets of the current process. Only the first capability data
// word is modified. Exits with status 1 if capget or capset fails.
static void drop_caps(void)
{
  struct __user_cap_header_struct hdr = {
      .version = _LINUX_CAPABILITY_VERSION_3,
      .pid = getpid(),
  };
  struct __user_cap_data_struct data[2] = {{0}};
  if (syscall(SYS_capget, &hdr, &data) != 0)
    exit(1);

  const int keep_mask = ~((1 << CAP_SYS_PTRACE) | (1 << CAP_SYS_NICE));
  data[0].effective &= keep_mask;
  data[0].permitted &= keep_mask;
  data[0].inheritable &= keep_mask;

  if (syscall(SYS_capset, &hdr, &data) != 0)
    exit(1);
}

// Fork the sandboxed worker process.
// Parent (and the fork-failure case): hand the fork() result to
// wait_for_loop() and return what it returns.
// Child: set up the sandbox, drop capabilities, build the private root, run
// loop() once and then always exit with status 1.
static int do_sandbox_none(void)
{
  // Best effort; the result is discarded.
  int ignored = unshare(CLONE_NEWPID);
  (void)ignored;

  int pid = fork();
  if (pid == 0) {
    sandbox_common();
    drop_caps();
    ignored = unshare(CLONE_NEWNET);
    (void)ignored;
    write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535");
    sandbox_common_mount_tmpfs();
    loop();
    exit(1);
  }
  return wait_for_loop(pid);
}

// Write the decimal value of `nth` to /proc/thread-self/fail-nth.
// Returns the descriptor, still open; it is the caller's to close.
// Exits with status 1 if the file cannot be opened or fully written.
static int inject_fault(int nth)
{
  int fd = open("/proc/thread-self/fail-nth", O_RDWR);
  if (fd == -1)
    exit(1);

  char digits[16];
  sprintf(digits, "%d", nth);
  const size_t count = strlen(digits);
  if (write(fd, digits, count) != (ssize_t)count)
    exit(1);
  return fd;
}

// Check that the fault-injection proc files can be opened and write a fixed
// set of values to the fault-injection debugfs files.
// Returns NULL on success, otherwise a static string describing what is
// missing. Only entries marked `fatal` in the table turn a failed write into
// an error; the others are best effort.
static const char* setup_fault()
{
  int probe = open("/proc/self/make-it-fail", O_WRONLY);
  if (probe == -1)
    return "CONFIG_FAULT_INJECTION is not enabled";
  close(probe);

  probe = open("/proc/thread-self/fail-nth", O_WRONLY);
  if (probe == -1)
    return "kernel does not have systematic fault injection support";
  close(probe);

  static struct {
    const char* file;
    const char* val;
    bool fatal;
  } files[] = {
      {"/sys/kernel/debug/failslab/ignore-gfp-wait", "N", true},
      {"/sys/kernel/debug/fail_futex/ignore-private", "N", false},
      {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem", "N", false},
      {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-wait", "N", false},
      {"/sys/kernel/debug/fail_page_alloc/min-order", "0", false},
  };
  const unsigned count = sizeof(files) / sizeof(files[0]);
  for (unsigned k = 0; k < count; k++) {
    if (write_file(files[k].file, files[k].val))
      continue;
    if (files[k].fatal)
      return "failed to write fault injection file";
  }
  return NULL;
}

// Per-worker bookkeeping shared between loop() and thr().
struct thread_t {
  // created: nonzero once loop() has started the worker for this slot.
  // call: index of the call the worker passes to execute_call().
  int created, call;
  // ready: set by loop() to hand a call to the worker.
  // done: set by the worker after the call returns (loop() also sets it once
  //       when the slot is created, so a new slot starts out idle).
  event_t ready, done;
};

// Fixed pool of worker slots; loop() starts a worker in a slot on first use.
static struct thread_t threads[16];
// Defined further down: runs the syscall(s) for one numbered step.
static void execute_call(int call);
// Count of calls handed to workers that have not yet finished; updated with
// atomic add/sub by loop() and thr().
static int running;

// Worker thread body. Forever: wait for `ready`, clear it, run the call
// stored in the slot, decrement the global in-flight counter and signal
// `done`. The loop never terminates, so the final return is not reached.
static void* thr(void* arg)
{
  struct thread_t* slot = arg;
  while (1) {
    event_wait(&slot->ready);
    event_reset(&slot->ready);
    execute_call(slot->call);
    __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
    event_set(&slot->done);
  }
  return 0;
}

static void loop(void)
{
  if (write(1, "executing program\n", sizeof("executing program\n") - 1)) {
  }
  int i, call, thread;
  for (call = 0; call < 5; call++) {
    for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0]));
         thread++) {
      struct thread_t* th = &threads[thread];
      if (!th->created) {
        th->created = 1;
        event_init(&th->ready);
        event_init(&th->done);
        event_set(&th->done);
        thread_start(thr, th);
      }
      if (!event_isset(&th->done))
        continue;
      event_reset(&th->done);
      th->call = call;
      __atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
      event_set(&th->ready);
      event_timedwait(&th->done, 50);
      break;
    }
  }
  for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++)
    sleep_ms(1);
}

// Resource table shared between calls: r[0] receives the socket descriptor
// created by call 1 and is passed as the fd of the mmap in call 2. It holds
// all-ones until a socket has been created.
uint64_t r[1] = {0xffffffffffffffff};

// Execute one numbered step of the generated program. Steps are issued in
// order 0..4 by loop(); each case is a single raw syscall with fixed
// arguments (case 0 first stages its buffer, case 4 first arms fault
// injection). Return values are ignored except for the socket in case 1.
// Values of `call` outside 0..4 do nothing.
void execute_call(int call)
{
  intptr_t res = 0;
  switch (call) {
  case 0:
    //  setsockopt$inet_tcp_TCP_CONGESTION arguments: [
    //    fd: sock_tcp (resource)
    //    level: const = 0x6 (4 bytes)
    //    optname: const = 0xd (4 bytes)
    //    optval: ptr[in, buffer] {
    //      buffer: {72 65 6e 6f} (length 0x4)
    //    }
    //    optlen: len = 0x4 (8 bytes)
    //  ]
    // The buffer lives at a fixed address inside the region main() maps at
    // 0x200000000000; the fd passed here is -1.
    memcpy((void*)0x200000000180, "reno", 4);
    syscall(__NR_setsockopt, /*fd=*/(intptr_t)-1, /*level=*/6, /*optname=*/0xd,
            /*optval=*/0x200000000180ul, /*optlen=*/4ul);
    break;
  case 1:
    //  socket$nl_xfrm arguments: [
    //    domain: const = 0x10 (8 bytes)
    //    type: const = 0x3 (8 bytes)
    //    proto: const = 0x6 (4 bytes)
    //  ]
    //  returns sock_nl_xfrm
    res = syscall(__NR_socket, /*domain=*/0x10ul, /*type=*/3ul, /*proto=*/6);
    // Remember the descriptor for case 2; r[0] is left untouched on failure.
    if (res != -1)
      r[0] = res;
    break;
  case 2:
    //  mmap arguments: [
    //    addr: VMA[0x4000]
    //    len: len = 0x4000 (8 bytes)
    //    prot: mmap_prot = 0x1000001 (8 bytes)
    //    flags: mmap_flags = 0x13 (8 bytes)
    //    fd: fd (resource)
    //    offset: intptr = 0xf4f66000 (8 bytes)
    //  ]
    // Uses the socket descriptor stored in r[0] as the mapping's fd.
    syscall(__NR_mmap, /*addr=*/0x2000001e1000ul, /*len=*/0x4000ul,
            /*prot=PROT_GROWSDOWN|PROT_READ*/ 0x1000001ul,
            /*flags=MAP_SHARED_VALIDATE|MAP_FIXED*/ 0x13ul, /*fd=*/r[0],
            /*offset=*/0xf4f66000ul);
    break;
  case 3:
    //  mmap arguments: [
    //    addr: VMA[0x2000]
    //    len: len = 0x2000 (8 bytes)
    //    prot: mmap_prot = 0x7 (8 bytes)
    //    flags: mmap_flags = 0x32 (8 bytes)
    //    fd: fd (resource)
    //    offset: intptr = 0x0 (8 bytes)
    //  ]
    // Fixed anonymous mapping covering 0x2000001e2000..0x2000001e3fff.
    syscall(__NR_mmap, /*addr=*/0x2000001e2000ul, /*len=*/0x2000ul,
            /*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul,
            /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/0,
            /*offset=*/0ul);
    break;
  case 4:
    //  mmap arguments: [
    //    addr: VMA[0x4000]
    //    len: len = 0x4000 (8 bytes)
    //    prot: mmap_prot = 0x7 (8 bytes)
    //    flags: mmap_flags = 0x32 (8 bytes)
    //    fd: fd (resource)
    //    offset: intptr = 0x0 (8 bytes)
    //  ]
    // Writes 13 to this thread's fail-nth file right before the mmap; per
    // the accompanying report this is what forces the allocation failure
    // during the call. The descriptor returned by inject_fault() is
    // discarded and stays open.
    inject_fault(13);
    syscall(__NR_mmap, /*addr=*/0x2000001e3000ul, /*len=*/0x4000ul,
            /*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul,
            /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul,
            /*fd=*/(intptr_t)-1, /*offset=*/0ul);
    break;
  }
}
// Entry point of the reproducer.
// Maps the fixed-address data area used by execute_call() (0x1000000 bytes
// at 0x200000000000, read/write/exec) with one inaccessible page directly
// below and one directly above it, sets up fault injection (printing a
// warning if that fails), then runs the program in the sandboxed child via
// do_sandbox_none(). Always returns 0; the mmap results are not checked.
int main(void)
{
  // Inaccessible page immediately below the data area.
  syscall(__NR_mmap, /*addr=*/0x1ffffffff000ul, /*len=*/0x1000ul, /*prot=*/0ul,
          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul,
          /*fd=*/(intptr_t)-1, /*offset=*/0ul);
  // The data area itself.
  syscall(__NR_mmap, /*addr=*/0x200000000000ul, /*len=*/0x1000000ul,
          /*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul,
          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul,
          /*fd=*/(intptr_t)-1, /*offset=*/0ul);
  // Inaccessible page immediately above the data area.
  syscall(__NR_mmap, /*addr=*/0x200001000000ul, /*len=*/0x1000ul, /*prot=*/0ul,
          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul,
          /*fd=*/(intptr_t)-1, /*offset=*/0ul);
  const char* reason;
  (void)reason;
  // A fault-injection setup failure is reported but does not stop the run.
  if ((reason = setup_fault()))
    printf("the reproducer may not work as expected: fault injection setup "
           "failed: %s\n",
           reason);
  do_sandbox_none();
  return 0;
}

[-- Attachment #3: kernel_crash.log --]
[-- Type: application/octet-stream, Size: 58906 bytes --]

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2026-03-11  0:25 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-11  0:24 [BUG] Regression in 7.0-rc2: WARNING in maple_tree during __mmap_region error path song yang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox