linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* BUG] [REGRESSION] Regression in 7.0-rc2: WARNING in maple_tree during __mmap_region error path
@ 2026-03-11  0:30 song yang
  0 siblings, 0 replies; only message in thread
From: song yang @ 2026-03-11  0:30 UTC (permalink / raw)
  To: Liam.Howlett; +Cc: linux-mm, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1928 bytes --]

Hi Liam and MM maintainers,

I have identified a regression in the Linux 7.0-rc2 kernel involving
the Maple Tree iterator during a VMA split error path.
The issue triggers a MAS_WARN_ON in the Maple Tree headers when
fault injection is used to simulate memory allocation failures.

1. Kernel Information:
- Version: 7.0.0-rc2-g5ee8dbf54602
- Architecture: x86_64
- Relevant Configs: CONFIG_FAULT_INJECTION, CONFIG_DEBUG_VM_MAPLE_TREE

2. Problem Description:
A WARNING is triggered at include/linux/maple_tree.h:749 within
__mmap_region+0x1a82/0x2650. This occurs during
a MAP_FIXED mmap call that requires splitting an existing VMA.

When vm_area_dup or similar allocations fail due to fault injection
(via /proc/thread-self/fail-nth), the error rollback path attempts
to reconfigure the VMA iterator.
However, the iterator (mas) appears to be in an inconsistent state
where the new start address exceeds the recorded last address.

Log evidence (register state):
- RSI (start address): 00002000001e3000
- R13 (mas->last):      00002000001e2fff
Violation: start > mas->last.

3. Call Trace Snippet:
[   26.423091] WARNING: ./include/linux/maple_tree.h:749 at
__mmap_region+0x1a82/0x2650
...
[   26.444142]  mmap_region+0x19c/0x360
[   26.444653]  do_mmap+0xcc7/0x11f0
[   26.446925]  vm_mmap_pgoff+0x20a/0x380
[   26.452704]  do_syscall_64+0xf1/0x530

4. Security Impact:
While currently manifesting as a warning, this inconsistency suggests
a foundational failure in the Maple Tree state machine during error
recovery. This could potentially lead to
Use-After-Free (UAF) scenarios or overlapping VMAs, which are
exploitable for local privilege escalation.

5. Reproducer:
The issue was discovered using syzkaller-based fuzzing.
A simplified C reproducer is attached below that utilizes
fault-injection to trigger the failure path.

Reproduction steps:
# echo 13 > /proc/thread-self/fail-nth
# ./repro

Best regards,
Song Yang

[-- Attachment #2: repro.c --]
[-- Type: application/octet-stream, Size: 13928 bytes --]

// autogenerated by syzkaller (https://github.com/google/syzkaller)

#define _GNU_SOURCE

#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#include <linux/capability.h>
#include <linux/futex.h>

static void sleep_ms(uint64_t ms)
{
  usleep(ms * 1000);
}

static uint64_t current_time_ms(void)
{
  struct timespec ts;
  if (clock_gettime(CLOCK_MONOTONIC, &ts))
    exit(1);
  return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
}

static void thread_start(void* (*fn)(void*), void* arg)
{
  pthread_t th;
  pthread_attr_t attr;
  pthread_attr_init(&attr);
  pthread_attr_setstacksize(&attr, 128 << 10);
  int i = 0;
  for (; i < 100; i++) {
    if (pthread_create(&th, &attr, fn, arg) == 0) {
      pthread_attr_destroy(&attr);
      return;
    }
    if (errno == EAGAIN) {
      usleep(50);
      continue;
    }
    break;
  }
  exit(1);
}

typedef struct {
  int state;
} event_t;

static void event_init(event_t* ev)
{
  ev->state = 0;
}

static void event_reset(event_t* ev)
{
  ev->state = 0;
}

static void event_set(event_t* ev)
{
  if (ev->state)
    exit(1);
  __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
  syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000);
}

static void event_wait(event_t* ev)
{
  while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
    syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
}

static int event_isset(event_t* ev)
{
  return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
}

static int event_timedwait(event_t* ev, uint64_t timeout)
{
  uint64_t start = current_time_ms();
  uint64_t now = start;
  for (;;) {
    uint64_t remain = timeout - (now - start);
    struct timespec ts;
    ts.tv_sec = remain / 1000;
    ts.tv_nsec = (remain % 1000) * 1000 * 1000;
    syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
    if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
      return 1;
    now = current_time_ms();
    if (now - start > timeout)
      return 0;
  }
}

static bool write_file(const char* file, const char* what, ...)
{
  char buf[1024];
  va_list args;
  va_start(args, what);
  vsnprintf(buf, sizeof(buf), what, args);
  va_end(args);
  buf[sizeof(buf) - 1] = 0;
  int len = strlen(buf);
  int fd = open(file, O_WRONLY | O_CLOEXEC);
  if (fd == -1)
    return false;
  if (write(fd, buf, len) != len) {
    int err = errno;
    close(fd);
    errno = err;
    return false;
  }
  close(fd);
  return true;
}

static void setup_gadgetfs();
static void setup_binderfs();
static void setup_fusectl();
static void sandbox_common_mount_tmpfs(void)
{
  write_file("/proc/sys/fs/mount-max", "100000");
  if (mkdir("./syz-tmp", 0777))
    exit(1);
  if (mount("", "./syz-tmp", "tmpfs", 0, NULL))
    exit(1);
  if (mkdir("./syz-tmp/newroot", 0777))
    exit(1);
  if (mkdir("./syz-tmp/newroot/dev", 0700))
    exit(1);
  unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE;
  if (mount("/dev", "./syz-tmp/newroot/dev", NULL, bind_mount_flags, NULL))
    exit(1);
  if (mkdir("./syz-tmp/newroot/proc", 0700))
    exit(1);
  if (mount("syz-proc", "./syz-tmp/newroot/proc", "proc", 0, NULL))
    exit(1);
  if (mkdir("./syz-tmp/newroot/selinux", 0700))
    exit(1);
  const char* selinux_path = "./syz-tmp/newroot/selinux";
  if (mount("/selinux", selinux_path, NULL, bind_mount_flags, NULL)) {
    if (errno != ENOENT)
      exit(1);
    if (mount("/sys/fs/selinux", selinux_path, NULL, bind_mount_flags, NULL) &&
        errno != ENOENT)
      exit(1);
  }
  if (mkdir("./syz-tmp/newroot/sys", 0700))
    exit(1);
  if (mount("/sys", "./syz-tmp/newroot/sys", 0, bind_mount_flags, NULL))
    exit(1);
  if (mount("/sys/kernel/debug", "./syz-tmp/newroot/sys/kernel/debug", NULL,
            bind_mount_flags, NULL) &&
      errno != ENOENT)
    exit(1);
  if (mount("/sys/fs/smackfs", "./syz-tmp/newroot/sys/fs/smackfs", NULL,
            bind_mount_flags, NULL) &&
      errno != ENOENT)
    exit(1);
  if (mount("/proc/sys/fs/binfmt_misc",
            "./syz-tmp/newroot/proc/sys/fs/binfmt_misc", NULL, bind_mount_flags,
            NULL) &&
      errno != ENOENT)
    exit(1);
  if (mkdir("./syz-tmp/newroot/syz-inputs", 0700))
    exit(1);
  if (mount("/syz-inputs", "./syz-tmp/newroot/syz-inputs", NULL,
            bind_mount_flags | MS_RDONLY, NULL) &&
      errno != ENOENT)
    exit(1);
  if (mkdir("./syz-tmp/pivot", 0777))
    exit(1);
  if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) {
    if (chdir("./syz-tmp"))
      exit(1);
  } else {
    if (chdir("/"))
      exit(1);
    if (umount2("./pivot", MNT_DETACH))
      exit(1);
  }
  if (chroot("./newroot"))
    exit(1);
  if (chdir("/"))
    exit(1);
  setup_gadgetfs();
  setup_binderfs();
  setup_fusectl();
}

static void setup_gadgetfs()
{
  if (mkdir("/dev/gadgetfs", 0777)) {
  }
  if (mount("gadgetfs", "/dev/gadgetfs", "gadgetfs", 0, NULL)) {
  }
}

static void setup_fusectl()
{
  if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) {
  }
}

static void setup_binderfs()
{
  if (mkdir("/dev/binderfs", 0777)) {
  }
  if (mount("binder", "/dev/binderfs", "binder", 0, NULL)) {
  }
  if (symlink("/dev/binderfs", "./binderfs")) {
  }
}

static void loop();

static void sandbox_common()
{
  prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  if (getppid() == 1)
    exit(1);
  struct rlimit rlim;
  rlim.rlim_cur = rlim.rlim_max = (200 << 20);
  setrlimit(RLIMIT_AS, &rlim);
  rlim.rlim_cur = rlim.rlim_max = 32 << 20;
  setrlimit(RLIMIT_MEMLOCK, &rlim);
  rlim.rlim_cur = rlim.rlim_max = 136 << 20;
  setrlimit(RLIMIT_FSIZE, &rlim);
  rlim.rlim_cur = rlim.rlim_max = 1 << 20;
  setrlimit(RLIMIT_STACK, &rlim);
  rlim.rlim_cur = rlim.rlim_max = 128 << 20;
  setrlimit(RLIMIT_CORE, &rlim);
  rlim.rlim_cur = rlim.rlim_max = 256;
  setrlimit(RLIMIT_NOFILE, &rlim);
  if (unshare(CLONE_NEWNS)) {
  }
  if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
  }
  if (unshare(CLONE_NEWIPC)) {
  }
  if (unshare(0x02000000)) {
  }
  if (unshare(CLONE_NEWUTS)) {
  }
  if (unshare(CLONE_SYSVSEM)) {
  }
  typedef struct {
    const char* name;
    const char* value;
  } sysctl_t;
  static const sysctl_t sysctls[] = {
      {"/proc/sys/kernel/shmmax", "16777216"},
      {"/proc/sys/kernel/shmall", "536870912"},
      {"/proc/sys/kernel/shmmni", "1024"},
      {"/proc/sys/kernel/msgmax", "8192"},
      {"/proc/sys/kernel/msgmni", "1024"},
      {"/proc/sys/kernel/msgmnb", "1024"},
      {"/proc/sys/kernel/sem", "1024 1048576 500 1024"},
  };
  unsigned i;
  for (i = 0; i < sizeof(sysctls) / sizeof(sysctls[0]); i++)
    write_file(sysctls[i].name, sysctls[i].value);
}

static int wait_for_loop(int pid)
{
  if (pid < 0)
    exit(1);
  int status = 0;
  while (waitpid(-1, &status, __WALL) != pid) {
  }
  return WEXITSTATUS(status);
}

static void drop_caps(void)
{
  struct __user_cap_header_struct cap_hdr = {};
  struct __user_cap_data_struct cap_data[2] = {};
  cap_hdr.version = _LINUX_CAPABILITY_VERSION_3;
  cap_hdr.pid = getpid();
  if (syscall(SYS_capget, &cap_hdr, &cap_data))
    exit(1);
  const int drop = (1 << CAP_SYS_PTRACE) | (1 << CAP_SYS_NICE);
  cap_data[0].effective &= ~drop;
  cap_data[0].permitted &= ~drop;
  cap_data[0].inheritable &= ~drop;
  if (syscall(SYS_capset, &cap_hdr, &cap_data))
    exit(1);
}

static int do_sandbox_none(void)
{
  if (unshare(CLONE_NEWPID)) {
  }
  int pid = fork();
  if (pid != 0)
    return wait_for_loop(pid);
  sandbox_common();
  drop_caps();
  if (unshare(CLONE_NEWNET)) {
  }
  write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535");
  sandbox_common_mount_tmpfs();
  loop();
  exit(1);
}

static int inject_fault(int nth)
{
  int fd;
  fd = open("/proc/thread-self/fail-nth", O_RDWR);
  if (fd == -1)
    exit(1);
  char buf[16];
  sprintf(buf, "%d", nth);
  if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf))
    exit(1);
  return fd;
}

static const char* setup_fault()
{
  int fd = open("/proc/self/make-it-fail", O_WRONLY);
  if (fd == -1)
    return "CONFIG_FAULT_INJECTION is not enabled";
  close(fd);
  fd = open("/proc/thread-self/fail-nth", O_WRONLY);
  if (fd == -1)
    return "kernel does not have systematic fault injection support";
  close(fd);
  static struct {
    const char* file;
    const char* val;
    bool fatal;
  } files[] = {
      {"/sys/kernel/debug/failslab/ignore-gfp-wait", "N", true},
      {"/sys/kernel/debug/fail_futex/ignore-private", "N", false},
      {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem", "N", false},
      {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-wait", "N", false},
      {"/sys/kernel/debug/fail_page_alloc/min-order", "0", false},
  };
  unsigned i;
  for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
    if (!write_file(files[i].file, files[i].val)) {
      if (files[i].fatal)
        return "failed to write fault injection file";
    }
  }
  return NULL;
}

struct thread_t {
  int created, call;
  event_t ready, done;
};

static struct thread_t threads[16];
static void execute_call(int call);
static int running;

static void* thr(void* arg)
{
  struct thread_t* th = (struct thread_t*)arg;
  for (;;) {
    event_wait(&th->ready);
    event_reset(&th->ready);
    execute_call(th->call);
    __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
    event_set(&th->done);
  }
  return 0;
}

static void loop(void)
{
  if (write(1, "executing program\n", sizeof("executing program\n") - 1)) {
  }
  int i, call, thread;
  for (call = 0; call < 5; call++) {
    for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0]));
         thread++) {
      struct thread_t* th = &threads[thread];
      if (!th->created) {
        th->created = 1;
        event_init(&th->ready);
        event_init(&th->done);
        event_set(&th->done);
        thread_start(thr, th);
      }
      if (!event_isset(&th->done))
        continue;
      event_reset(&th->done);
      th->call = call;
      __atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
      event_set(&th->ready);
      event_timedwait(&th->done, 50);
      break;
    }
  }
  for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++)
    sleep_ms(1);
}

uint64_t r[1] = {0xffffffffffffffff};

void execute_call(int call)
{
  intptr_t res = 0;
  switch (call) {
  case 0:
    //  setsockopt$inet_tcp_TCP_CONGESTION arguments: [
    //    fd: sock_tcp (resource)
    //    level: const = 0x6 (4 bytes)
    //    optname: const = 0xd (4 bytes)
    //    optval: ptr[in, buffer] {
    //      buffer: {72 65 6e 6f} (length 0x4)
    //    }
    //    optlen: len = 0x4 (8 bytes)
    //  ]
    memcpy((void*)0x200000000180, "reno", 4);
    syscall(__NR_setsockopt, /*fd=*/(intptr_t)-1, /*level=*/6, /*optname=*/0xd,
            /*optval=*/0x200000000180ul, /*optlen=*/4ul);
    break;
  case 1:
    //  socket$nl_xfrm arguments: [
    //    domain: const = 0x10 (8 bytes)
    //    type: const = 0x3 (8 bytes)
    //    proto: const = 0x6 (4 bytes)
    //  ]
    //  returns sock_nl_xfrm
    res = syscall(__NR_socket, /*domain=*/0x10ul, /*type=*/3ul, /*proto=*/6);
    if (res != -1)
      r[0] = res;
    break;
  case 2:
    //  mmap arguments: [
    //    addr: VMA[0x4000]
    //    len: len = 0x4000 (8 bytes)
    //    prot: mmap_prot = 0x1000001 (8 bytes)
    //    flags: mmap_flags = 0x13 (8 bytes)
    //    fd: fd (resource)
    //    offset: intptr = 0xf4f66000 (8 bytes)
    //  ]
    syscall(__NR_mmap, /*addr=*/0x2000001e1000ul, /*len=*/0x4000ul,
            /*prot=PROT_GROWSDOWN|PROT_READ*/ 0x1000001ul,
            /*flags=MAP_SHARED_VALIDATE|MAP_FIXED*/ 0x13ul, /*fd=*/r[0],
            /*offset=*/0xf4f66000ul);
    break;
  case 3:
    //  mmap arguments: [
    //    addr: VMA[0x2000]
    //    len: len = 0x2000 (8 bytes)
    //    prot: mmap_prot = 0x7 (8 bytes)
    //    flags: mmap_flags = 0x32 (8 bytes)
    //    fd: fd (resource)
    //    offset: intptr = 0x0 (8 bytes)
    //  ]
    syscall(__NR_mmap, /*addr=*/0x2000001e2000ul, /*len=*/0x2000ul,
            /*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul,
            /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/0,
            /*offset=*/0ul);
    break;
  case 4:
    //  mmap arguments: [
    //    addr: VMA[0x4000]
    //    len: len = 0x4000 (8 bytes)
    //    prot: mmap_prot = 0x7 (8 bytes)
    //    flags: mmap_flags = 0x32 (8 bytes)
    //    fd: fd (resource)
    //    offset: intptr = 0x0 (8 bytes)
    //  ]
    inject_fault(13);
    syscall(__NR_mmap, /*addr=*/0x2000001e3000ul, /*len=*/0x4000ul,
            /*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul,
            /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul,
            /*fd=*/(intptr_t)-1, /*offset=*/0ul);
    break;
  }
}
int main(void)
{
  syscall(__NR_mmap, /*addr=*/0x1ffffffff000ul, /*len=*/0x1000ul, /*prot=*/0ul,
          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul,
          /*fd=*/(intptr_t)-1, /*offset=*/0ul);
  syscall(__NR_mmap, /*addr=*/0x200000000000ul, /*len=*/0x1000000ul,
          /*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul,
          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul,
          /*fd=*/(intptr_t)-1, /*offset=*/0ul);
  syscall(__NR_mmap, /*addr=*/0x200001000000ul, /*len=*/0x1000ul, /*prot=*/0ul,
          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul,
          /*fd=*/(intptr_t)-1, /*offset=*/0ul);
  const char* reason;
  (void)reason;
  if ((reason = setup_fault()))
    printf("the reproducer may not work as expected: fault injection setup "
           "failed: %s\n",
           reason);
  do_sandbox_none();
  return 0;
}

[-- Attachment #3: kernel_crash.log --]
[-- Type: application/octet-stream, Size: 58906 bytes --]

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2026-03-11  0:30 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-11  0:30 BUG] [REGRESSION] Regression in 7.0-rc2: WARNING in maple_tree during __mmap_region error path song yang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox