linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Jann Horn <jannh@google.com>
To: Linux-MM <linux-mm@kvack.org>,
	Dan Williams <dan.j.williams@intel.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Michal Hocko <mhocko@suse.com>, Hugh Dickins <hughd@google.com>,
	Rik van Riel <riel@redhat.com>
Cc: kernel list <linux-kernel@vger.kernel.org>
Subject: [BUG] mm: direct I/O (using GUP) can write to COW anonymous pages
Date: Mon, 17 Sep 2018 18:12:05 +0200	[thread overview]
Message-ID: <CAG48ez17Of=dnymzm8GAN_CNG1okMg1KTeMtBQhXGP2dyB5uJw@mail.gmail.com> (raw)

[I'm not sure who the best people to ask about this are, I hope the
recipient list resembles something reasonable...]

I have noticed that the dup_mmap() logic on fork() doesn't handle
pages with active direct I/O properly: dup_mmap() seems to assume that
making the PTE referencing a page readonly will always prevent future
writes to the page, but if the kernel has acquired a direct reference
to the page before (e.g. via get_user_pages_fast()), writes can still
happen that way.

The worst-case effect of this - as far as I can tell - is that when a
multithreaded process forks while one thread is in the middle of
sys_read() on a file that uses direct I/O with get_user_pages_fast(),
the read data can become visible in the child while the parent's
buffer stays uninitialized if the parent writes to a relevant page
post-fork before either the I/O completes or the child writes to it.

Reproducer code:

====== START hello.c ======
#define FUSE_USE_VERSION 26

#include <fuse.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <err.h>
#include <sys/uio.h>

/* Path, as seen from the mount root, of the single regular file that
 * hello_getattr() reports; hello_readdir() lists it without the leading '/'. */
static const char *hello_path = "/hello";

/*
 * getattr handler: fill *stbuf for the two paths this filesystem knows.
 *
 * "/" is reported as a directory (mode 0755, two links); hello_path is
 * reported as a regular file (mode 0666, one link, 0x1000 bytes, zero
 * blocks).  *stbuf is zeroed first in every case, including failure.
 *
 * Returns 0 on success, or -ENOENT for any other path.
 */
static int hello_getattr(const char *path, struct stat *stbuf)
{
        memset(stbuf, 0, sizeof(*stbuf));

        if (strcmp(path, "/") == 0) {
                stbuf->st_nlink = 2;
                stbuf->st_mode = S_IFDIR | 0755;
                return 0;
        }

        if (strcmp(path, hello_path) != 0)
                return -ENOENT;

        stbuf->st_mode = S_IFREG | 0666;
        stbuf->st_nlink = 1;
        stbuf->st_size = 0x1000;
        stbuf->st_blocks = 0;
        return 0;
}

/*
 * readdir handler: emit the same three entries -- ".", ".." and the file
 * name (hello_path without its leading '/') -- for whatever directory is
 * requested.  path, offset and fi are not consulted, and the filler's
 * return value is ignored.  Always returns 0.
 */
static int hello_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
                         off_t offset, struct fuse_file_info *fi)
{
        const char *names[] = { ".", "..", hello_path + 1 };

        for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); i++)
                filler(buf, names[i], NULL, 0);
        return 0;
}

/*
 * open handler: every open succeeds unconditionally; neither argument is
 * inspected.  Always returns 0.
 */
static int hello_open(const char *path, struct fuse_file_info *fi)
{
        (void)path;
        (void)fi;
        return 0;
}

/*
 * read handler: after a fixed 3-second delay, serve zero bytes from a
 * virtual file that is 0x1000 bytes long.
 *
 * A request starting inside the file is clamped to the end of the file and
 * that many bytes of buf are set to 0; the clamped length is returned.  A
 * request starting at or past the end returns 0 without touching buf.
 *
 * NOTE(review): the delay presumably exists to keep the client's read() in
 * flight long enough for the reproducer to fork -- confirm before changing.
 */
static int hello_read(const char *path, char *buf, size_t size, off_t offset,
                      struct fuse_file_info *fi)
{
        const size_t file_size = 0x1000;

        sleep(3);

        if (!(offset < file_size))
                return 0;

        if (offset + size > file_size)
                size = file_size - offset;
        memset(buf, 0, size);
        return size;
}

/*
 * write handler: never completes.  The calling thread loops on pause()
 * forever, so no value is ever returned and none of the arguments are used.
 */
static int hello_write(const char *path, const char *buf, size_t size,
                       off_t offset, struct fuse_file_info *fi)
{
        for (;;)
                pause();
}

/*
 * Operations table handed to fuse_main(): only the five callbacks defined
 * in this file are set; every member not named here is zero-initialized.
 */
static struct fuse_operations hello_oper = {
        .getattr        = hello_getattr,
        .readdir        = hello_readdir,
        .open           = hello_open,
        .read           = hello_read,
        .write          = hello_write,
};

/*
 * Entry point: pass the command line and the callback table to fuse_main()
 * and use its result as the process exit status.
 */
int main(int argc, char *argv[])
{
        int status = fuse_main(argc, argv, &hello_oper, NULL);

        return status;
}
====== END hello.c ======

====== START simple_mmap.c ======
#define _GNU_SOURCE
#include <pthread.h>
#include <sys/mman.h>
#include <err.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <signal.h>
#include <sys/prctl.h>
#include <sys/wait.h>

/*
 * Backing storage: 0x10000 bytes, aligned to 0x1000.  data_buffer is the
 * address 0x8000 bytes into that array, so it has the same 0x1000
 * alignment and sits in the middle of the array.
 */
__attribute__((aligned(0x1000))) char data_buffer_[0x10000];
#define data_buffer (data_buffer_ + 0x8000)

/*
 * Thread body: issue a 0x1000-byte read() from mount/hello into data_buffer
 * and print data_buffer[0] before and after the call, plus read()'s result.
 *
 * dummy is unused.  If the file cannot be opened, err() terminates the whole
 * process with status 1.  Always returns NULL.
 */
void *fuse_thread(void *dummy) {
        (void)dummy;

        /* step 2: start direct I/O on data_buffer */
        int fuse_fd = open("mount/hello", O_RDWR);
        if (fuse_fd == -1)
                err(1, "unable to open FUSE fd");
        printf("char in parent (before): %hhd\n", data_buffer[0]);
        int res = read(fuse_fd, data_buffer, 0x1000);
        /* step 6: read completes, show post-read state */
        printf("fuse read result: %d\n", res);
        printf("char in parent (after): %hhd\n", data_buffer[0]);

        /*
         * The value returned by a pthread start routine is consumed by the
         * thread library as the thread's exit value, so falling off the end
         * of this void * function would be undefined behaviour.
         */
        return NULL;
}

/*
 * Reproducer driver.  Dirties data_buffer, starts fuse_thread, waits one
 * second, forks, and then writes to data_buffer in the parent.  The child
 * prints data_buffer[0] one second and four seconds after the fork and
 * exits; the parent waits for it.
 *
 * NOTE(review): the sleep() lengths appear to be chosen so that the fork
 * and the parent's write both happen while the thread's read() is still
 * pending -- the statement order and delays are timing-sensitive, so do
 * not reorder them without re-validating the reproducer.
 */
int main(void) {
        /* step 1: make data_buffer dirty */
        data_buffer[0] = 1;

        pthread_t thread;
        if (pthread_create(&thread, NULL, fuse_thread, NULL))
                errx(1, "pthread_create");

        /* give the thread time to open the file and enter read() */
        sleep(1);
        /* step 3: fork a child */
        pid_t child = fork();
        if (child == -1)
                err(1, "fork");
        if (child == 0) {
                /* request SIGKILL for the child when its parent dies */
                prctl(PR_SET_PDEATHSIG, SIGKILL);
                sleep(1);

                /* step 5: show pre-read state in the child */
                printf("char in child (before): %hhd\n", data_buffer[0]);
                sleep(3);
                /* step 7: read is complete, show post-read state in child */
                printf("char in child (after): %hhd\n", data_buffer[0]);
                return 0;
        }

        /* step 4: de-CoW data_buffer in the parent */
        /* offset 0x800 is inside the same 0x1000-aligned region the read targets */
        data_buffer[0x800] = 2;

        /* block until the child has printed both values and exited */
        int status;
        if (wait(&status) != child)
                err(1, "wait");
}
====== END simple_mmap.c ======

Repro steps:

In one terminal:
$ mkdir mount
$ gcc -o hello hello.c -Wall -std=gnu99 `pkg-config fuse --cflags --libs`
hello.c: In function ‘hello_write’:
hello.c:67:1: warning: no return statement in function returning
non-void [-Wreturn-type]
 }
 ^
$ ./hello -d -o direct_io mount
FUSE library version: 2.9.7
[...]

In a second terminal:
$ gcc -pthread -o simple_mmap simple_mmap.c
$ ./simple_mmap
char in parent (before): 1
char in child (before): 1
fuse read result: 4096
char in parent (after): 1
char in child (after): 0

I have tested that this reproducer still works on 4.19.0-rc3+.

As far as I can tell, the fix would be to immediately copy pages with
`refcount - mapcount > N` in dup_mmap(), or something like that?

             reply	other threads:[~2018-09-17 16:12 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-17 16:12 Jann Horn [this message]
2018-09-18  0:05 ` Hugh Dickins
2018-09-18  0:19   ` Salman Qazi
2018-09-18  0:35   ` Jann Horn
2018-09-18  9:13     ` Konstantin Khlebnikov
2018-09-18  9:58     ` Jan Kara
2018-09-26  5:00       ` John Hubbard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAG48ez17Of=dnymzm8GAN_CNG1okMg1KTeMtBQhXGP2dyB5uJw@mail.gmail.com' \
    --to=jannh@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=hughd@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=riel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox