From: Kirill Tkhai <ktkhai@virtuozzo.com>
To: Andrea Arcangeli <aarcange@redhat.com>,
Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org, zhong jiang <zhongjiang@huawei.com>,
syzkaller-bugs@googlegroups.com,
syzbot+cbb52e396df3e565ab02@syzkaller.appspotmail.com,
Mike Rapoport <rppt@linux.vnet.ibm.com>,
Mike Kravetz <mike.kravetz@oracle.com>,
Peter Xu <peterx@redhat.com>, Dmitry Vyukov <dvyukov@google.com>
Subject: Re: [PATCH 1/2] userfaultfd: use RCU to free the task struct when fork fails
Date: Tue, 26 Mar 2019 11:19:45 +0300 [thread overview]
Message-ID: <b6e5cda0-1134-8a42-3985-ec6728edab43@virtuozzo.com> (raw)
In-Reply-To: <303ae124-dc01-0989-7cca-39ec7331d1be@virtuozzo.com>
On 26.03.2019 11:18, Kirill Tkhai wrote:
> On 26.03.2019 11:07, Kirill Tkhai wrote:
>> On 26.03.2019 01:56, Andrea Arcangeli wrote:
>>> MEMCG depends on the task structure not to be freed under
>>> rcu_read_lock() in get_mem_cgroup_from_mm() after it dereferences
>>> mm->owner.
>>>
>>> An alternate possible fix would be to defer the delivery of the
>>> userfaultfd contexts to the monitor until after fork() is guaranteed
>>> to succeed. Such a change would require more changes because it would
>>> create a strict ordering dependency where the uffd methods would need
>>> to be called beyond the last potentially failing branch in order to be
>>> safe. This solution as opposed only adds the dependency to common code
>>> to set mm->owner to NULL and to free the task struct that was pointed
>>> by mm->owner with RCU, if fork ends up failing. The userfaultfd
>>> methods can still be called anywhere during the fork runtime and the
>>> monitor will keep discarding orphaned "mm" coming from failed forks in
>>> userland.
>>>
>>> This race condition couldn't trigger if CONFIG_MEMCG was set =n at
>>> build time.
>>>
>>> Fixes: 893e26e61d04 ("userfaultfd: non-cooperative: Add fork() event")
>>> Cc: stable@kernel.org
>>> Tested-by: zhong jiang <zhongjiang@huawei.com>
>>> Reported-by: syzbot+cbb52e396df3e565ab02@syzkaller.appspotmail.com
>>> Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
>>> ---
>>> kernel/fork.c | 34 ++++++++++++++++++++++++++++++++--
>>> 1 file changed, 32 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/kernel/fork.c b/kernel/fork.c
>>> index 9dcd18aa210b..a19790e27afd 100644
>>> --- a/kernel/fork.c
>>> +++ b/kernel/fork.c
>>> @@ -952,6 +952,15 @@ static void mm_init_aio(struct mm_struct *mm)
>>> #endif
>>> }
>>>
>>> +static __always_inline void mm_clear_owner(struct mm_struct *mm,
>>> + struct task_struct *p)
>>> +{
>>> +#ifdef CONFIG_MEMCG
>>> + if (mm->owner == p)
>>> + WRITE_ONCE(mm->owner, NULL);
>>> +#endif
>>> +}
>>> +
>>> static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
>>> {
>>> #ifdef CONFIG_MEMCG
>>> @@ -1331,6 +1340,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
>>> free_pt:
>>> /* don't put binfmt in mmput, we haven't got module yet */
>>> mm->binfmt = NULL;
>>> + mm_init_owner(mm, NULL);
>>> mmput(mm);
>>>
>>> fail_nomem:
>>> @@ -1662,6 +1672,24 @@ static inline void rcu_copy_process(struct task_struct *p)
>>> #endif /* #ifdef CONFIG_TASKS_RCU */
>>> }
>>>
>>> +#ifdef CONFIG_MEMCG
>>> +static void __delayed_free_task(struct rcu_head *rhp)
>>> +{
>>> + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
>>> +
>>> + free_task(tsk);
>>> +}
>>> +#endif /* CONFIG_MEMCG */
>>> +
>>> +static __always_inline void delayed_free_task(struct task_struct *tsk)
>>> +{
>>> +#ifdef CONFIG_MEMCG
>>> + call_rcu(&tsk->rcu, __delayed_free_task);
>>> +#else /* CONFIG_MEMCG */
>>> + free_task(tsk);
>>> +#endif /* CONFIG_MEMCG */
>>> +}
>>> +
>>> /*
>>> * This creates a new process as a copy of the old one,
>>> * but does not actually start it yet.
>>> @@ -2123,8 +2151,10 @@ static __latent_entropy struct task_struct *copy_process(
>>> bad_fork_cleanup_namespaces:
>>> exit_task_namespaces(p);
>>> bad_fork_cleanup_mm:
>>> - if (p->mm)
>>> + if (p->mm) {
>>> + mm_clear_owner(p->mm, p);
>>> mmput(p->mm);
>>> + }
>>> bad_fork_cleanup_signal:
>>> if (!(clone_flags & CLONE_THREAD))
>>> free_signal_struct(p->signal);
>>> @@ -2155,7 +2185,7 @@ static __latent_entropy struct task_struct *copy_process(
>>> bad_fork_free:
>>> p->state = TASK_DEAD;
>>> put_task_stack(p);
>>> - free_task(p);
>>> + delayed_free_task(p);
>>
>> Can't call_rcu(&p->rcu, delayed_put_task_struct) be used instead this?
>
> I mean:
>
> refcount_set(&tsk->usage, 2);
I.e., refcount_set(&tsk->usage, 1);
> call_rcu(&p->rcu, delayed_put_task_struct);
>
> And:
>
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 3c516c6f7ce4..27cdf61b51a1 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -715,7 +715,9 @@ static inline void put_signal_struct(struct signal_struct *sig)
>
> void __put_task_struct(struct task_struct *tsk)
> {
> - WARN_ON(!tsk->exit_state);
> + if (!tsk->exit_state)
> + /* Cleanup of copy_process() */
> + goto free;
> WARN_ON(refcount_read(&tsk->usage));
> WARN_ON(tsk == current);
>
> @@ -727,6 +729,7 @@ void __put_task_struct(struct task_struct *tsk)
> put_signal_struct(tsk->signal);
>
> if (!profile_handoff_task(tsk))
> +free:
> free_task(tsk);
> }
> EXPORT_SYMBOL_GPL(__put_task_struct);
>
next prev parent reply other threads:[~2019-03-26 8:19 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-03-25 22:56 [PATCH 0/2] " Andrea Arcangeli
2019-03-25 22:56 ` [PATCH 1/2] " Andrea Arcangeli
2019-03-26 8:07 ` Kirill Tkhai
2019-03-26 8:18 ` Kirill Tkhai
2019-03-26 8:19 ` Kirill Tkhai [this message]
2019-03-26 8:56 ` Michal Hocko
2019-03-27 0:16 ` Andrea Arcangeli
2019-03-27 8:49 ` Michal Hocko
2019-04-26 4:48 ` Andrew Morton
2019-04-29 3:57 ` [PATCH 1/1 v2] " Andrea Arcangeli
2019-04-29 6:36 ` zhong jiang
2019-04-29 14:37 ` Andrea Arcangeli
2019-03-25 22:56 ` [PATCH 2/2] mm: change mm_update_next_owner() to update mm->owner with WRITE_ONCE Andrea Arcangeli
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=b6e5cda0-1134-8a42-3985-ec6728edab43@virtuozzo.com \
--to=ktkhai@virtuozzo.com \
--cc=aarcange@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=dvyukov@google.com \
--cc=linux-mm@kvack.org \
--cc=mike.kravetz@oracle.com \
--cc=peterx@redhat.com \
--cc=rppt@linux.vnet.ibm.com \
--cc=syzbot+cbb52e396df3e565ab02@syzkaller.appspotmail.com \
--cc=syzkaller-bugs@googlegroups.com \
--cc=zhongjiang@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox