From: David Hildenbrand <david@redhat.com>
To: michael.day@amd.com, Elliot Berman <quic_eberman@quicinc.com>,
Paolo Bonzini <pbonzini@redhat.com>,
Andrew Morton <akpm@linux-foundation.org>,
Sean Christopherson <seanjc@google.com>,
Fuad Tabba <tabba@google.com>,
Ackerley Tng <ackerleytng@google.com>,
Mike Rapoport <rppt@kernel.org>, "H. Peter Anvin" <hpa@zytor.com>,
"Matthew Wilcox (Oracle)" <willy@infradead.org>,
Jonathan Corbet <corbet@lwn.net>,
Trond Myklebust <trondmy@kernel.org>,
Anna Schumaker <anna@kernel.org>,
Mike Marshall <hubcap@omnibond.com>,
Martin Brandenburg <martin@omnibond.com>,
Alexander Viro <viro@zeniv.linux.org.uk>,
Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>
Cc: James Gowans <jgowans@amazon.com>,
linux-fsdevel@vger.kernel.org, kvm@vger.kernel.org,
linux-coco@lists.linux.dev, linux-arm-msm@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
linux-doc@vger.kernel.org, linux-nfs@vger.kernel.org,
devel@lists.orangefs.org, linux-arm-kernel@lists.infradead.org
Subject: Re: [PATCH v4 2/2] mm: guestmem: Convert address_space operations to guestmem library
Date: Fri, 22 Nov 2024 12:03:14 +0100 [thread overview]
Message-ID: <0903abca-42f5-4ca7-8c81-070e7669f996@redhat.com> (raw)
In-Reply-To: <3349f838-2c73-4ef0-aa30-a21e41fb39e5@amd.com>
On 21.11.24 18:40, Mike Day wrote:
>
>
> On 11/21/24 10:43, Elliot Berman wrote:
>> On Wed, Nov 20, 2024 at 10:12:08AM -0800, Elliot Berman wrote:
>>> diff --git a/mm/guestmem.c b/mm/guestmem.c
>>> new file mode 100644
>>> index 0000000000000000000000000000000000000000..19dd7e5d498f07577ec5cec5b52055f7435980f4
>>> --- /dev/null
>>> +++ b/mm/guestmem.c
>>> @@ -0,0 +1,196 @@
>>> +// SPDX-License-Identifier: GPL-2.0-only
>>> +/*
>>> + * guestmem library
>>> + *
>>> + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
>>> + */
>>> +
>>> +#include <linux/fs.h>
>>> +#include <linux/guestmem.h>
>>> +#include <linux/mm.h>
>>> +#include <linux/pagemap.h>
>>> +
>>> +struct guestmem {
>>> + const struct guestmem_ops *ops;
>>> +};
>>> +
>>> +static inline struct guestmem *folio_to_guestmem(struct folio *folio)
>>> +{
>>> + struct address_space *mapping = folio->mapping;
>>> +
>>> + return mapping->i_private_data;
>>> +}
>>> +
>>> +static inline bool __guestmem_release_folio(struct address_space *mapping,
>>> + struct folio *folio)
>>> +{
>>> + struct guestmem *gmem = mapping->i_private_data;
>>> + struct list_head *entry;
>>> +
>>> + if (gmem->ops->release_folio) {
>>> + list_for_each(entry, &mapping->i_private_list) {
>>> + if (!gmem->ops->release_folio(entry, folio))
>>> + return false;
>>> + }
>>> + }
>>> +
>>> + return true;
>>> +}
>>> +
>>> +static inline int
>>> +__guestmem_invalidate_begin(struct address_space *const mapping, pgoff_t start,
>>> + pgoff_t end)
>>> +{
>>> + struct guestmem *gmem = mapping->i_private_data;
>>> + struct list_head *entry;
>>> + int ret = 0;
>>> +
>>> + list_for_each(entry, &mapping->i_private_list) {
>>> + ret = gmem->ops->invalidate_begin(entry, start, end);
>>> + if (ret)
>>> + return ret;
>>> + }
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static inline void
>>> +__guestmem_invalidate_end(struct address_space *const mapping, pgoff_t start,
>>> + pgoff_t end)
>>> +{
>>> + struct guestmem *gmem = mapping->i_private_data;
>>> + struct list_head *entry;
>>> +
>>> + if (gmem->ops->invalidate_end) {
>>> + list_for_each(entry, &mapping->i_private_list)
>>> + gmem->ops->invalidate_end(entry, start, end);
>>> + }
>>> +}
>>> +
>>> +static void guestmem_free_folio(struct address_space *mapping,
>>> + struct folio *folio)
>>> +{
>>> + WARN_ON_ONCE(!__guestmem_release_folio(mapping, folio));
>>> +}
>>> +
>>> +static int guestmem_error_folio(struct address_space *mapping,
>>> + struct folio *folio)
>>> +{
>>> + pgoff_t start, end;
>>> + int ret;
>>> +
>>> + filemap_invalidate_lock_shared(mapping);
>>> +
>>> + start = folio->index;
>>> + end = start + folio_nr_pages(folio);
>>> +
>>> + ret = __guestmem_invalidate_begin(mapping, start, end);
>>> + if (ret)
>>> + goto out;
>>> +
>>> + /*
>>> + * Do not truncate the range, what action is taken in response to the
>>> + * error is userspace's decision (assuming the architecture supports
>>> + * gracefully handling memory errors). If/when the guest attempts to
>>> + * access a poisoned page, kvm_gmem_get_pfn() will return -EHWPOISON,
>>> + * at which point KVM can either terminate the VM or propagate the
>>> + * error to userspace.
>>> + */
>>> +
>>> + __guestmem_invalidate_end(mapping, start, end);
>>> +
>>> +out:
>>> + filemap_invalidate_unlock_shared(mapping);
>>> + return ret ? MF_DELAYED : MF_FAILED;
>>> +}
>>> +
>>> +static int guestmem_migrate_folio(struct address_space *mapping,
>>> + struct folio *dst, struct folio *src,
>>> + enum migrate_mode mode)
>>> +{
>>> + WARN_ON_ONCE(1);
>>> + return -EINVAL;
>>> +}
>>> +
>>> +static const struct address_space_operations guestmem_aops = {
>>> + .dirty_folio = noop_dirty_folio,
>>> + .free_folio = guestmem_free_folio,
>>> + .error_remove_folio = guestmem_error_folio,
>>> + .migrate_folio = guestmem_migrate_folio,
>>> +};
>>> +
>>> +int guestmem_attach_mapping(struct address_space *mapping,
>>> + const struct guestmem_ops *const ops,
>>> + struct list_head *data)
>>> +{
>>> + struct guestmem *gmem;
>>> +
>>> + if (mapping->a_ops == &guestmem_aops) {
>>> + gmem = mapping->i_private_data;
>>> + if (gmem->ops != ops)
>>> + return -EINVAL;
>>> +
>>> + goto add;
>>> + }
>>> +
>>> + gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
>>> + if (!gmem)
>>> + return -ENOMEM;
>>> +
>>> + gmem->ops = ops;
>>> +
>>> + mapping->a_ops = &guestmem_aops;
>>> + mapping->i_private_data = gmem;
>>> +
>>> + mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
>>> + mapping_set_inaccessible(mapping);
>>> + /* Unmovable mappings are supposed to be marked unevictable as well. */
>>> + WARN_ON_ONCE(!mapping_unevictable(mapping));
>>> +
>>> +add:
>>> + list_add(data, &mapping->i_private_list);
>>> + return 0;
>>> +}
>>> +EXPORT_SYMBOL_GPL(guestmem_attach_mapping);
>>> +
>>> +void guestmem_detach_mapping(struct address_space *mapping,
>>> + struct list_head *data)
>>> +{
>>> + list_del(data);
>>> +
>>> + if (list_empty(&mapping->i_private_list)) {
>>> + kfree(mapping->i_private_data);
>>
>> Mike was helping me test this out for SEV-SNP. They helped find a bug
>> here. Right now, when the file closes, KVM calls
>> guestmem_detach_mapping() which will uninstall the ops. When that
>> happens, it's not guaranteed that all of the folios have been removed
>> from the filemap yet
Yes, that's the real issue. There either must be some lifetime tracking
(kfree() after the mapping is completely unused), or you have to tear it
all down before you mess with the mapping.
>>
>> There are a few approaches I could take:
>>
>> 1. Create a guestmem superblock so I can register guestmem-specific
>> destroy_inode() to do the kfree() above. This requires a lot of
>> boilerplate code, and I think it's not preferred approach.
>> 2. Update how KVM tracks the memory so it is back in "shared" state when
>> the file closes. This requires some significant rework about the page
>> state compared to current guest_memfd. That rework might be useful
>> for the shared/private state machine.
>> 3. Call truncate_inode_pages(mapping, 0) to force pages to be freed
>> here. It's might be possible that a page is allocated after this
>> point. In order for that to be a problem, KVM would need to update
>> RMP entry as guest-owned, and I don't believe that's possible after
>> the last guestmem_detach_mapping().
>>
>> My preference is to go with #3 as it was the easiest thing to do.
>
> #3 is my preference as well. The semantics are that the guest is "closing" the gmem
> object, which means all the memory is being released from the guest.
Agreed.
--
Cheers,
David / dhildenb
prev parent reply other threads:[~2024-11-22 11:03 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-20 18:12 [PATCH v4 0/2] mm: Refactor KVM guest_memfd to introduce " Elliot Berman
2024-11-20 18:12 ` [PATCH v4 1/2] filemap: Pass address_space mapping to ->free_folio() Elliot Berman
2024-11-21 12:23 ` David Hildenbrand
2024-11-20 18:12 ` [PATCH v4 2/2] mm: guestmem: Convert address_space operations to guestmem library Elliot Berman
2024-11-21 16:43 ` Elliot Berman
2024-11-21 17:40 ` Mike Day
2024-11-22 11:03 ` David Hildenbrand [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=0903abca-42f5-4ca7-8c81-070e7669f996@redhat.com \
--to=david@redhat.com \
--cc=ackerleytng@google.com \
--cc=akpm@linux-foundation.org \
--cc=anna@kernel.org \
--cc=brauner@kernel.org \
--cc=corbet@lwn.net \
--cc=devel@lists.orangefs.org \
--cc=hpa@zytor.com \
--cc=hubcap@omnibond.com \
--cc=jack@suse.cz \
--cc=jgowans@amazon.com \
--cc=kvm@vger.kernel.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-arm-msm@vger.kernel.org \
--cc=linux-coco@lists.linux.dev \
--cc=linux-doc@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-nfs@vger.kernel.org \
--cc=martin@omnibond.com \
--cc=michael.day@amd.com \
--cc=pbonzini@redhat.com \
--cc=quic_eberman@quicinc.com \
--cc=rppt@kernel.org \
--cc=seanjc@google.com \
--cc=tabba@google.com \
--cc=trondmy@kernel.org \
--cc=viro@zeniv.linux.org.uk \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox