From: zhenwei pi <pizhenwei@bytedance.com>
To: David Hildenbrand <david@redhat.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, mst@redhat.com,
naoya.horiguchi@nec.com, akpm@linux-foundation.org,
jasowang@redhat.com, virtualization@lists.linux-foundation.org,
pbonzini@redhat.com, peterx@redhat.com, qemu-devel@nongnu.org
Subject: Re: Re: [PATCH 3/3] virtio_balloon: Introduce memory recover
Date: Mon, 30 May 2022 20:47:34 +0800
Message-ID: <e870424d-da27-b369-7406-27f7f2983428@bytedance.com>
In-Reply-To: <612e42f7-1a97-9b01-2d45-d4661911e7a8@redhat.com>
On 5/30/22 15:48, David Hildenbrand wrote:
>
>> +
>> struct virtio_balloon {
>> struct virtio_device *vdev;
>> struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
>> @@ -126,6 +133,16 @@ struct virtio_balloon {
>> /* Free page reporting device */
>> struct virtqueue *reporting_vq;
>> struct page_reporting_dev_info pr_dev_info;
>> +
>> + /* Memory recover VQ - VIRTIO_BALLOON_F_RECOVER */
>> + struct virtqueue *recover_vq;
>> + spinlock_t recover_vq_lock;
>> + struct notifier_block memory_failure_nb;
>> + struct list_head corrupted_page_list;
>> + struct list_head recovered_page_list;
>> + spinlock_t recover_page_list_lock;
>> + struct __virtio_balloon_recover in_vbr;
>> + struct work_struct unpoison_memory_work;
>
> I assume we want all that only with CONFIG_MEMORY_FAILURE.
>
Sorry, I missed this.
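I'll guard all of it with CONFIG_MEMORY_FAILURE in the next version,
e.g. something like this (a sketch, same fields as above):

#ifdef CONFIG_MEMORY_FAILURE
	/* Memory recover VQ - VIRTIO_BALLOON_F_RECOVER */
	struct virtqueue *recover_vq;
	spinlock_t recover_vq_lock;
	struct notifier_block memory_failure_nb;
	struct list_head corrupted_page_list;
	struct list_head recovered_page_list;
	spinlock_t recover_page_list_lock;
	struct __virtio_balloon_recover in_vbr;
	struct work_struct unpoison_memory_work;
#endif /* CONFIG_MEMORY_FAILURE */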
>> };
>>
>> static const struct virtio_device_id id_table[] = {
>> @@ -494,6 +511,198 @@ static void update_balloon_size_func(struct work_struct *work)
>> queue_work(system_freezable_wq, work);
>> }
>>
>> +/*
>> + * virtballoon_memory_failure - notified on a memory failure; try to
>> + * recover the corrupted page.
>> + * The memory failure notifier is designed to call back only when the
>> + * kernel has handled the failure successfully. WARN_ON_ONCE on the
>> + * unlikely condition that an error slips through (memory error handling
>> + * is best effort, not 100% covered).
>> + */
>> +static int virtballoon_memory_failure(struct notifier_block *notifier,
>> + unsigned long pfn, void *parm)
>> +{
>> + struct virtio_balloon *vb = container_of(notifier, struct virtio_balloon,
>> + memory_failure_nb);
>> + struct page *page;
>> + struct __virtio_balloon_recover *out_vbr;
>> + struct scatterlist sg;
>> + unsigned long flags;
>> + int err;
>> +
>> + page = pfn_to_online_page(pfn);
>> + if (WARN_ON_ONCE(!page))
>> + return NOTIFY_DONE;
>> +
>> + if (PageHuge(page))
>> + return NOTIFY_DONE;
>> +
>> + if (WARN_ON_ONCE(!PageHWPoison(page)))
>> + return NOTIFY_DONE;
>> +
>> + if (WARN_ON_ONCE(page_count(page) != 1))
>> + return NOTIFY_DONE;
>
> Relying on the page_count being 1 for correctness is usually a bit
> shaky, for example, when racing against isolate_movable_page(), which
> might temporarily bump up the refcount.
>
The memory failure notifier is designed to call the chain only when a page
is handled with the result MF_RECOVERED:

	if (result == MF_RECOVERED)
		blocking_notifier_call_chain(&mf_notifier_list, pfn, NULL);
>> +
>> + get_page(page); /* balloon reference */
>> +
>> + out_vbr = kzalloc(sizeof(*out_vbr), GFP_KERNEL);
>
> Are we always guaranteed to be able to use GFP_KERNEL out of MCE
> context? (IOW, are we never atomic?)
>
>> + if (WARN_ON_ONCE(!out_vbr))
>> + return NOTIFY_BAD;
>> +
>> + spin_lock(&vb->recover_page_list_lock);
>> + balloon_page_push(&vb->corrupted_page_list, page);
>> + spin_unlock(&vb->recover_page_list_lock);
>> +
>> + out_vbr->vbr.cmd = VIRTIO_BALLOON_R_CMD_RECOVER;
>
> This makes me wonder if we should have a more generic guest->host
> request queue, similar to what e.g., virtio-mem uses, instead of adding
> a separate VIRTIO_BALLOON_VQ_RECOVER vq.
>
I'm OK with either one; I'll follow your decision! :D
>> + set_page_pfns(vb, out_vbr->pfns, page);
>> + sg_init_one(&sg, out_vbr, sizeof(*out_vbr));
>> +
>> + spin_lock_irqsave(&vb->recover_vq_lock, flags);
>> + err = virtqueue_add_outbuf(vb->recover_vq, &sg, 1, out_vbr, GFP_KERNEL);
>> + if (unlikely(err)) {
>> + spin_unlock_irqrestore(&vb->recover_vq_lock, flags);
>> + return NOTIFY_DONE;
>> + }
>> + virtqueue_kick(vb->recover_vq);
>> + spin_unlock_irqrestore(&vb->recover_vq_lock, flags);
>> +
>> + return NOTIFY_OK;
>> +}
>> +
>> +static int recover_vq_get_response(struct virtio_balloon *vb)
>> +{
>> + struct __virtio_balloon_recover *in_vbr;
>> + struct scatterlist sg;
>> + unsigned long flags;
>> + int err;
>> +
>> + spin_lock_irqsave(&vb->recover_vq_lock, flags);
>> + in_vbr = &vb->in_vbr;
>> + memset(in_vbr, 0x00, sizeof(*in_vbr));
>> + sg_init_one(&sg, in_vbr, sizeof(*in_vbr));
>> + err = virtqueue_add_inbuf(vb->recover_vq, &sg, 1, in_vbr, GFP_KERNEL);
>> + if (unlikely(err)) {
>> + spin_unlock_irqrestore(&vb->recover_vq_lock, flags);
>> + return err;
>> + }
>> +
>> + virtqueue_kick(vb->recover_vq);
>> + spin_unlock_irqrestore(&vb->recover_vq_lock, flags);
>> +
>> + return 0;
>> +}
>> +
>> +static void recover_vq_handle_response(struct virtio_balloon *vb, unsigned int len)
>> +{
>> + struct __virtio_balloon_recover *in_vbr;
>> + struct virtio_balloon_recover *vbr;
>> + struct page *page;
>> + unsigned int pfns;
>> + u32 pfn0, pfn1;
>> + __u8 status;
>> +
>> + /* ignore a response with an unexpected length */
>> + if (unlikely(len != sizeof(struct __virtio_balloon_recover)))
>> + return;
>> +
>> + in_vbr = &vb->in_vbr;
>> + vbr = &in_vbr->vbr;
>> + if (unlikely(vbr->cmd != VIRTIO_BALLOON_R_CMD_RESPONSE))
>> + return;
>> +
>> + /* make sure the balloon PFNs are contiguous */
>> + for (pfns = 1; pfns < VIRTIO_BALLOON_PAGES_PER_PAGE; pfns++) {
>> + pfn0 = virtio32_to_cpu(vb->vdev, in_vbr->pfns[pfns - 1]);
>> + pfn1 = virtio32_to_cpu(vb->vdev, in_vbr->pfns[pfns]);
>> + if (pfn1 - pfn0 != 1)
>> + return;
>
> Yeah, we really shouldn't be dealing with (legacy) 4k PFNs here, but
> instead, proper ranges I guess.
>
MST also pointed this out; I explained it in this link:
https://lkml.org/lkml/2022/5/26/942

Rather than the page-reporting style, the virtio-mem style should be fine,
e.g.:

	struct virtio_memory_recover {
		__virtio64 addr;
		__virtio32 length;
		__virtio16 padding[2];
	};
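An (addr, length) pair describes the whole contiguous range at once, so
neither side has to reassemble it from per-4KB balloon PFNs.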
>> + }
>> +
>> + pfn0 = virtio32_to_cpu(vb->vdev, in_vbr->pfns[0]);
>> + if (!pfn_valid(pfn0))
>> + return;
>> +
>> + pfn1 = -1;
>> + spin_lock(&vb->recover_page_list_lock);
>> + list_for_each_entry(page, &vb->corrupted_page_list, lru) {
>> + pfn1 = page_to_pfn(page);
>> + if (pfn1 == pfn0)
>> + break;
>> + }
>> + spin_unlock(&vb->recover_page_list_lock);
>> +
>> + status = vbr->status;
>> + switch (status) {
>> + case VIRTIO_BALLOON_R_STATUS_RECOVERED:
>> + if (pfn1 == pfn0) {
>> + spin_lock(&vb->recover_page_list_lock);
>> + list_del(&page->lru);
>> + balloon_page_push(&vb->recovered_page_list, page);
>
> We rather not reuse actual balloon functions in !balloon context. Just
> move the page to the proper list directly.
>
OK.
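For instance, moving the page onto the list directly with list_move()
(a sketch):

	spin_lock(&vb->recover_page_list_lock);
	list_move(&page->lru, &vb->recovered_page_list);
	spin_unlock(&vb->recover_page_list_lock);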
>> + spin_unlock(&vb->recover_page_list_lock);
>> + queue_work(system_freezable_wq, &vb->unpoison_memory_work);
>> + dev_info_ratelimited(&vb->vdev->dev, "recovered pfn 0x%x", pfn0);
>
> Well, not yet. Shouldn't this go into unpoison_memory_func() ?
>
OK.
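Right, the page is only really recovered once unpoison succeeds, so the
message belongs in the work handler. Roughly (an untested sketch, assuming
unpoison_memory() is made available to the module by this series):

static void unpoison_memory_func(struct work_struct *work)
{
	struct virtio_balloon *vb;
	struct page *page;

	vb = container_of(work, struct virtio_balloon, unpoison_memory_work);

	spin_lock(&vb->recover_page_list_lock);
	while ((page = list_first_entry_or_null(&vb->recovered_page_list,
						struct page, lru))) {
		list_del(&page->lru);
		spin_unlock(&vb->recover_page_list_lock);

		if (!unpoison_memory(page_to_pfn(page)))
			dev_info_ratelimited(&vb->vdev->dev,
					     "recovered pfn 0x%lx",
					     page_to_pfn(page));
		put_page(page); /* drop the balloon reference */

		spin_lock(&vb->recover_page_list_lock);
	}
	spin_unlock(&vb->recover_page_list_lock);
}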
[...]
>
>>
>> +out_unregister_reporting:
>> + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING))
>> + page_reporting_unregister(&vb->pr_dev_info);
>> out_unregister_oom:
>> if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
>> unregister_oom_notifier(&vb->oom_nb);
>> @@ -1082,6 +1319,11 @@ static void virtballoon_remove(struct virtio_device *vdev)
>> destroy_workqueue(vb->balloon_wq);
>> }
>>
>> + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_RECOVER)) {
>
> Could the notifier already have been triggered, so that we use the
> device from the notifier before it is fully initialized, and end up
> leaking memory here that we allocated?
>
>> + unregister_memory_failure_notifier(&vb->memory_failure_nb);
>> + cancel_work_sync(&vb->unpoison_memory_work);
>> + }
>> +
>
> Could we be leaking memory from the virtballoon_remove() path?
>
Yes, I'll fix the possible memory leak here.
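Something along these lines in virtballoon_remove(), after unregistering
the notifier and cancelling the work (a rough sketch; in_vbr is embedded
in struct virtio_balloon, so only the kzalloc'd out_vbr buffers get
kfree'd):

	struct page *page;
	void *buf;

	/* drop the references of pages still held on our lists */
	spin_lock(&vb->recover_page_list_lock);
	while ((page = list_first_entry_or_null(&vb->corrupted_page_list,
						struct page, lru))) {
		list_del(&page->lru);
		put_page(page);
	}
	while ((page = list_first_entry_or_null(&vb->recovered_page_list,
						struct page, lru))) {
		list_del(&page->lru);
		put_page(page);
	}
	spin_unlock(&vb->recover_page_list_lock);

	/* free request buffers the device never completed */
	while ((buf = virtqueue_detach_unused_buf(vb->recover_vq))) {
		if (buf != &vb->in_vbr)
			kfree(buf);
	}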
Thanks a lot.
--
zhenwei pi