linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Baoquan He <bhe@redhat.com>
To: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	linux-fsdevel@vger.kernel.org,
	Andrew Morton <akpm@linux-foundation.org>,
	Uladzislau Rezki <urezki@gmail.com>,
	Matthew Wilcox <willy@infradead.org>,
	David Hildenbrand <david@redhat.com>,
	Liu Shixin <liushixin2@huawei.com>, Jiri Olsa <jolsa@kernel.org>,
	Jens Axboe <axboe@kernel.dk>,
	Alexander Viro <viro@zeniv.linux.org.uk>
Subject: Re: [PATCH v4 4/4] mm: vmalloc: convert vread() to vread_iter()
Date: Wed, 22 Mar 2023 20:55:23 +0800	[thread overview]
Message-ID: <ZBr6u+rivMztIvn9@MiWiFi-R3L-srv> (raw)
In-Reply-To: <6b3899bbbf1f4bd6b7133c8b6f27b3a8791607b0.1679431886.git.lstoakes@gmail.com>

On 03/21/23 at 08:54pm, Lorenzo Stoakes wrote:
......
> Additionally, we must account for the fact that at any point a copy may
> fail if this happens, we exit indicating fewer bytes retrieved than
> expected.
......
> -static int aligned_vread(char *buf, char *addr, unsigned long count)
> +/*
> + * small helper routine, copy contents to iter from addr.
> + * If the page is not present, fill zero.
> + *
> + * Returns the number of copied bytes.
> + */
> +static size_t aligned_vread_iter(struct iov_iter *iter,
> +				 const char *addr, size_t count)
>  {
> -	struct page *p;
> -	int copied = 0;
> +	size_t remains = count;
> +	struct page *page;
>  
> -	while (count) {
> +	while (remains > 0) {
>  		unsigned long offset, length;
> +		size_t copied = 0;
>  
>  		offset = offset_in_page(addr);
>  		length = PAGE_SIZE - offset;
> -		if (length > count)
> -			length = count;
> -		p = vmalloc_to_page(addr);
> +		if (length > remains)
> +			length = remains;
> +		page = vmalloc_to_page(addr);
>  		/*
> -		 * To do safe access to this _mapped_ area, we need
> -		 * lock. But adding lock here means that we need to add
> -		 * overhead of vmalloc()/vfree() calls for this _debug_
> -		 * interface, rarely used. Instead of that, we'll use
> -		 * kmap() and get small overhead in this access function.
> +		 * To do safe access to this _mapped_ area, we need lock. But
> +		 * adding lock here means that we need to add overhead of
> +		 * vmalloc()/vfree() calls for this _debug_ interface, rarely
> +		 * used. Instead of that, we'll use an local mapping via
> +		 * copy_page_to_iter_atomic() and accept a small overhead in
> +		 * this access function.
>  		 */
> -		if (p) {
> -			/* We can expect USER0 is not used -- see vread() */
> -			void *map = kmap_atomic(p);
> -			memcpy(buf, map + offset, length);
> -			kunmap_atomic(map);
> -		} else
> -			memset(buf, 0, length);
> +		if (page)
> +			copied = copy_page_to_iter_atomic(page, offset, length,
> +							  iter);
> +

If we have decided to quit at any failing copy point, indicating fewer
bytes retrieved than expected, I wonder why we don't quit here when
copy_page_to_iter_atomic() fails?

The original zero-filling is for unmapped vmalloc areas, not for areas
where the read failed. Not sure if I got your point.

> +		/* Zero anything we were unable to copy. */
> +		copied += zero_iter(iter, length - copied);
> +
> +		addr += copied;
> +		remains -= copied;
>  
> -		addr += length;
> -		buf += length;
> -		copied += length;
> -		count -= length;
> +		if (copied != length)
> +			break;
>  	}
> -	return copied;
> +
> +	return count - remains;
>  }
>  
> -static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags)
> +/*
> + * Read from a vm_map_ram region of memory.
> + *
> + * Returns the number of copied bytes.
> + */
> +static size_t vmap_ram_vread_iter(struct iov_iter *iter, const char *addr,
> +				  size_t count, unsigned long flags)
>  {
>  	char *start;
>  	struct vmap_block *vb;
>  	unsigned long offset;
> -	unsigned int rs, re, n;
> +	unsigned int rs, re;
> +	size_t remains, n;
>  
>  	/*
>  	 * If it's area created by vm_map_ram() interface directly, but
>  	 * not further subdividing and delegating management to vmap_block,
>  	 * handle it here.
>  	 */
> -	if (!(flags & VMAP_BLOCK)) {
> -		aligned_vread(buf, addr, count);
> -		return;
> -	}
> +	if (!(flags & VMAP_BLOCK))
> +		return aligned_vread_iter(iter, addr, count);
>  
>  	/*
>  	 * Area is split into regions and tracked with vmap_block, read out
> @@ -3505,50 +3537,65 @@ static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags
>  	 */
>  	vb = xa_load(&vmap_blocks, addr_to_vb_idx((unsigned long)addr));
>  	if (!vb)
> -		goto finished;
> +		goto finished_zero;
>  
>  	spin_lock(&vb->lock);
>  	if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) {
>  		spin_unlock(&vb->lock);
> -		goto finished;
> +		goto finished_zero;
>  	}
> +
> +	remains = count;
>  	for_each_set_bitrange(rs, re, vb->used_map, VMAP_BBMAP_BITS) {
> -		if (!count)
> -			break;
> +		size_t copied;
> +
> +		if (remains == 0)
> +			goto finished;
> +
>  		start = vmap_block_vaddr(vb->va->va_start, rs);
> -		while (addr < start) {
> -			if (count == 0)
> -				goto unlock;
> -			*buf = '\0';
> -			buf++;
> -			addr++;
> -			count--;
> +
> +		if (addr < start) {
> +			size_t to_zero = min_t(size_t, start - addr, remains);
> +			size_t zeroed = zero_iter(iter, to_zero);
> +
> +			addr += zeroed;
> +			remains -= zeroed;
> +
> +			if (remains == 0 || zeroed != to_zero)
> +				goto finished;
>  		}
> +
>  		/*it could start reading from the middle of used region*/
>  		offset = offset_in_page(addr);
>  		n = ((re - rs + 1) << PAGE_SHIFT) - offset;
> -		if (n > count)
> -			n = count;
> -		aligned_vread(buf, start+offset, n);
> +		if (n > remains)
> +			n = remains;
> +
> +		copied = aligned_vread_iter(iter, start + offset, n);
>  
> -		buf += n;
> -		addr += n;
> -		count -= n;
> +		addr += copied;
> +		remains -= copied;
> +
> +		if (copied != n)
> +			goto finished;
>  	}
> -unlock:
> +
>  	spin_unlock(&vb->lock);
>  
> -finished:
> +finished_zero:
>  	/* zero-fill the left dirty or free regions */
> -	if (count)
> -		memset(buf, 0, count);
> +	return count - remains + zero_iter(iter, remains);

When it jumps to finished_zero, the local variable 'remains' is still 0, so
we do nothing here but return count. That doesn't look right. You may need
to move the 'remains = count' assignment up.

> +finished:
> +	/* We couldn't copy/zero everything */
> +	spin_unlock(&vb->lock);
> +	return count - remains;
>  }
>  
>  /**
> - * vread() - read vmalloc area in a safe way.
> - * @buf:     buffer for reading data
> - * @addr:    vm address.
> - * @count:   number of bytes to be read.
> + * vread_iter() - read vmalloc area in a safe way to an iterator.
> + * @iter:         the iterator to which data should be written.
> + * @addr:         vm address.
> + * @count:        number of bytes to be read.
>   *
>   * This function checks that addr is a valid vmalloc'ed area, and
>   * copy data from that area to a given buffer. If the given memory range
> @@ -3568,13 +3615,12 @@ static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags
>   * (same number as @count) or %0 if [addr...addr+count) doesn't
>   * include any intersection with valid vmalloc area
>   */
> -long vread(char *buf, char *addr, unsigned long count)
> +long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
>  {
>  	struct vmap_area *va;
>  	struct vm_struct *vm;
> -	char *vaddr, *buf_start = buf;
> -	unsigned long buflen = count;
> -	unsigned long n, size, flags;
> +	char *vaddr;
> +	size_t n, size, flags, remains;
>  
>  	addr = kasan_reset_tag(addr);
>  
> @@ -3582,18 +3628,22 @@ long vread(char *buf, char *addr, unsigned long count)
>  	if ((unsigned long) addr + count < count)
>  		count = -(unsigned long) addr;
>  
> +	remains = count;
> +
>  	spin_lock(&vmap_area_lock);
>  	va = find_vmap_area_exceed_addr((unsigned long)addr);
>  	if (!va)
> -		goto finished;
> +		goto finished_zero;
>  
>  	/* no intersects with alive vmap_area */
> -	if ((unsigned long)addr + count <= va->va_start)
> -		goto finished;
> +	if ((unsigned long)addr + remains <= va->va_start)
> +		goto finished_zero;
>  
>  	list_for_each_entry_from(va, &vmap_area_list, list) {
> -		if (!count)
> -			break;
> +		size_t copied;
> +
> +		if (remains == 0)
> +			goto finished;
>  
>  		vm = va->vm;
>  		flags = va->flags & VMAP_FLAGS_MASK;
> @@ -3608,6 +3658,7 @@ long vread(char *buf, char *addr, unsigned long count)
>  
>  		if (vm && (vm->flags & VM_UNINITIALIZED))
>  			continue;
> +
>  		/* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
>  		smp_rmb();
>  
> @@ -3616,38 +3667,45 @@ long vread(char *buf, char *addr, unsigned long count)
>  
>  		if (addr >= vaddr + size)
>  			continue;
> -		while (addr < vaddr) {
> -			if (count == 0)
> +
> +		if (addr < vaddr) {
> +			size_t to_zero = min_t(size_t, vaddr - addr, remains);
> +			size_t zeroed = zero_iter(iter, to_zero);
> +
> +			addr += zeroed;
> +			remains -= zeroed;
> +
> +			if (remains == 0 || zeroed != to_zero)
>  				goto finished;
> -			*buf = '\0';
> -			buf++;
> -			addr++;
> -			count--;
>  		}
> +
>  		n = vaddr + size - addr;
> -		if (n > count)
> -			n = count;
> +		if (n > remains)
> +			n = remains;
>  
>  		if (flags & VMAP_RAM)
> -			vmap_ram_vread(buf, addr, n, flags);
> +			copied = vmap_ram_vread_iter(iter, addr, n, flags);
>  		else if (!(vm->flags & VM_IOREMAP))
> -			aligned_vread(buf, addr, n);
> +			copied = aligned_vread_iter(iter, addr, n);
>  		else /* IOREMAP area is treated as memory hole */
> -			memset(buf, 0, n);
> -		buf += n;
> -		addr += n;
> -		count -= n;
> +			copied = zero_iter(iter, n);
> +
> +		addr += copied;
> +		remains -= copied;
> +
> +		if (copied != n)
> +			goto finished;
>  	}
> -finished:
> -	spin_unlock(&vmap_area_lock);
>  
> -	if (buf == buf_start)
> -		return 0;
> +finished_zero:
> +	spin_unlock(&vmap_area_lock);
>  	/* zero-fill memory holes */
> -	if (buf != buf_start + buflen)
> -		memset(buf, 0, buflen - (buf - buf_start));
> +	return count - remains + zero_iter(iter, remains);
> +finished:
> +	/* Nothing remains, or We couldn't copy/zero everything. */
                              ~~~ typo, s/We/we/
> +	spin_unlock(&vmap_area_lock);
>  
> -	return buflen;
> +	return count - remains;
>  }
>  
>  /**
> -- 
> 2.39.2
> 



  parent reply	other threads:[~2023-03-22 12:55 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-21 20:54 [PATCH v4 0/4] convert read_kcore(), vread() to use iterators Lorenzo Stoakes
2023-03-21 20:54 ` [PATCH v4 1/4] fs/proc/kcore: avoid bounce buffer for ktext data Lorenzo Stoakes
2023-03-21 20:54 ` [PATCH v4 2/4] fs/proc/kcore: convert read_kcore() to read_kcore_iter() Lorenzo Stoakes
2023-03-22  1:15   ` Baoquan He
2023-03-22  6:17     ` Lorenzo Stoakes
2023-03-22 11:12   ` David Hildenbrand
2023-03-21 20:54 ` [PATCH v4 3/4] iov_iter: add copy_page_to_iter_atomic() Lorenzo Stoakes
2023-03-22 10:17   ` Baoquan He
2023-03-22 10:32     ` Lorenzo Stoakes
2023-03-22 11:06       ` Lorenzo Stoakes
2023-03-22 13:21         ` Baoquan He
2023-03-22 13:08       ` Baoquan He
2023-03-21 20:54 ` [PATCH v4 4/4] mm: vmalloc: convert vread() to vread_iter() Lorenzo Stoakes
2023-03-22 11:18   ` David Hildenbrand
2023-03-22 11:31     ` Lorenzo Stoakes
2023-03-22 12:55   ` Baoquan He [this message]
2023-03-22 13:43     ` Lorenzo Stoakes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZBr6u+rivMztIvn9@MiWiFi-R3L-srv \
    --to=bhe@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=axboe@kernel.dk \
    --cc=david@redhat.com \
    --cc=jolsa@kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=liushixin2@huawei.com \
    --cc=lstoakes@gmail.com \
    --cc=urezki@gmail.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox