From: Zi Yan <ziy@nvidia.com>
To: Matthew Wilcox <willy@infradead.org>
Cc: <linux-fsdevel@vger.kernel.org>, <linux-mm@kvack.org>,
<hch@lst.de>, <kent.overstreet@gmail.com>
Subject: Re: [PATCH v3 04/18] mm/filemap: Use THPs in generic_file_buffered_read
Date: Tue, 17 Nov 2020 11:00:36 -0500 [thread overview]
Message-ID: <52328F4C-897D-4A56-9677-2B857661A487@nvidia.com> (raw)
In-Reply-To: <20201110033703.23261-5-willy@infradead.org>
[-- Attachment #1: Type: text/plain, Size: 8337 bytes --]
On 9 Nov 2020, at 22:36, Matthew Wilcox (Oracle) wrote:
> Add filemap_get_read_batch() which returns the THPs which represent a
> contiguous array of bytes in the file. It also stops when encountering
> a page marked as Readahead or !Uptodate (but does return that page)
> so it can be handled appropriately by filemap_get_pages(). That lets us
> remove the loop in filemap_get_pages() and check only the last page.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> ---
> mm/filemap.c | 122 +++++++++++++++++++++++++++++++++++----------------
> 1 file changed, 85 insertions(+), 37 deletions(-)
>
> diff --git a/mm/filemap.c b/mm/filemap.c
> index bd02820601f8..1de586eb377e 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2176,6 +2176,51 @@ static int lock_page_for_iocb(struct kiocb *iocb, struct page *page)
> return lock_page_killable(page);
> }
>
> +/*
> + * filemap_get_read_batch - Get a batch of pages for read
> + *
> + * Get a batch of pages which represent a contiguous range of bytes
> + * in the file. No tail pages will be returned. If @index is in the
> + * middle of a THP, the entire THP will be returned. The last page in
> + * the batch may have Readahead set or be not Uptodate so that the
> + * caller can take the appropriate action.
> + */
> +static void filemap_get_read_batch(struct address_space *mapping,
> + pgoff_t index, pgoff_t max, struct pagevec *pvec)
> +{
> + XA_STATE(xas, &mapping->i_pages, index);
> + struct page *head;
> +
> + rcu_read_lock();
> + for (head = xas_load(&xas); head; head = xas_next(&xas)) {
> + if (xas_retry(&xas, head))
> + continue;
> + if (xas.xa_index > max || xa_is_value(head))
> + break;
> + if (!page_cache_get_speculative(head))
> + goto retry;
> +
> + /* Has the page moved or been split? */
> + if (unlikely(head != xas_reload(&xas)))
> + goto put_page;
> +
> + if (!pagevec_add(pvec, head))
> + break;
> + if (!PageUptodate(head))
> + break;
> + if (PageReadahead(head))
> + break;
> + xas.xa_index = head->index + thp_nr_pages(head) - 1;
> + xas.xa_offset = (xas.xa_index >> xas.xa_shift) & XA_CHUNK_MASK;
> + continue;
> +put_page:
> + put_page(head);
> +retry:
> + xas_reset(&xas);
> + }
> + rcu_read_unlock();
> +}
> +
> static struct page *filemap_read_page(struct kiocb *iocb, struct file *filp,
> struct address_space *mapping, struct page *page)
> {
> @@ -2329,15 +2374,15 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
> struct address_space *mapping = filp->f_mapping;
> struct file_ra_state *ra = &filp->f_ra;
> pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
> - pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
> - unsigned int nr = min_t(unsigned long, last_index - index, PAGEVEC_SIZE);
> - int i, j, err = 0;
> + pgoff_t last_index;
> + int err = 0;
>
> + last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE);
> find_page:
> if (fatal_signal_pending(current))
> return -EINTR;
>
> - pvec->nr = find_get_pages_contig(mapping, index, nr, pvec->pages);
> + filemap_get_read_batch(mapping, index, last_index, pvec);
> if (pvec->nr)
> goto got_pages;
>
> @@ -2346,29 +2391,30 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
>
> page_cache_sync_readahead(mapping, ra, filp, index, last_index - index);
>
> - pvec->nr = find_get_pages_contig(mapping, index, nr, pvec->pages);
> + filemap_get_read_batch(mapping, index, last_index, pvec);
> if (pvec->nr)
> goto got_pages;
>
> pvec->pages[0] = filemap_create_page(iocb, iter);
> err = PTR_ERR_OR_ZERO(pvec->pages[0]);
> - if (!IS_ERR_OR_NULL(pvec->pages[0]))
> - pvec->nr = 1;
> + if (IS_ERR_OR_NULL(pvec->pages[0]))
> + goto err;
> + pvec->nr = 1;
> + return 0;
> got_pages:
> - for (i = 0; i < pvec->nr; i++) {
> - struct page *page = pvec->pages[i];
> - pgoff_t pg_index = index + i;
> + {
> + struct page *page = pvec->pages[pvec->nr - 1];
> + pgoff_t pg_index = page->index;
> loff_t pg_pos = max(iocb->ki_pos,
> (loff_t) pg_index << PAGE_SHIFT);
> loff_t pg_count = iocb->ki_pos + iter->count - pg_pos;
>
> if (PageReadahead(page)) {
> if (iocb->ki_flags & IOCB_NOIO) {
> - for (j = i; j < pvec->nr; j++)
> - put_page(pvec->pages[j]);
> - pvec->nr = i;
> + put_page(page);
> + pvec->nr--;
> err = -EAGAIN;
> - break;
> + goto err;
> }
> page_cache_async_readahead(mapping, ra, filp, page,
> pg_index, last_index - pg_index);
> @@ -2376,26 +2422,23 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
>
> if (!PageUptodate(page)) {
> if ((iocb->ki_flags & IOCB_NOWAIT) ||
> - ((iocb->ki_flags & IOCB_WAITQ) && i)) {
> - for (j = i; j < pvec->nr; j++)
> - put_page(pvec->pages[j]);
> - pvec->nr = i;
> + ((iocb->ki_flags & IOCB_WAITQ) && pvec->nr > 1)) {
> + put_page(page);
> + pvec->nr--;
> err = -EAGAIN;
> - break;
> + goto err;
> }
>
> page = filemap_update_page(iocb, filp, iter, page,
> pg_pos, pg_count);
> if (IS_ERR_OR_NULL(page)) {
> - for (j = i + 1; j < pvec->nr; j++)
> - put_page(pvec->pages[j]);
> - pvec->nr = i;
> + pvec->nr--;
> err = PTR_ERR_OR_ZERO(page);
> - break;
> }
> }
> }
>
> +err:
> if (likely(pvec->nr))
> return 0;
> if (err)
> @@ -2437,6 +2480,7 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
> if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes))
> return 0;
> iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
> + pagevec_init(pvec);
This should be pagevec_init(&pvec);
>
> do {
> cond_resched();
> @@ -2464,13 +2508,8 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
> isize = i_size_read(inode);
> if (unlikely(iocb->ki_pos >= isize))
> goto put_pages;
> -
> end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
>
> - while ((iocb->ki_pos >> PAGE_SHIFT) + pvec.nr >
> - (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT)
> - put_page(pvec.pages[--pvec.nr]);
> -
> /*
> * Once we start copying data, we don't want to be touching any
> * cachelines that might be contended:
> @@ -2484,24 +2523,32 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
> if (iocb->ki_pos >> PAGE_SHIFT !=
> ra->prev_pos >> PAGE_SHIFT)
> mark_page_accessed(pvec.pages[0]);
> - for (i = 1; i < pagevec_count(&pvec); i++)
> - mark_page_accessed(pvec.pages[i]);
>
> for (i = 0; i < pagevec_count(&pvec); i++) {
> - unsigned int offset = iocb->ki_pos & ~PAGE_MASK;
> - unsigned int bytes = min_t(loff_t, end_offset - iocb->ki_pos,
> - PAGE_SIZE - offset);
> - unsigned int copied;
> + struct page *page = pvec.pages[i];
> + size_t page_size = thp_size(page);
> + size_t offset = iocb->ki_pos & (page_size - 1);
> + size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos,
> + page_size - offset);
> + size_t copied;
>
> + if (end_offset < page_offset(page))
> + break;
> + if (i > 0)
> + mark_page_accessed(page);
> /*
> * If users can be writing to this page using arbitrary
> * virtual addresses, take care about potential aliasing
> * before reading the page on the kernel side.
> */
> - if (writably_mapped)
> - flush_dcache_page(pvec.pages[i]);
> + if (writably_mapped) {
> + int j;
> +
> + for (j = 0; j < thp_nr_pages(page); j++)
> + flush_dcache_page(page + j);
> + }
>
> - copied = copy_page_to_iter(pvec.pages[i], offset, bytes, iter);
> + copied = copy_page_to_iter(page, offset, bytes, iter);
>
> written += copied;
> iocb->ki_pos += copied;
> @@ -2515,6 +2562,7 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
> put_pages:
> for (i = 0; i < pagevec_count(&pvec); i++)
> put_page(pvec.pages[i]);
> + pagevec_reinit(pvec);
It should be pagevec_reinit(&pvec);
I found the above two issues while compiling the patch.
—
Best Regards,
Yan Zi
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 854 bytes --]
next prev parent reply other threads:[~2020-11-17 16:01 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-11-10 3:36 [PATCH v3 00/18] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
2020-11-10 3:36 ` [PATCH v3 01/18] mm/filemap: Rename generic_file_buffered_read subfunctions Matthew Wilcox (Oracle)
2020-11-10 3:36 ` [PATCH v3 02/18] mm/filemap: Remove dynamically allocated array from filemap_read Matthew Wilcox (Oracle)
2020-11-10 18:20 ` Christoph Hellwig
2020-11-10 3:36 ` [PATCH v3 03/18] mm/filemap: Convert filemap_get_pages to take a pagevec Matthew Wilcox (Oracle)
2020-11-10 18:21 ` Christoph Hellwig
2020-11-10 18:25 ` Matthew Wilcox
2020-11-10 3:36 ` [PATCH v3 04/18] mm/filemap: Use THPs in generic_file_buffered_read Matthew Wilcox (Oracle)
2020-11-17 16:00 ` Zi Yan [this message]
2020-11-17 16:07 ` Matthew Wilcox
2020-11-10 3:36 ` [PATCH v3 05/18] mm/filemap: Pass a sleep state to put_and_wait_on_page_locked Matthew Wilcox (Oracle)
2020-11-10 3:36 ` [PATCH v3 06/18] mm/filemap: Support readpage splitting a page Matthew Wilcox (Oracle)
2020-11-10 3:36 ` [PATCH v3 07/18] mm/filemap: Inline __wait_on_page_locked_async into caller Matthew Wilcox (Oracle)
2020-11-10 3:36 ` [PATCH v3 08/18] mm/filemap: Don't call ->readpage if IOCB_WAITQ is set Matthew Wilcox (Oracle)
2020-11-10 3:36 ` [PATCH v3 09/18] mm/filemap: Change filemap_read_page calling conventions Matthew Wilcox (Oracle)
2020-11-10 3:36 ` [PATCH v3 10/18] mm/filemap: Change filemap_create_page " Matthew Wilcox (Oracle)
2020-11-10 3:36 ` [PATCH v3 11/18] mm/filemap: Convert filemap_update_page to return an errno Matthew Wilcox (Oracle)
2020-11-10 18:22 ` Christoph Hellwig
2020-11-10 3:36 ` [PATCH v3 12/18] mm/filemap: Move the iocb checks into filemap_update_page Matthew Wilcox (Oracle)
2020-11-10 3:36 ` [PATCH v3 13/18] mm/filemap: Add filemap_range_uptodate Matthew Wilcox (Oracle)
2020-11-10 18:22 ` Christoph Hellwig
2020-11-10 3:36 ` [PATCH v3 14/18] mm/filemap: Split filemap_readahead out of filemap_get_pages Matthew Wilcox (Oracle)
2020-11-10 18:23 ` Christoph Hellwig
2020-11-10 3:37 ` [PATCH v3 15/18] mm/filemap: Restructure filemap_get_pages Matthew Wilcox (Oracle)
2020-11-10 18:24 ` Christoph Hellwig
2020-11-10 3:37 ` [PATCH v3 16/18] mm/filemap: Don't relock the page after calling readpage Matthew Wilcox (Oracle)
2020-11-10 18:24 ` Christoph Hellwig
2020-11-10 3:37 ` [PATCH v3 17/18] mm/filemap: Rename generic_file_buffered_read to filemap_read Matthew Wilcox (Oracle)
2020-11-10 3:37 ` [PATCH v3 18/18] mm/filemap: Simplify generic_file_read_iter Matthew Wilcox (Oracle)
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=52328F4C-897D-4A56-9677-2B857661A487@nvidia.com \
--to=ziy@nvidia.com \
--cc=hch@lst.de \
--cc=kent.overstreet@gmail.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox