Re: [PATCH] mm: shmem: implement POSIX_FADV_[WILL|DONT]NEED for shmem

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Charan Teja Kalla <charante@codeaurora.org>
To: hughd@google.com, akpm@linux-foundation.org, vbabka@suse.cz,
	rientjes@google.com, david@redhat.com, mhocko@suse.com,
	linux-mm@kvack.org, ",surenb"@google.com
Cc: linux-kernel@vger.kernel.org
Subject: Re: [PATCH] mm: shmem: implement POSIX_FADV_[WILL|DONT]NEED for shmem
Date: Mon, 25 Oct 2021 19:47:06 +0530	[thread overview]
Message-ID: <dde4c010-be57-05d0-a59d-1ed1fc768bdb@codeaurora.org> (raw)
In-Reply-To: <1633701982-22302-1-git-send-email-charante@codeaurora.org>

Hello,

Could you please provide your valuable feedback here? It would be of great
help to me.

Thanks,
Charan

On 10/8/2021 7:36 PM, Charan Teja Reddy wrote:
> Currently fadvise(2) is supported only for files that are not
> associated with noop_backing_dev_info; thus for files like shmem,
> fadvise results in a NOP. But there is file_operations->fadvise(),
> which lets file systems provide their own fadvise
> implementation. Use this support to implement some of the POSIX_FADV_XXX
> functionality for shmem files.
> 
> This patch aims to implement the POSIX_FADV_WILLNEED and POSIX_FADV_DONTNEED
> advices for shmem files, which can be helpful for drivers that may want
> to manage the shmem pages of files that are created through
> shmem_file_setup[_with_mnt]().  An example use case: a driver
> can create a shmem file of a size equal to its requirements,
> map the pages for DMA and then pass the fd to the user. The user, who knows
> well about the usage of these pages, can now decide when these pages are
> not required and push them to swap through DONTNEED, thus freeing up memory
> well in advance rather than relying on reclaim, and use WILLNEED when it
> decides that they will be useful in the near future. IOW, it lets the clients
> free up/read the memory when they want to. Another use case is that GEM
> objects, which are currently allocated and managed through shmem files, can
> use vfs_fadvise(DONT|WILLNEED) on the shmem fd when the driver comes to
> know (e.g. through some hints from user space) that the GEM objects are not
> going to be used/will be needed in the near future.
> 
> Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
> ---
>  mm/shmem.c | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 139 insertions(+)
> 
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 70d9ce2..ab7ea33 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -38,6 +38,8 @@
>  #include <linux/hugetlb.h>
>  #include <linux/frontswap.h>
>  #include <linux/fs_parser.h>
> +#include <linux/mm_inline.h>
> +#include <linux/fadvise.h>
>  
>  #include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */
>  
> @@ -2792,6 +2794,142 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
>  	return error;
>  }
>  
> +static int shmem_fadvise_dontneed(struct address_space *mapping, loff_t start,
> +				loff_t end)
> +{
> +	int ret;
> +	struct writeback_control wbc = {
> +		.sync_mode = WB_SYNC_NONE,
> +		.nr_to_write = LONG_MAX,
> +		.range_start = 0,
> +		.range_end = LLONG_MAX,
> +		.for_reclaim = 1,
> +	};
> +	struct page *page;
> +
> +	XA_STATE(xas, &mapping->i_pages, start);
> +	if (!shmem_mapping(mapping))
> +		return -EINVAL;
> +
> +	if (!total_swap_pages)
> +		return 0;
> +
> +	lru_add_drain();
> +
> +	rcu_read_lock();
> +	xas_for_each(&xas, page, end) {
> +		if (xas_retry(&xas, page))
> +			continue;
> +		if (xa_is_value(page))
> +			continue;
> +		if (isolate_lru_page(page))
> +			continue;
> +
> +		inc_node_page_state(page, NR_ISOLATED_ANON +
> +						page_is_file_lru(page));
> +		lock_page(page);
> +		ClearPageDirty(page);
> +		SetPageReclaim(page);
> +		ret = shmem_writepage(page, &wbc);
> +		if (!PageWriteback(page))
> +			ClearPageReclaim(page);
> +		if (ret) {
> +			unlock_page(page);
> +			putback_lru_page(page);
> +			dec_node_page_state(page, NR_ISOLATED_ANON +
> +						page_is_file_lru(page));
> +			continue;
> +		}
> +
> +		/*
> +		 * shmem_writepage() place the page in the swapcache.
> +		 * Delete the page from the swapcache and release the
> +		 * page.
> +		 */
> +		lock_page(page);
> +		delete_from_swap_cache(page);
> +		unlock_page(page);
> +		dec_node_page_state(page, NR_ISOLATED_ANON +
> +						page_is_file_lru(page));
> +		put_page(page);
> +		if (need_resched()) {
> +			xas_pause(&xas);
> +			cond_resched_rcu();
> +		}
> +	}
> +	rcu_read_unlock();
> +
> +	return 0;
> +}
> +
> +static int shmem_fadvise_willneed(struct address_space *mapping,
> +				 pgoff_t start, pgoff_t long end)
> +{
> +	struct page *page;
> +
> +	XA_STATE(xas, &mapping->i_pages, start);
> +	rcu_read_lock();
> +	xas_for_each(&xas, page, end) {
> +		if (!xa_is_value(page))
> +			continue;
> +		page = shmem_read_mapping_page(mapping, xas.xa_index);
> +		if (!IS_ERR(page))
> +			put_page(page);
> +
> +		if (need_resched()) {
> +			xas_pause(&xas);
> +			cond_resched_rcu();
> +		}
> +	}
> +	rcu_read_unlock();
> +
> +	return 0;
> +}
> +
> +static int shmem_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
> +{
> +	loff_t endbyte;
> +	pgoff_t start_index;
> +	pgoff_t end_index;
> +	struct address_space *mapping;
> +	int ret = 0;
> +
> +	mapping = file->f_mapping;
> +	if (!mapping || len < 0)
> +		return -EINVAL;
> +
> +	endbyte = (u64)offset + (u64)len;
> +	if (!len || endbyte < len)
> +		endbyte = -1;
> +	else
> +		endbyte--;
> +
> +
> +	start_index = offset >> PAGE_SHIFT;
> +	end_index   = endbyte >> PAGE_SHIFT;
> +	switch (advice) {
> +	case POSIX_FADV_DONTNEED:
> +		ret = shmem_fadvise_dontneed(mapping, start_index, end_index);
> +		break;
> +	case POSIX_FADV_WILLNEED:
> +		ret = shmem_fadvise_willneed(mapping, start_index, end_index);
> +		break;
> +	case POSIX_FADV_NORMAL:
> +	case POSIX_FADV_RANDOM:
> +	case POSIX_FADV_SEQUENTIAL:
> +	case POSIX_FADV_NOREUSE:
> +		/*
> +		 * No bad return value, but ignore advice. May have to
> +		 * implement in future.
> +		 */
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return ret;
> +}
> +
>  static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
>  {
>  	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
> @@ -3799,6 +3937,7 @@ static const struct file_operations shmem_file_operations = {
>  	.splice_write	= iter_file_splice_write,
>  	.fallocate	= shmem_fallocate,
>  #endif
> +	.fadvise	= shmem_fadvise,
>  };
>  
>  static const struct inode_operations shmem_inode_operations = {
> 

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora
Forum, a Linux Foundation Collaborative Project

next prev parent reply	other threads:[~2021-10-25 14:17 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-08 14:06 Charan Teja Reddy
2021-10-25 14:17 ` Charan Teja Kalla [this message]
2021-10-28 18:40 ` Suren Baghdasaryan
2021-11-01 14:40   ` Charan Teja Kalla
2021-11-02 18:12     ` Suren Baghdasaryan
2021-11-03 12:41       ` Charan Teja Kalla

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=dde4c010-be57-05d0-a59d-1ed1fc768bdb@codeaurora.org \
    --to=charante@codeaurora.org \
    --cc=",surenb"@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@redhat.com \
    --cc=hughd@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=rientjes@google.com \
    --cc=vbabka@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox