linux-mm.kvack.org archive mirror
From: Michal Hocko <mhocko@suse.com>
To: Yafang Shao <laoar.shao@gmail.com>
Cc: willy@infradead.org, jack@suse.cz, hughd@google.com,
	vbabka@suse.cz, akpm@linux-foundation.org, linux-mm@kvack.org
Subject: Re: [PATCH] mm: add vm event for page cache miss
Date: Tue, 2 Apr 2019 09:23:51 +0200	[thread overview]
Message-ID: <20190402072351.GN28293@dhcp22.suse.cz> (raw)
In-Reply-To: <1554185720-26404-1-git-send-email-laoar.shao@gmail.com>

On Tue 02-04-19 14:15:20, Yafang Shao wrote:
> We found that some latency spikes on our database server were caused by
> page cache misses, so we decided to measure them. Currently the kernel
> lacks a facility for measuring page cache misses.

What are you going to use this information for?

> This patch introduces a new vm counter, PGCACHEMISS, for this purpose.
> The counter is incremented in the following scenarios:
> - page cache miss in the generic file read routine
> - read-access page cache miss in mmap
> - read-access page cache miss in swapin
>
> NB, the readahead routine is not counted because it does not stall the
> application directly.

Doesn't this partially re-open the side channel we closed for mincore
just recently?
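
To make that concern concrete, below is a minimal userspace sketch, not taken
from this thread, of the kind of probe the mincore comparison refers to. It
assumes only what the patch itself adds: a system-wide "pgcachemiss" line in
the world-readable /proc/vmstat.

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

/* Return the named counter from /proc/vmstat, or -1 if it is not there. */
static long long read_vmstat(const char *name)
{
	char line[256];
	long long val = -1;
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f)
		return -1;
	while (fgets(line, sizeof(line), f)) {
		char key[64];
		long long v;

		if (sscanf(line, "%63s %lld", key, &v) == 2 &&
		    !strcmp(key, name)) {
			val = v;
			break;
		}
	}
	fclose(f);
	return val;
}

int main(int argc, char **argv)
{
	long long before, after;
	char byte;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	before = read_vmstat("pgcachemiss");
	if (before < 0) {
		fprintf(stderr, "pgcachemiss not present in /proc/vmstat\n");
		return 1;
	}

	/* Fault in the first page of the file through the read path. */
	if (pread(fd, &byte, 1, 0) < 0)
		perror("pread");

	after = read_vmstat("pgcachemiss");
	printf("pgcachemiss delta: %lld (0 suggests the page was already cached)\n",
	       after - before);
	close(fd);
	return 0;
}

Because the counter is global, unrelated activity elsewhere on the machine
adds noise to the delta, which is the main difference from the per-page
mincore() interface.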

> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> ---
>  include/linux/pagemap.h       | 7 +++++++
>  include/linux/vm_event_item.h | 1 +
>  mm/filemap.c                  | 2 ++
>  mm/memory.c                   | 1 +
>  mm/shmem.c                    | 9 +++++----
>  mm/vmstat.c                   | 1 +
>  6 files changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index f939e00..8355b51 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -233,6 +233,13 @@ pgoff_t page_cache_next_miss(struct address_space *mapping,
>  pgoff_t page_cache_prev_miss(struct address_space *mapping,
>  			     pgoff_t index, unsigned long max_scan);
>  
> +static inline void page_cache_read_miss(struct vm_fault *vmf)
> +{
> +	if (!vmf || (vmf->flags & (FAULT_FLAG_USER | FAULT_FLAG_WRITE)) ==
> +	    FAULT_FLAG_USER)
> +		count_vm_event(PGCACHEMISS);
> +}
> +
>  #define FGP_ACCESSED		0x00000001
>  #define FGP_LOCK		0x00000002
>  #define FGP_CREAT		0x00000004
> diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
> index 47a3441..d589f05 100644
> --- a/include/linux/vm_event_item.h
> +++ b/include/linux/vm_event_item.h
> @@ -29,6 +29,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
>  		PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE,
>  		PGFAULT, PGMAJFAULT,
>  		PGLAZYFREED,
> +		PGCACHEMISS,
>  		PGREFILL,
>  		PGSTEAL_KSWAPD,
>  		PGSTEAL_DIRECT,
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 4157f85..fc12c2d 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2256,6 +2256,7 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
>  		goto out;
>  
>  no_cached_page:
> +		page_cache_read_miss(NULL);
>  		/*
>  		 * Ok, it wasn't cached, so we need to create a new
>  		 * page..
> @@ -2556,6 +2557,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
>  		fpin = do_async_mmap_readahead(vmf, page);
>  	} else if (!page) {
>  		/* No page in the page cache at all */
> +		page_cache_read_miss(vmf);
>  		count_vm_event(PGMAJFAULT);
>  		count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
>  		ret = VM_FAULT_MAJOR;
> diff --git a/mm/memory.c b/mm/memory.c
> index bd157f2..63bcd41 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2754,6 +2754,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>  		ret = VM_FAULT_MAJOR;
>  		count_vm_event(PGMAJFAULT);
>  		count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
> +		page_cache_read_miss(vmf);
>  	} else if (PageHWPoison(page)) {
>  		/*
>  		 * hwpoisoned dirty swapcache pages are kept for killing
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 3a4b74c..47e33a4 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -127,7 +127,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
>  static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>  			     struct page **pagep, enum sgp_type sgp,
>  			     gfp_t gfp, struct vm_area_struct *vma,
> -			     vm_fault_t *fault_type);
> +			     struct vm_fault *vmf, vm_fault_t *fault_type);
>  static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
>  		struct page **pagep, enum sgp_type sgp,
>  		gfp_t gfp, struct vm_area_struct *vma,
> @@ -1159,7 +1159,7 @@ static int shmem_unuse_swap_entries(struct inode *inode, struct pagevec pvec,
>  		error = shmem_swapin_page(inode, indices[i],
>  					  &page, SGP_CACHE,
>  					  mapping_gfp_mask(mapping),
> -					  NULL, NULL);
> +					  NULL, NULL, NULL);
>  		if (error == 0) {
>  			unlock_page(page);
>  			put_page(page);
> @@ -1614,7 +1614,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
>  static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>  			     struct page **pagep, enum sgp_type sgp,
>  			     gfp_t gfp, struct vm_area_struct *vma,
> -			     vm_fault_t *fault_type)
> +			     struct vm_fault *vmf, vm_fault_t *fault_type)
>  {
>  	struct address_space *mapping = inode->i_mapping;
>  	struct shmem_inode_info *info = SHMEM_I(inode);
> @@ -1636,6 +1636,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>  			*fault_type |= VM_FAULT_MAJOR;
>  			count_vm_event(PGMAJFAULT);
>  			count_memcg_event_mm(charge_mm, PGMAJFAULT);
> +			page_cache_read_miss(vmf);
>  		}
>  		/* Here we actually start the io */
>  		page = shmem_swapin(swap, gfp, info, index);
> @@ -1758,7 +1759,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
>  	page = find_lock_entry(mapping, index);
>  	if (xa_is_value(page)) {
>  		error = shmem_swapin_page(inode, index, &page,
> -					  sgp, gfp, vma, fault_type);
> +					  sgp, gfp, vma, vmf, fault_type);
>  		if (error == -EEXIST)
>  			goto repeat;
>  
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 36b56f8..c49ecba 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1188,6 +1188,7 @@ int fragmentation_index(struct zone *zone, unsigned int order)
>  	"pgfault",
>  	"pgmajfault",
>  	"pglazyfreed",
> +	"pgcachemiss",
>  
>  	"pgrefill",
>  	"pgsteal_kswapd",
> -- 
> 1.8.3.1
> 

-- 
Michal Hocko
SUSE Labs


Thread overview: 7+ messages
2019-04-02  6:15 Yafang Shao
2019-04-02  7:23 ` Michal Hocko [this message]
2019-04-02  7:38   ` Yafang Shao
2019-04-02  7:44     ` Michal Hocko
2019-04-02  7:49       ` Michal Hocko
2019-04-02  7:56         ` Yafang Shao
2019-04-02  7:54       ` Yafang Shao
