linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Marcelo Tosatti <marcelo@kvack.org>
To: "Daniel Spång" <daniel.spang@gmail.com>
Cc: linux-mm@kvack.org, marcelo@kvack.org, drepper@redhat.com,
	riel@redhat.com, akpm@linux-foundation.org, mbligh@mbligh.org,
	balbir@linux.vnet.ibm.com, 7eggert@gmx.de
Subject: Re: [RFC Patch] Thrashing notification
Date: Mon, 5 Nov 2007 13:30:25 -0500	[thread overview]
Message-ID: <20071105183025.GA4984@dmt> (raw)
In-Reply-To: <op.t1bp13jkk4ild9@bingo>

On Mon, Nov 05, 2007 at 03:47:53PM +0100, Daniel Spang wrote:
> This patch provides a way to notify user applications when the system
> is about to thrash. It checks the scanning priority of the inactive
> lru list and notifies user applications via sysfs when the priority
> reaches a threshold. In comparison to Marcelo Tosatti's oom
> notification patch, this patch also works on systems without swap.
> 
> Applications can poll() on this sysfs file and can then free memory in
> one way or another to prevent an oom situation.
> 
> Using a test application http://spng.se/oomtest/ that uses multiple
> allocator threads and a single release thread one can see that this
> works fairly well. See http://spng.se/oomtest/ for more details
> and graphs.

Daniel,

Hooking into try_to_free_pages() makes the scheme susceptible to
specifics such as:

- can the task writeout pages?
- is the allocation a higher order one?
- what zones is it operating on?

Remember that notifications are sent to applications which can allocate
globally... It is not very useful to send notifications for a userspace
which has a large percentage of its memory in highmem if the system is
having a lowmem zone shortage (granted that the notify-on-swap heuristic
has that problem, but you can then argue that swap affects system
performance globally, and it generally does in desktop systems).

Other than that, tuning "priority" from try_to_free_pages() is rather
difficult for users/admins.

My previous patches had the zone limitation, but the following way of
asking "are we low on memory?" gets rid of it:

+static unsigned int mem_notify_poll(struct file *file, poll_table *wait)
+{
+       unsigned int val = 0;
+       struct zone *zone;
+       int tpages_low, tpages_free, tpages_reserve;
+
+       tpages_low = tpages_free = tpages_reserve = 0;
+
+       poll_wait(file, &mem_wait, wait);
+
+       for_each_zone(zone) {
+               if (!populated_zone(zone))
+                       continue;
+               tpages_low += zone->pages_low;
+               tpages_free += zone_page_state(zone, NR_FREE_PAGES);
+               /* always use the reserve of the highest allocation type */
+               tpages_reserve += zone->lowmem_reserve[MAX_NR_ZONES-1];
+       }
+
+       if (mem_notify_status || (tpages_free <= tpages_low + tpages_reserve))
+               val = POLLIN;
+
+       return val;
+}

> Signed-off-by: Daniel Spang <daniel.spang@gmail.com>
> 
> diff -purN linux-2.6.23.1-mm1/include/linux/thrashing_notify.h linux-2.6.23.1-mm1_thrashing/include/linux/thrashing_notify.h
> --- linux-2.6.23.1-mm1/include/linux/thrashing_notify.h	1970-01-01 01:00:00.000000000 +0100
> +++ linux-2.6.23.1-mm1_thrashing/include/linux/thrashing_notify.h	2007-11-05 14:23:26.000000000 +0100
> @@ -0,0 +1,8 @@
> +#ifndef _LINUX_THRASHING_NOTIFY_H
> +#define _LINUX_THRASHING_NOTIFY_H
> +
> +void thrashing_notify(int priority);
> +
> +extern int thrashing_notifier_threshold;
> +
> +#endif /* _LINUX_THRASHING_NOTIFY_H */
> diff -purN linux-2.6.23.1-mm1/kernel/sysctl.c linux-2.6.23.1-mm1_thrashing/kernel/sysctl.c
> --- linux-2.6.23.1-mm1/kernel/sysctl.c	2007-11-01 14:59:16.000000000 +0100
> +++ linux-2.6.23.1-mm1_thrashing/kernel/sysctl.c	2007-11-05 14:22:29.000000000 +0100
> @@ -46,6 +46,7 @@
>  #include <linux/nfs_fs.h>
>  #include <linux/acpi.h>
>  #include <linux/reboot.h>
> +#include <linux/thrashing_notify.h>
>  
>  #include <asm/uaccess.h>
>  #include <asm/processor.h>
> @@ -102,6 +103,7 @@ static int minolduid;
>  static int min_percpu_pagelist_fract = 8;
>  
>  static int ngroups_max = NGROUPS_MAX;
> +static int def_priority = DEF_PRIORITY;
>  
>  #ifdef CONFIG_KMOD
>  extern char modprobe_path[];
> @@ -1071,6 +1073,16 @@ static struct ctl_table vm_table[] = {
>  		.extra1		= &zero,
>  	},
>  #endif
> +	{
> +		.ctl_name	= CTL_UNNUMBERED,
> +		.procname	= "thrashing_notifier_threshold",
> +		.data		= &thrashing_notifier_threshold,
> +		.maxlen		= sizeof thrashing_notifier_threshold,
> +		.mode		= 0644,
> +		.proc_handler	= &proc_dointvec_minmax,
> +		.extra1		= &zero,
> +		.extra2		= &def_priority,
> +	},
>  /*
>   * NOTE: do not add new entries to this table unless you have read
>   * Documentation/sysctl/ctl_unnumbered.txt
> diff -purN linux-2.6.23.1-mm1/mm/Makefile linux-2.6.23.1-mm1_thrashing/mm/Makefile
> --- linux-2.6.23.1-mm1/mm/Makefile	2007-11-01 14:59:16.000000000 +0100
> +++ linux-2.6.23.1-mm1_thrashing/mm/Makefile	2007-11-05 14:22:11.000000000 +0100
> @@ -11,7 +11,7 @@ obj-y			:= bootmem.o filemap.o mempool.o
>  			   page_alloc.o page-writeback.o pdflush.o \
>  			   readahead.o swap.o truncate.o vmscan.o \
>  			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
> -			   page_isolation.o $(mmu-y)
> +			   page_isolation.o thrashing_notify.o $(mmu-y)
>  
>  obj-$(CONFIG_BOUNCE)	+= bounce.o
>  obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
> diff -purN linux-2.6.23.1-mm1/mm/thrashing_notify.c linux-2.6.23.1-mm1_thrashing/mm/thrashing_notify.c
> --- linux-2.6.23.1-mm1/mm/thrashing_notify.c	1970-01-01 01:00:00.000000000 +0100
> +++ linux-2.6.23.1-mm1_thrashing/mm/thrashing_notify.c	2007-11-05 14:22:46.000000000 +0100
> @@ -0,0 +1,56 @@
> +/*
> + * mm/thrashing_notify.c
> + *
> + * Copyright (C) 2007 Daniel Spang <daniel.spang@gmail.com>
> + *
> + * Released under the GPL, see the file COPYING for details.
> + */
> +
> +#include <linux/thrashing_notify.h>
> +#include <linux/module.h>
> +#include <linux/kernel.h>
> +#include <linux/mman.h>
> +#include <linux/init.h>
> +#include <linux/types.h>
> +#include <linux/kobject.h>
> +#include <linux/sysfs.h>
> +
> +/*
> + * The count of thrashing occasions.
> + *
> + * Published to userspace at /sys/kernel/nr_thrashing
> + */
> +int nr_thrashing = 0;
> +
> +int thrashing_notifier_threshold = 4;
> +
> +static ssize_t nr_thrashing_show(struct kset *kset, char *page)
> +{
> +	return sprintf(page, "%u\n", nr_thrashing);
> +}
> +
> +static struct subsys_attribute nr_thrashing_attr = __ATTR_RO(nr_thrashing);
> +
> +static struct attribute *nr_thrashing_attrs[] = {
> +	&nr_thrashing_attr.attr,
> +	NULL,
> +};
> +
> +static struct attribute_group nr_thrashing_attr_group = {
> +	.attrs  = nr_thrashing_attrs,
> +};
> +
> +void thrashing_notify(int priority)
> +{
> +	nr_thrashing++;
> +	sysfs_notify(&kernel_subsys.kobj, NULL, "nr_thrashing");
> +}
> +
> +static int __init thrashing_init(void)
> +{
> +	return sysfs_create_group(&kernel_subsys.kobj,
> +			       &nr_thrashing_attr_group);
> +}
> +
> +module_init(thrashing_init)
> +
> diff -purN linux-2.6.23.1-mm1/mm/vmscan.c linux-2.6.23.1-mm1_thrashing/mm/vmscan.c
> --- linux-2.6.23.1-mm1/mm/vmscan.c	2007-11-01 14:59:16.000000000 +0100
> +++ linux-2.6.23.1-mm1_thrashing/mm/vmscan.c	2007-11-05 14:21:55.000000000 +0100
> @@ -39,6 +39,7 @@
>  #include <linux/kthread.h>
>  #include <linux/freezer.h>
>  #include <linux/memcontrol.h>
> +#include <linux/thrashing_notify.h>
>  
>  #include <asm/tlbflush.h>
>  #include <asm/div64.h>
> @@ -1285,6 +1286,9 @@ static unsigned long do_try_to_free_page
>  		sc->nr_io_pages = 0;
>  		if (!priority)
>  			disable_swap_token();
> +		if (priority == thrashing_notifier_threshold)
> +			thrashing_notify(priority);
>  		nr_reclaimed += shrink_zones(priority, zones, sc);
>  		/*
>  		 * Don't shrink slabs when reclaiming memory from
> @@ -1448,7 +1452,9 @@ loop_again:
>  		/* The swap token gets in the way of swapout... */
>  		if (!priority)
>  			disable_swap_token();
> +		if (priority == thrashing_notifier_threshold)
> +			thrashing_notify(priority);
>  		sc.nr_io_pages = 0;
>  		all_zones_ok = 1;
>  

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2007-11-05 18:30 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-11-05 14:47 Daniel Spång
2007-11-05 15:43 ` Daniel Spång
2007-11-05 18:30 ` Marcelo Tosatti [this message]
2007-11-05 20:17   ` Rik van Riel
2007-11-06 10:41     ` Daniel Spång
2007-11-06 20:01       ` Rik van Riel
2007-11-06 21:27         ` Daniel Spång
2007-11-06 10:36   ` Daniel Spång

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20071105183025.GA4984@dmt \
    --to=marcelo@kvack.org \
    --cc=7eggert@gmx.de \
    --cc=akpm@linux-foundation.org \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=daniel.spang@gmail.com \
    --cc=drepper@redhat.com \
    --cc=linux-mm@kvack.org \
    --cc=mbligh@mbligh.org \
    --cc=riel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox