From: Marcelo Tosatti <marcelo@kvack.org>
To: linux-mm@kvack.org
Cc: "KOSAKI Motohiro" <kosaki.motohiro@jp.fujitsu.com>,
"Daniel Spång" <daniel.spang@gmail.com>,
"Rik van Riel" <riel@redhat.com>,
"Andrew Morton" <akpm@linux-foundation.org>
Subject: [PATCH] mem notifications v3
Date: Mon, 24 Dec 2007 15:32:50 -0500 [thread overview]
Message-ID: <20071224203250.GA23149@dmt> (raw)
An updated version of mem-notify follows.
This changes the notification point to happen whenever the VM moves an
anonymous page to the inactive list - this is a pretty good indication
that there are unused anonymous pages present which will very likely be
swapped out soon.
Since the notification happens in shrink_zone(), which can be called very
often, the wakeups are rate limited to 5 times per second (on each CPU).
Index: marcelo/dev/mm/linux-2.6.24-rc2-mm1/Documentation/devices.txt
===================================================================
--- marcelo.orig/dev/mm/linux-2.6.24-rc2-mm1/Documentation/devices.txt
+++ marcelo/dev/mm/linux-2.6.24-rc2-mm1/Documentation/devices.txt
@@ -96,6 +96,7 @@ Your cooperation is appreciated.
11 = /dev/kmsg Writes to this come out as printk's
12 = /dev/oldmem Used by crashdump kernels to access
the memory of the kernel that crashed.
+ 13 = /dev/mem_notify Low memory notification.
1 block RAM disk
0 = /dev/ram0 First RAM disk
Index: marcelo/dev/mm/linux-2.6.24-rc2-mm1/drivers/char/mem.c
===================================================================
--- marcelo.orig/dev/mm/linux-2.6.24-rc2-mm1/drivers/char/mem.c
+++ marcelo/dev/mm/linux-2.6.24-rc2-mm1/drivers/char/mem.c
@@ -34,6 +34,8 @@
# include <linux/efi.h>
#endif
+extern struct file_operations mem_notify_fops;
+
/*
* Architectures vary in how they handle caching for addresses
* outside of main memory.
@@ -854,6 +856,9 @@ static int memory_open(struct inode * in
filp->f_op = &oldmem_fops;
break;
#endif
+ case 13:
+ filp->f_op = &mem_notify_fops;
+ break;
default:
return -ENXIO;
}
@@ -886,6 +891,7 @@ static const struct {
#ifdef CONFIG_CRASH_DUMP
{12,"oldmem", S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops},
#endif
+ {13,"mem_notify", S_IRUGO, &mem_notify_fops},
};
static struct class *mem_class;
Index: marcelo/dev/mm/linux-2.6.24-rc2-mm1/include/linux/swap.h
===================================================================
--- marcelo.orig/dev/mm/linux-2.6.24-rc2-mm1/include/linux/swap.h
+++ marcelo/dev/mm/linux-2.6.24-rc2-mm1/include/linux/swap.h
@@ -213,6 +213,9 @@ extern int shmem_unuse(swp_entry_t entry
extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
+/* linux/mm/mem_notify.c */
+void mem_notify_userspace(void);
+
#ifdef CONFIG_SWAP
/* linux/mm/page_io.c */
extern int swap_readpage(struct file *, struct page *);
Index: marcelo/dev/mm/linux-2.6.24-rc2-mm1/mm/Makefile
===================================================================
--- marcelo.orig/dev/mm/linux-2.6.24-rc2-mm1/mm/Makefile
+++ marcelo/dev/mm/linux-2.6.24-rc2-mm1/mm/Makefile
@@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o
page_alloc.o page-writeback.o pdflush.o \
readahead.o swap.o truncate.o vmscan.o \
prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
- page_isolation.o $(mmu-y)
+ page_isolation.o mem_notify.o $(mmu-y)
obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
obj-$(CONFIG_BOUNCE) += bounce.o
Index: marcelo/dev/mm/linux-2.6.24-rc2-mm1/mm/mem_notify.c
===================================================================
--- /dev/null
+++ marcelo/dev/mm/linux-2.6.24-rc2-mm1/mm/mem_notify.c
@@ -0,0 +1,80 @@
+/*
+ * Notify applications of memory pressure via /dev/mem_notify
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
+#include <linux/percpu.h>
+#include <linux/timer.h>
+
+static unsigned long mem_notify_status = 0;
+
+static DECLARE_WAIT_QUEUE_HEAD(mem_wait);
+static DEFINE_PER_CPU(unsigned long, last_mem_notify) = INITIAL_JIFFIES;
+
+/* maximum 5 notifications per second per cpu */
+void mem_notify_userspace(void)
+{
+ unsigned long target;
+ unsigned long now = jiffies;
+
+ target = __get_cpu_var(last_mem_notify) + (HZ/5);
+
+ if (time_after(now, target)) {
+ __get_cpu_var(last_mem_notify) = now;
+ mem_notify_status = 1;
+ wake_up(&mem_wait);
+ }
+}
+
+static int mem_notify_open(struct inode *inode, struct file *file)
+{
+ return 0;
+}
+
+static int mem_notify_release(struct inode *inode, struct file *file)
+{
+ return 0;
+}
+
+static unsigned int mem_notify_poll(struct file *file, poll_table *wait)
+{
+ unsigned int val = 0;
+
+ poll_wait(file, &mem_wait, wait);
+
+ if (mem_notify_status) {
+ struct zone *zone;
+ int pages_high, pages_free, pages_reserve;
+
+ mem_notify_status = 0;
+
+ /* check if its not a spurious/stale notification */
+ pages_high = pages_free = pages_reserve = 0;
+ for_each_zone(zone) {
+ if (!populated_zone(zone) || is_highmem(zone))
+ continue;
+ pages_high += zone->pages_high;
+ pages_free += zone_page_state(zone, NR_FREE_PAGES);
+ pages_reserve += zone->lowmem_reserve[MAX_NR_ZONES-1];
+ }
+
+ if (pages_free < (pages_high+pages_reserve)*2)
+ val = POLLIN;
+ }
+
+ return val;
+}
+
+struct file_operations mem_notify_fops = {
+ .open = mem_notify_open,
+ .release = mem_notify_release,
+ .poll = mem_notify_poll,
+};
+EXPORT_SYMBOL(mem_notify_fops);
Index: marcelo/dev/mm/linux-2.6.24-rc2-mm1/mm/vmscan.c
===================================================================
--- marcelo.orig/dev/mm/linux-2.6.24-rc2-mm1/mm/vmscan.c
+++ marcelo/dev/mm/linux-2.6.24-rc2-mm1/mm/vmscan.c
@@ -960,7 +960,7 @@ static inline int zone_is_near_oom(struc
* The downside is that we have to touch page->_count against each page.
* But we had to alter page->flags anyway.
*/
-static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
+static bool shrink_active_list(unsigned long nr_pages, struct zone *zone,
struct scan_control *sc, int priority)
{
unsigned long pgmoved;
@@ -972,6 +972,7 @@ static void shrink_active_list(unsigned
struct page *page;
struct pagevec pvec;
int reclaim_mapped = 0;
+ bool inactivated_anon = 0;
if (sc->may_swap) {
long mapped_ratio;
@@ -1078,6 +1079,13 @@ force_reclaim_mapped:
if (!reclaim_mapped ||
(total_swap_pages == 0 && PageAnon(page)) ||
page_referenced(page, 0, sc->mem_cgroup)) {
+ /* deal with the case where there is no
+ * swap but an anonymous page would be
+ * moved to the inactive list.
+ */
+ if (!total_swap_pages && reclaim_mapped &&
+ PageAnon(page))
+ inactivated_anon = 1;
list_add(&page->lru, &l_active);
continue;
}
@@ -1085,6 +1093,8 @@ force_reclaim_mapped:
list_add(&page->lru, &l_active);
continue;
}
+ if (PageAnon(page))
+ inactivated_anon = 1;
list_add(&page->lru, &l_inactive);
}
@@ -1146,6 +1156,7 @@ force_reclaim_mapped:
spin_unlock_irq(&zone->lru_lock);
pagevec_release(&pvec);
+ return inactivated_anon;
}
/*
@@ -1158,6 +1169,7 @@ static unsigned long shrink_zone(int pri
unsigned long nr_inactive;
unsigned long nr_to_scan;
unsigned long nr_reclaimed = 0;
+ bool inactivated_anon = 0;
/*
* Add one to `nr_to_scan' just to make sure that the kernel will
@@ -1184,7 +1196,8 @@ static unsigned long shrink_zone(int pri
nr_to_scan = min(nr_active,
(unsigned long)sc->swap_cluster_max);
nr_active -= nr_to_scan;
- shrink_active_list(nr_to_scan, zone, sc, priority);
+ if (shrink_active_list(nr_to_scan, zone, sc, priority))
+ inactivated_anon = 1;
}
if (nr_inactive) {
@@ -1196,6 +1209,9 @@ static unsigned long shrink_zone(int pri
}
}
+ if (inactivated_anon)
+ mem_notify_userspace();
+
throttle_vm_writeout(sc->gfp_mask);
return nr_reclaimed;
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next reply other threads:[~2007-12-24 20:32 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-12-24 20:32 Marcelo Tosatti [this message]
2007-12-25 3:47 ` KOSAKI Motohiro
2007-12-25 4:56 ` [RFC] add poll_wait_exclusive() API KOSAKI Motohiro
2007-12-27 21:05 ` Marcelo Tosatti
2007-12-25 8:31 ` [PATCH] mem notifications v3 KOSAKI Motohiro
2007-12-25 10:31 ` [RFC][patch 1/2] mem notifications v3 improvement for large system KOSAKI Motohiro
2007-12-27 21:04 ` Marcelo Tosatti
2007-12-28 0:38 ` KOSAKI Motohiro
2007-12-25 10:31 ` [RFC][patch 2/2] " KOSAKI Motohiro
2007-12-25 10:41 ` KOSAKI Motohiro
2007-12-27 4:49 ` [RFC][patch] mem_notify more faster reduce load average KOSAKI Motohiro
2007-12-27 20:13 ` [PATCH] mem notifications v3 Marcelo Tosatti
2007-12-28 1:44 ` KOSAKI Motohiro
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20071224203250.GA23149@dmt \
--to=marcelo@kvack.org \
--cc=akpm@linux-foundation.org \
--cc=daniel.spang@gmail.com \
--cc=kosaki.motohiro@jp.fujitsu.com \
--cc=linux-mm@kvack.org \
--cc=riel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox