From mboxrd@z Thu Jan 1 00:00:00 1970
Received: from there (h164n1fls31o925.telia.com [213.65.254.164])
	by mailg.telia.com (8.11.2/8.11.0) with SMTP id f7L0Mf320610
	for ; Tue, 21 Aug 2001 02:22:41 +0200 (CEST)
Message-Id: <200108210022.f7L0Mf320610@mailg.telia.com>
Content-Type: text/plain; charset="iso-8859-1"
From: Roger Larsson
Subject: [PATCH][RFC] using a memory_clock_interval
Date: Tue, 21 Aug 2001 02:18:20 +0200
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Sender: owner-linux-mm@kvack.org
Return-Path: 
To: linux-mm@kvack.org
List-ID: 

It runs, let's ship it...

First version of a patch that tries to USE a memory_clock to determine
when to run kswapd...

Limits need tuning... but it runs with almost identical performance to
the original.

Note that the rubberband is only used for debug output at the moment...

I will update it for the latest kernel... but that might be a week away...
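To make the mechanism concrete, here is a minimal user-space sketch of
the idea (illustration only, not part of the patch: the two constants
mirror the defaults in the patch below, wakeup_kswapd() is a stub, the
single-step rubberband update stands in for the elapsed-jiffies formula
in recalculate_vm_stats(), and main() is just an invented driver):

#include <stdio.h>

#define MEMORY_CLOCK_WINDOW   2		/* seconds of smoothing */
#define MEMORY_CLOCK_INTERVAL 1000	/* wake kswapd every 1000 ticks */

static int memory_clock;
static int memory_clock_rubberband;

static void wakeup_kswapd(void)
{
	printf("wakeup_kswapd() at clock %d\n", memory_clock);
}

/* Called once per reclaimed page, like memory_clock_tick() in the patch. */
static void memory_clock_tick(void)
{
	static int memory_clock_prev;

	memory_clock++;
	if (memory_clock - memory_clock_prev >= MEMORY_CLOCK_INTERVAL) {
		memory_clock_prev = memory_clock;
		wakeup_kswapd();
	}
}

int main(void)
{
	int i;

	/* simulate 2500 page reclaims; kswapd is woken at clock 1000 and 2000 */
	for (i = 0; i < 2500; i++)
		memory_clock_tick();

	/* one smoothing step: move the rubberband part way towards the clock */
	memory_clock_rubberband +=
		(memory_clock - memory_clock_rubberband) / MEMORY_CLOCK_WINDOW;

	/* the remaining gap, scaled by the window, estimates the reclaim rate */
	printf("inactive_target = %d pages/s\n",
	       (memory_clock - memory_clock_rubberband) / MEMORY_CLOCK_WINDOW);
	return 0;
}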
/RogerL

--
Roger Larsson
Skelleftea
Sweden

*******************************************
Patch prepared by: roger.larsson@norran.net

--- linux/mm/vmscan.c.orig	Wed Aug 15 23:28:31 2001
+++ linux/mm/vmscan.c	Sat Aug 18 01:38:09 2001
@@ -70,7 +70,8 @@
 	inactive += zone->inactive_clean_pages;
 	inactive += zone->free_pages;
 
-	return (inactive > (zone->size / 3));
+	return (inactive > zone->pages_high
+			+ 2*pager_daemon.memory_clock_interval);
 }
 
 static unsigned int zone_free_plenty(zone_t *zone)
@@ -80,7 +81,7 @@
 	free = zone->free_pages;
 	free += zone->inactive_clean_pages;
 
-	return free > zone->pages_high*2;
+	return free > zone->pages_high + pager_daemon.memory_clock_interval;
 }
 
 /* mm->page_table_lock is held. mmap_sem is not held */
@@ -445,19 +446,37 @@
 		goto out;
 
 found_page:
-	memory_pressure++;
 	del_page_from_inactive_clean_list(page);
 	UnlockPage(page);
 	page->age = PAGE_AGE_START;
 	if (page_count(page) != 1)
 		printk("VM: reclaim_page, found page with count %d!\n",
 			page_count(page));
+
+
+
 out:
 	spin_unlock(&pagemap_lru_lock);
 	spin_unlock(&pagecache_lock);
 	return page;
 }
 
+void memory_clock_tick(void)
+{
+	static int memory_clock_prev;
+
+	memory_clock++;
+
+	/* prevent dangerous wrap difference due to extremely fast allocs */
+	if (memory_clock - memory_clock_rubberband >= MEMORY_CLOCK_MAX_DIFF)
+		memory_clock_rubberband++;
+
+	if (memory_clock - memory_clock_prev >= pager_daemon.memory_clock_interval) {
+		memory_clock_prev = memory_clock;
+		wakeup_kswapd();
+	}
+}
+
 /**
  * page_launder - clean dirty inactive pages, move to inactive_clean list
  * @gfp_mask: what operations we are allowed to do
@@ -743,7 +762,7 @@
 {
 	pg_data_t *pgdat;
 	unsigned int global_free = 0;
-	unsigned int global_target = freepages.high;
+	unsigned int global_target = freepages.low;
 
 	/* Are we low on free pages anywhere? */
 	pgdat = pgdat_list;
@@ -778,7 +797,7 @@
 int inactive_shortage(void)
 {
 	pg_data_t *pgdat;
-	unsigned int global_target = freepages.high + inactive_target;
+	unsigned int global_target = freepages.high + pager_daemon.memory_clock_interval;
 	unsigned int global_incative = 0;
 
 	pgdat = pgdat_list;
@@ -914,15 +933,8 @@
 	 * Kswapd main loop.
 	 */
 	for (;;) {
-		static long recalc = 0;
-
-		/* Once a second ... */
-		if (time_after(jiffies, recalc + HZ)) {
-			recalc = jiffies;
-
-			/* Recalculate VM statistics. */
-			recalculate_vm_stats();
-		}
+		/* Recalculate VM statistics. Time independent implementation. */
+		recalculate_vm_stats();
 
 		if (!do_try_to_free_pages(GFP_KSWAPD, 1)) {
 			if (out_of_memory())
@@ -929,7 +941,7 @@
 				oom_kill();
 		}
 
 		run_task_queue(&tq_disk);
-		interruptible_sleep_on_timeout(&kswapd_wait, HZ);
+		interruptible_sleep_on_timeout(&kswapd_wait, 10*HZ);
 	}
 }
--- linux/mm/page_alloc.c.orig	Wed Aug 15 23:55:24 2001
+++ linux/mm/page_alloc.c	Sat Aug 18 01:08:54 2001
@@ -135,14 +135,6 @@
 	memlist_add_head(&(base + page_idx)->list, &area->free_list);
 	spin_unlock_irqrestore(&zone->lock, flags);
-
-	/*
-	 * We don't want to protect this variable from race conditions
-	 * since it's nothing important, but we do want to make sure
-	 * it never gets negative.
-	 */
-	if (memory_pressure > NR_CPUS)
-		memory_pressure--;
 }
 
 #define MARK_USED(index, order, area) \
 	change_bit((index) >> (1+(order)), (area)->map)
@@ -286,11 +278,6 @@
 	struct page * page;
 
 	/*
-	 * Allocations put pressure on the VM subsystem.
-	 */
-	memory_pressure++;
-
-	/*
 	 * (If anyone calls gfp from interrupts nonatomically then it
 	 * will sooner or later tripped up by a schedule().)
 	 *
@@ -343,6 +330,11 @@
 		return page;
 
 	/*
+	 * Reclaiming a page results in pressure on the VM subsystem.
+	 */
+	memory_clock_tick();
+
+	/*
 	 * Then try to allocate a page from a zone with more
 	 * than zone->pages_low free + inactive_clean pages.
 	 *
@@ -371,8 +363,8 @@
 	 * - if we don't have __GFP_IO set, kswapd may be
 	 *   able to free some memory we can't free ourselves
 	 */
-	wakeup_kswapd();
 	if (gfp_mask & __GFP_WAIT) {
+		wakeup_kswapd();	/* only wakeup if we will sleep */
 		__set_current_state(TASK_RUNNING);
 		current->policy |= SCHED_YIELD;
 		schedule();
@@ -502,7 +494,7 @@
 	}
 
 	/* No luck.. */
-	printk(KERN_ERR "__alloc_pages: %lu-order allocation failed.\n", order);
+	printk(KERN_ERR "__alloc_pages: %lu-order, 0x%lx-flags allocation failed.\n", order, current->flags);
 	return NULL;
 }
 
--- linux/mm/swap.c.orig	Wed Aug 15 23:58:30 2001
+++ linux/mm/swap.c	Fri Aug 17 23:20:19 2001
@@ -46,11 +46,11 @@
 * is doing, averaged over a minute. We use this to determine how
 * many inactive pages we should have.
 *
- * In reclaim_page and __alloc_pages: memory_pressure++
- * In __free_pages_ok: memory_pressure--
- * In recalculate_vm_stats the value is decayed (once a second)
+ * In reclaim_page: memory_clock++
+ * In recalculate_vm_stats the memory_clock_rubberband is moved (once a second)
 */
-int memory_pressure;
+int memory_clock;
+int memory_clock_rubberband;
 
 /* We track the number of pages currently being asynchronously swapped
    out, so that we don't try to swap TOO many pages out at once */
@@ -72,6 +72,7 @@
 	512,	/* base number for calculating the number of tries */
 	SWAP_CLUSTER_MAX,	/* minimum number of tries */
 	8,	/* do swap I/O in clusters of this size */
+	1000,	/* memory_clock_interval */
 };
 
 /**
@@ -201,13 +202,34 @@
 * some useful statistics the VM subsystem uses to determine
 * its behaviour.
 */
+
 void recalculate_vm_stats(void)
 {
-	/*
-	 * Substract one second worth of memory_pressure from
-	 * memory_pressure.
-	 */
-	memory_pressure -= (memory_pressure >> INACTIVE_SHIFT);
+	static unsigned long jiffies_at_prev_update;
+	unsigned long jiffies_now = jiffies;
+
+	if (jiffies_now != jiffies_at_prev_update)
+	{
+		long elapsed = jiffies_now - jiffies_at_prev_update;
+
+		/*
+		 * Move memory_clock_rubberband part of the way
+		 * towards memory_clock.
+		 */
+		int old = memory_clock_rubberband;
+
+		/* "exact" formula... can be optimised */
+		int diff = (elapsed * (memory_clock - old) + (MEMORY_CLOCK_WINDOW * HZ + elapsed - 1)) / (MEMORY_CLOCK_WINDOW * HZ + elapsed);

+		/* new can NEVER pass memory_clock, since this is the only place where
+		 * it is changed; if the values are close it will catch up sooner or later */
+		int new = old + diff;
+
+		memory_clock_rubberband = new;
+
+		jiffies_at_prev_update = jiffies_now;
+		printk("VM: clock %5d target %5d\n", memory_clock, inactive_target);
+	}
 }
 
 /*
--- linux/include/linux/swap.h.orig	Wed Aug 15 23:36:27 2001
+++ linux/include/linux/swap.h	Fri Aug 17 22:34:57 2001
@@ -99,7 +99,8 @@
 struct zone_t;
 
 /* linux/mm/swap.c */
-extern int memory_pressure;
+extern int memory_clock;
+extern int memory_clock_rubberband;
 extern void deactivate_page(struct page *);
 extern void deactivate_page_nolock(struct page *);
 extern void activate_page(struct page *);
@@ -119,6 +120,7 @@
 extern int inactive_shortage(void);
 extern void wakeup_kswapd(void);
 extern int try_to_free_pages(unsigned int gfp_mask);
+extern void memory_clock_tick(void);	/* TODO: move this to swap.c or t.o.w.a. */
 
 /* linux/mm/page_io.c */
 extern void rw_swap_page(int, struct page *);
@@ -249,16 +251,27 @@
 	ZERO_PAGE_BUG					\
 }
 
+
+/*
+ * The memory_clock_rubberband is calculated to be
+ * approximately where memory_clock was
+ * MEMORY_CLOCK_WINDOW seconds ago.
+ * Note: please use a power of two...
+ */
+#define MEMORY_CLOCK_WINDOW 2
+
+/* to prevent overflow in calculations */
+#define MEMORY_CLOCK_MAX_DIFF ((1 << (8*sizeof(memory_clock) - 2)) / (MEMORY_CLOCK_WINDOW*HZ))
+
 /*
- * In mm/swap.c::recalculate_vm_stats(), we substract
- * inactive_target from memory_pressure every second.
- * This means that memory_pressure is smoothed over
- * 64 (1 << INACTIVE_SHIFT) seconds.
+ * The inactive_target is measured in pages/second.
+ * In mm/swap.c::recalculate_vm_stats(), we move
+ * the memory_clock_rubberband towards memory_clock.
+ * Note: the difference can never be negative; unsigned wrap is taken care of.
+ *
 */
-#define INACTIVE_SHIFT 6
-#define inactive_min(a,b) ((a) < (b) ? (a) : (b))
-#define inactive_target inactive_min((memory_pressure >> INACTIVE_SHIFT), \
-		(num_physpages / 4))
+#define inactive_target ((memory_clock - memory_clock_rubberband)/MEMORY_CLOCK_WINDOW)
+
 
 /*
 * Ugly ugly ugly HACK to make sure the inactive lists
--- linux/include/linux/swapctl.h.orig	Fri Aug 17 23:10:34 2001
+++ linux/include/linux/swapctl.h	Fri Aug 17 23:17:00 2001
@@ -23,13 +23,14 @@
 typedef freepages_v1 freepages_t;
 extern freepages_t freepages;
 
-typedef struct pager_daemon_v1
+typedef struct pager_daemon_v2
 {
 	unsigned int tries_base;
 	unsigned int tries_min;
 	unsigned int swap_cluster;
-} pager_daemon_v1;
-typedef pager_daemon_v1 pager_daemon_t;
+	unsigned int memory_clock_interval;
+} pager_daemon_v2;
+typedef pager_daemon_v2 pager_daemon_t;
 extern pager_daemon_t pager_daemon;
 
 #endif /* _LINUX_SWAPCTL_H */
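For reference, the rubberband update in recalculate_vm_stats() above can
be tried standalone. A sketch follows (HZ and the starting gap are made
up for illustration); it shows the gap between memory_clock and the
rubberband decaying geometrically, while the rounding-up term guarantees
diff >= 1 whenever the clock is ahead, so the rubberband always catches
up eventually:

#include <stdio.h>

#define HZ 100			/* assumed tick rate, for illustration */
#define MEMORY_CLOCK_WINDOW 2

int main(void)
{
	int memory_clock = 3000, rubberband = 0;
	long elapsed = HZ;	/* pretend one second passes per update */
	int i;

	for (i = 0; i < 10; i++) {
		int old = rubberband;
		/* same "exact" formula as in the patch */
		int diff = (elapsed * (memory_clock - old)
			    + (MEMORY_CLOCK_WINDOW * HZ + elapsed - 1))
			   / (MEMORY_CLOCK_WINDOW * HZ + elapsed);

		rubberband = old + diff;
		printf("second %2d: rubberband %4d, gap %4d\n",
		       i + 1, rubberband, memory_clock - rubberband);
	}
	return 0;
}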
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/