From: "Stephen C. Tweedie" <sct@redhat.com>
To: "Stephen C. Tweedie" <sct@redhat.com>
Cc: Alan Cox <number6@the-village.bc.nu>,
linux-mm@kvack.org, Rik van Riel <H.H.vanRiel@fys.ruu.nl>,
linux-kernel@vger.rutgers.edu
Subject: Re: Tiny one-line fix to swap readahead
Date: Thu, 10 Dec 1998 19:22:30 GMT
Message-ID: <199812101922.TAA17817@dax.scot.redhat.com>
In-Reply-To: <199812082259.WAA00875@dax.scot.redhat.com>
Hi,
On Tue, 8 Dec 1998 22:59:09 GMT, "Stephen C. Tweedie" <sct@redhat.com>
said:
> I just noticed this when experimenting with a slightly different swapin
> optimisation: swapping in entire 64k aligned blocks rather than doing
> strict readahead.
Right, that optimisation now seems to be running happily, including
aligned pagein of mmap()ed regions as well as aligned cluster swapin.
The patch below is against 2.1.131-ac7. The principle behind swapping
in aligned clusters is that we can rapidly page in entire large blocks
of a vm without leaving small holes between the regions we read in when
the access pattern is random.
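
For the curious, the alignment is nothing more than rounding the
faulting offset down to a cluster boundary and reading the whole block
from there.  A throwaway userspace sketch of the same arithmetic
(illustration only, with made-up example values, not the kernel code
itself):

/*
 * Round a faulting byte offset down to a swap_cluster boundary and
 * enumerate the pages an aligned readahead would pull in.  Assumes a
 * 4k page (PAGE_SHIFT == 12) and the 16-page (64k) default cluster.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

static unsigned long swap_cluster = 16;		/* pages per cluster */

static unsigned long cluster_start(unsigned long offset)
{
	unsigned long pg = offset >> PAGE_SHIFT;	/* byte offset -> page index */

	pg = (pg / swap_cluster) * swap_cluster;	/* align down to cluster */
	return pg << PAGE_SHIFT;			/* back to a byte offset */
}

int main(void)
{
	unsigned long fault = 0x2b000;			/* example faulting offset */
	unsigned long reada = cluster_start(fault);
	unsigned long i;

	for (i = 0; i < swap_cluster; i++, reada += PAGE_SIZE)
		printf("would read page at offset %#lx\n", reada);
	return 0;
}
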
Performance is *markedly* improved, even against ac7, which was itself
the fastest vm I have ever benchmarked. 4MB boots comfortably. 8MB
compiles defrag over NFS in under four minutes: ac7 took 4:30 and even
2.0 took over 5 minutes. Application startup is improved across the
board and even in 64MB, switching between X desktops is much faster than
before.
The default pagein cluster size is 16k (4 pages) for 16MB memory or
less; 32k (8 pages) for 32MB or less; otherwise 64k (16 pages). This is
tunable via
/proc/sys/vm/swap_cluster.
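
For example, to try a different cluster size at runtime you just write
a page count to that file (root only; the sysctl entry is created with
mode 0600).  A trivial userspace sketch:

/*
 * Set the swap/pagein cluster size to 8 pages by writing to the new
 * sysctl file added by this patch.  Must be run as root.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/vm/swap_cluster", "w");

	if (!f) {
		perror("/proc/sys/vm/swap_cluster");
		return 1;
	}
	fprintf(f, "8\n");	/* number of pages per readahead cluster */
	return fclose(f) ? 1 : 0;
}
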
This patch also includes the swap readahead bugfix to avoid touching
SWAP_MAP_BAD swap pages.
Enjoy,
Stephen.
----------------------------------------------------------------
--- include/linux/mm.h.~1~ Thu Dec 10 16:38:41 1998
+++ include/linux/mm.h Thu Dec 10 16:47:17 1998
@@ -11,6 +11,7 @@
extern unsigned long max_mapnr;
extern unsigned long num_physpages;
extern void * high_memory;
+extern int swap_cluster;
#include <asm/page.h>
#include <asm/atomic.h>
--- include/linux/swap.h.~1~ Thu Dec 10 16:38:42 1998
+++ include/linux/swap.h Thu Dec 10 16:47:25 1998
@@ -69,6 +69,9 @@
/* linux/ipc/shm.c */
extern int shm_swap (int, int);
+/* linux/mm/swap.c */
+extern void swap_setup (void);
+
/* linux/mm/vmscan.c */
extern int try_to_free_pages(unsigned int gfp_mask, int count);
--- include/linux/sysctl.h.~1~ Thu Dec 10 16:38:42 1998
+++ include/linux/sysctl.h Thu Dec 10 16:43:29 1998
@@ -111,7 +111,8 @@
VM_BUFFERMEM=6, /* struct: Set buffer memory thresholds */
VM_PAGECACHE=7, /* struct: Set cache memory thresholds */
VM_PAGERDAEMON=8, /* struct: Control kswapd behaviour */
- VM_PGT_CACHE=9 /* struct: Set page table cache parameters */
+ VM_PGT_CACHE=9, /* struct: Set page table cache parameters */
+ VM_SWAP_CLUSTER=10 /* int: set number of pages to swap together */
};
--- kernel/sysctl.c.~1~ Thu Dec 10 16:38:43 1998
+++ kernel/sysctl.c Thu Dec 10 17:04:19 1998
@@ -242,6 +242,8 @@
&pager_daemon, sizeof(pager_daemon_t), 0644, NULL, &proc_dointvec},
{VM_PGT_CACHE, "pagetable_cache",
&pgt_cache_water, 2*sizeof(int), 0600, NULL, &proc_dointvec},
+ {VM_SWAP_CLUSTER, "swap_cluster",
+ &swap_cluster, sizeof(int), 0600, NULL, &proc_dointvec},
{0}
};
--- mm/filemap.c.~1~ Thu Dec 10 16:38:10 1998
+++ mm/filemap.c Thu Dec 10 16:43:29 1998
@@ -231,6 +231,7 @@
page = mem_map;
}
} while (count_max > 0 && count_min > 0);
+ schedule();
return 0;
}
@@ -974,7 +975,7 @@
struct file * file = area->vm_file;
struct dentry * dentry = file->f_dentry;
struct inode * inode = dentry->d_inode;
- unsigned long offset;
+ unsigned long offset, reada, i;
struct page * page, **hash;
unsigned long old_page, new_page;
@@ -1035,7 +1036,19 @@
return new_page;
no_cached_page:
- new_page = __get_free_page(GFP_USER);
+ /*
+ * Try to read in an entire cluster at once.
+ */
+ reada = offset;
+ reada >>= PAGE_SHIFT;
+ reada = (reada / swap_cluster) * swap_cluster;
+ reada <<= PAGE_SHIFT;
+
+ for (i=0; i<swap_cluster; i++, reada += PAGE_SIZE)
+ new_page = try_to_read_ahead(file, reada, new_page);
+
+ if (!new_page)
+ new_page = __get_free_page(GFP_USER);
if (!new_page)
goto no_page;
@@ -1059,11 +1072,6 @@
if (inode->i_op->readpage(file, page) != 0)
goto failure;
- /*
- * Do a very limited read-ahead if appropriate
- */
- if (PageLocked(page))
- new_page = try_to_read_ahead(file, offset + PAGE_SIZE, 0);
goto found_page;
page_locked_wait:
--- mm/page_alloc.c.~1~ Thu Dec 10 16:38:43 1998
+++ mm/page_alloc.c Thu Dec 10 16:44:51 1998
@@ -359,27 +359,28 @@
return start_mem;
}
-/*
- * Primitive swap readahead code. We simply read the
- * next 8 entries in the swap area. This method is
- * chosen because it doesn't cost us any seek time.
- * We also make sure to queue the 'original' request
- * together with the readahead ones...
+/*
+ * Primitive swap readahead code. We simply read an aligned block of
+ * (swap_cluster) entries in the swap area. This method is chosen
+ * because it doesn't cost us any seek time. We also make sure to queue
+ * the 'original' request together with the readahead ones...
*/
void swapin_readahead(unsigned long entry) {
int i;
struct page *new_page;
unsigned long offset = SWP_OFFSET(entry);
struct swap_info_struct *swapdev = SWP_TYPE(entry) + swap_info;
-
- for (i = 0; i < 8; i++) {
+
+ offset = (offset/swap_cluster) * swap_cluster;
+
+ for (i = 0; i < swap_cluster; i++) {
if (offset >= swapdev->max
|| nr_free_pages - atomic_read(&nr_async_pages) <
(freepages.high + freepages.low)/2)
return;
if (!swapdev->swap_map[offset] ||
swapdev->swap_map[offset] == SWAP_MAP_BAD ||
- test_bit(offset, swapdev->swap_lockmap))
+ test_bit(offset, swapdev->swap_lockmap))
continue;
new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset), 0);
if (new_page != NULL)
--- mm/swap.c.~1~ Thu Dec 10 16:38:43 1998
+++ mm/swap.c Thu Dec 10 16:44:09 1998
@@ -39,6 +39,9 @@
144 /* freepages.high */
};
+/* How many pages do we try to swap or page in/out together? */
+int swap_cluster = 16; /* Default value modified in swap_setup() */
+
/* We track the number of pages currently being asynchronously swapped
out, so that we don't try to swap TOO many pages out at once */
atomic_t nr_async_pages = ATOMIC_INIT(0);
@@ -77,3 +80,19 @@
SWAP_CLUSTER_MAX, /* minimum number of tries */
SWAP_CLUSTER_MAX, /* do swap I/O in clusters of this size */
};
+
+
+/*
+ * Perform any setup for the swap system
+ */
+
+void __init swap_setup(void)
+{
+ /* Use a smaller cluster for memory <16MB or <32MB */
+ if (num_physpages < ((16 * 1024 * 1024) >> PAGE_SHIFT))
+ swap_cluster = 4;
+ else if (num_physpages < ((32 * 1024 * 1024) >> PAGE_SHIFT))
+ swap_cluster = 8;
+ else
+ swap_cluster = 16;
+}
--- mm/vmscan.c.~1~ Thu Dec 10 16:38:43 1998
+++ mm/vmscan.c Thu Dec 10 16:43:29 1998
@@ -469,6 +469,8 @@
int i;
char *revision="$Revision: 1.5 $", *s, *e;
+ swap_setup();
+
if ((s = strchr(revision, ':')) &&
(e = strchr(s, '$')))
s++, i = e - s;