linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] /dev/zero page fault scaling
@ 2004-07-14 19:27 Brent Casavant
  2004-07-14 20:39 ` Hugh Dickins
  0 siblings, 1 reply; 10+ messages in thread
From: Brent Casavant @ 2004-07-14 19:27 UTC (permalink / raw)
  To: linux-mm

As discussed earlier this week on the linux-mm list, there are some
scaling issues with the sbinfo stat_lock in mm/shmem.c.  In particular,
bouncing the corresponding cache-line between CPUs in a large machine
causes a dramatic slowdown in page fault performance.

However, the superblock statistics being kept for the /dev/zero use
of this code are unnecessary, and I don't even think there's a way
to obtain them.  The attached patch causes the relevant sections of
code to skip the locks and statistic updates for /dev/zero, causing
a significant speedup.

In a test program to measure the page fault performance, at 256P we
see a 150x improvement in the number of page faults per cpu per
wall-clock second (and other similar measures).  Page fault performance
drops by about 50% at 512P compared to 256P; this is likely
a separate problem (investigation has not started), but it is still
138x better than before these changes.

I'm not sure if this list is the appropriate place to submit these
changes.  If not, please direct me to the correct lists/people to
submit this to.  The patch is against 2.6.(something recent, maybe 7).

Signed-off-by: Brent Casavant <bcasavan@sgi.com>

--- linux.orig/mm/shmem.c	2004-07-13 17:20:34.000000000 -0500
+++ linux/mm/shmem.c	2004-07-13 17:09:32.000000000 -0500
@@ -60,6 +60,7 @@
 /* info->flags needs VM_flags to handle pagein/truncate races efficiently */
 #define SHMEM_PAGEIN	 VM_READ
 #define SHMEM_TRUNCATE	 VM_WRITE
+#define SHMEM_NOSBINFO	 VM_EXEC

 /* Pretend that each entry is of this size in directory's i_size */
 #define BOGO_DIRENT_SIZE 20
@@ -185,6 +186,9 @@
 static void shmem_free_block(struct inode *inode)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+
+	if (SHMEM_I(inode)->flags & SHMEM_NOSBINFO)
+		return;
 	spin_lock(&sbinfo->stat_lock);
 	sbinfo->free_blocks++;
 	inode->i_blocks -= BLOCKS_PER_PAGE;
@@ -213,11 +217,14 @@
 	if (freed > 0) {
 		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 		info->alloced -= freed;
+		shmem_unacct_blocks(info->flags, freed);
+
+		if (info->flags & SHMEM_NOSBINFO)
+			return;
 		spin_lock(&sbinfo->stat_lock);
 		sbinfo->free_blocks += freed;
 		inode->i_blocks -= freed*BLOCKS_PER_PAGE;
 		spin_unlock(&sbinfo->stat_lock);
-		shmem_unacct_blocks(info->flags, freed);
 	}
 }

@@ -351,14 +358,16 @@
 		 * page (and perhaps indirect index pages) yet to allocate:
 		 * a waste to allocate index if we cannot allocate data.
 		 */
-		spin_lock(&sbinfo->stat_lock);
-		if (sbinfo->free_blocks <= 1) {
+		if (!(info->flags & SHMEM_NOSBINFO)) {
+			spin_lock(&sbinfo->stat_lock);
+			if (sbinfo->free_blocks <= 1) {
+				spin_unlock(&sbinfo->stat_lock);
+				return ERR_PTR(-ENOSPC);
+			}
+			sbinfo->free_blocks--;
+			inode->i_blocks += BLOCKS_PER_PAGE;
 			spin_unlock(&sbinfo->stat_lock);
-			return ERR_PTR(-ENOSPC);
 		}
-		sbinfo->free_blocks--;
-		inode->i_blocks += BLOCKS_PER_PAGE;
-		spin_unlock(&sbinfo->stat_lock);

 		spin_unlock(&info->lock);
 		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
@@ -1002,16 +1005,24 @@
 	} else {
 		shmem_swp_unmap(entry);
 		sbinfo = SHMEM_SB(inode->i_sb);
-		spin_lock(&sbinfo->stat_lock);
-		if (sbinfo->free_blocks == 0 || shmem_acct_block(info->flags)) {
+		if (!(info->flags & SHMEM_NOSBINFO)) {
+			spin_lock(&sbinfo->stat_lock);
+			if (sbinfo->free_blocks == 0 || shmem_acct_block(info->flags)) {
+				spin_unlock(&sbinfo->stat_lock);
+				spin_unlock(&info->lock);
+				error = -ENOSPC;
+				goto failed;
+			}
+			sbinfo->free_blocks--;
+			inode->i_blocks += BLOCKS_PER_PAGE;
 			spin_unlock(&sbinfo->stat_lock);
-			spin_unlock(&info->lock);
-			error = -ENOSPC;
-			goto failed;
+		} else {
+			if (shmem_acct_block(info->flags)) {
+				spin_unlock(&info->lock);
+				error = -ENOSPC;
+				goto failed;
+			}
 		}
-		sbinfo->free_blocks--;
-		inode->i_blocks += BLOCKS_PER_PAGE;
-		spin_unlock(&sbinfo->stat_lock);

 		if (!filepage) {
 			spin_unlock(&info->lock);
@@ -2032,6 +2049,7 @@
 	struct inode *inode;
 	struct dentry *dentry, *root;
 	struct qstr this;
+	struct shmem_inode_info *info;

 	if (IS_ERR(shm_mnt))
 		return (void *)shm_mnt;
@@ -2061,7 +2079,11 @@
 	if (!inode)
 		goto close_file;

-	SHMEM_I(inode)->flags = flags & VM_ACCOUNT;
+	info = SHMEM_I(inode);
+	info->flags = flags & VM_ACCOUNT;
+	if (0 == strcmp("dev/zero", name)) {
+		info->flags |= SHMEM_NOSBINFO;
+	}
 	d_instantiate(dentry, inode);
 	inode->i_size = size;
 	inode->i_nlink = 0;	/* It is unlinked */

-- 
Brent Casavant             bcasavan@sgi.com        Forget bright-eyed and
Operating System Engineer  http://www.sgi.com/     bushy-tailed; I'm red-
Silicon Graphics, Inc.     44.8562N 93.1355W 860F  eyed and bushy-haired.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2004-08-02 14:37 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-07-14 19:27 [PATCH] /dev/zero page fault scaling Brent Casavant
2004-07-14 20:39 ` Hugh Dickins
2004-07-14 21:31   ` Brent Casavant
2004-07-15 16:28   ` Brent Casavant
2004-07-15 20:28     ` Hugh Dickins
2004-07-15 21:36       ` Brent Casavant
2004-07-15 21:52       ` Brent Casavant
2004-07-15 23:21         ` Hugh Dickins
2004-07-16 22:35       ` Brent Casavant
2004-08-02 14:37         ` Brent Casavant

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox