linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [Patch] shm cleanups
@ 1999-11-03 19:17 Christoph Rohland
  0 siblings, 0 replies; 14+ messages in thread
From: Christoph Rohland @ 1999-11-03 19:17 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: MM mailing list, Kanoj Sarcar, Ingo Molnar

[-- Attachment #1: patch-25-shm3 --]
[-- Type: application/octet-stream, Size: 43680 bytes --]

diff -uNr 2.3.25/arch/i386/kernel/sys_i386.c 2.3.25-shm3/arch/i386/kernel/sys_i386.c
--- 2.3.25/arch/i386/kernel/sys_i386.c	Fri Dec 18 01:27:35 1998
+++ 2.3.25-shm3/arch/i386/kernel/sys_i386.c	Wed Nov  3 11:28:11 1999
@@ -116,86 +116,71 @@
 	version = call >> 16; /* hack for backward compatibility */
 	call &= 0xffff;
 
-	if (call <= SEMCTL)
-		switch (call) {
-		case SEMOP:
-			return sys_semop (first, (struct sembuf *)ptr, second);
-		case SEMGET:
-			return sys_semget (first, second, third);
-		case SEMCTL: {
-			union semun fourth;
-			if (!ptr)
-				return -EINVAL;
-			if (get_user(fourth.__pad, (void **) ptr))
-				return -EFAULT;
-			return sys_semctl (first, second, third, fourth);
-			}
-		default:
-			return -EINVAL;
-		}
+        switch (call) {
+        case SEMOP:
+                return sys_semop (first, (struct sembuf *)ptr, second);
+        case SEMGET:
+                return sys_semget (first, second, third);
+        case SEMCTL: {
+                union semun fourth;
+                if (!ptr)
+                        return -EINVAL;
+                if (get_user(fourth.__pad, (void **) ptr))
+                        return -EFAULT;
+                return sys_semctl (first, second, third, fourth);
+        }
 
-	if (call <= MSGCTL) 
-		switch (call) {
-		case MSGSND:
-			return sys_msgsnd (first, (struct msgbuf *) ptr, 
-					  second, third);
-		case MSGRCV:
-			switch (version) {
-			case 0: {
-				struct ipc_kludge tmp;
-				if (!ptr)
-					return -EINVAL;
-				
-				if (copy_from_user(&tmp,
-						   (struct ipc_kludge *) ptr, 
-						   sizeof (tmp)))
-					return -EFAULT;
-				return sys_msgrcv (first, tmp.msgp, second,
-						   tmp.msgtyp, third);
-				}
-			default:
-				return sys_msgrcv (first,
-						   (struct msgbuf *) ptr,
-						   second, fifth, third);
-			}
-		case MSGGET:
-			return sys_msgget ((key_t) first, second);
-		case MSGCTL:
-			return sys_msgctl (first, second,
-					   (struct msqid_ds *) ptr);
-		default:
-			return -EINVAL;
-		}
-	if (call <= SHMCTL) 
-		switch (call) {
-		case SHMAT:
-			switch (version) {
-			default: {
-				ulong raddr;
-				ret = sys_shmat (first, (char *) ptr,
-						 second, &raddr);
-				if (ret)
-					return ret;
-				return put_user (raddr, (ulong *) third);
-			}
-			case 1:	/* iBCS2 emulator entry point */
-				if (!segment_eq(get_fs(), get_ds()))
-					return -EINVAL;
-				return sys_shmat (first, (char *) ptr,
-						  second, (ulong *) third);
-			}
-		case SHMDT: 
-			return sys_shmdt ((char *)ptr);
-		case SHMGET:
-			return sys_shmget (first, second, third);
-		case SHMCTL:
-			return sys_shmctl (first, second,
-					   (struct shmid_ds *) ptr);
-		default:
-			return -EINVAL;
-		}
-	
-	return -EINVAL;
+        case MSGSND:
+                return sys_msgsnd (first, (struct msgbuf *) ptr, 
+                                   second, third);
+        case MSGRCV:
+                switch (version) {
+                case 0: {
+                        struct ipc_kludge tmp;
+                        if (!ptr)
+                                return -EINVAL;
+                        
+                        if (copy_from_user(&tmp,
+                                           (struct ipc_kludge *) ptr, 
+                                           sizeof (tmp)))
+                                return -EFAULT;
+                        return sys_msgrcv (first, tmp.msgp, second,
+                                           tmp.msgtyp, third);
+                }
+                default:
+                        return sys_msgrcv (first,
+                                           (struct msgbuf *) ptr,
+                                           second, fifth, third);
+                }
+        case MSGGET:
+                return sys_msgget ((key_t) first, second);
+        case MSGCTL:
+                return sys_msgctl (first, second, (struct msqid_ds *) ptr);
+
+        case SHMAT:
+                switch (version) {
+                default: {
+                        ulong raddr;
+                        ret = sys_shmat (first, (char *) ptr, second, &raddr);
+                        if (ret)
+                                return ret;
+                        return put_user (raddr, (ulong *) third);
+                }
+                case 1:	/* iBCS2 emulator entry point */
+                        if (!segment_eq(get_fs(), get_ds()))
+                                return -EINVAL;
+                        return sys_shmat (first, (char *) ptr, second, (ulong *) third);
+                }
+        case SHMDT: 
+                return sys_shmdt ((char *)ptr);
+        case SHMGET:
+                return sys_shmget (first, second, third);
+        case SHMCTL:
+                return sys_shmctl (first, second,
+                                   (struct shmid_ds *) ptr);
+        default:
+                return -EINVAL;
+        }
 }
 
 /*
diff -uNr 2.3.25/include/asm-alpha/shmparam.h 2.3.25-shm3/include/asm-alpha/shmparam.h
--- 2.3.25/include/asm-alpha/shmparam.h	Mon Oct  7 14:12:29 1996
+++ 2.3.25-shm3/include/asm-alpha/shmparam.h	Wed Nov  3 11:28:11 1999
@@ -1,47 +1,6 @@
 #ifndef _ASMAXP_SHMPARAM_H
 #define _ASMAXP_SHMPARAM_H
 
-/*
- * Address range for shared memory attaches if no address passed to shmat().
- */
-#define SHM_RANGE_START	0x14000000000
-#define SHM_RANGE_END	0x15000000000
-
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the Alpha and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x3fa000			/* max shared seg size (bytes) */
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define	SHMLBA PAGE_SIZE		/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */
+#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */
 
 #endif /* _ASMAXP_SHMPARAM_H */
diff -uNr 2.3.25/include/asm-arm/proc-armo/shmparam.h 2.3.25-shm3/include/asm-arm/proc-armo/shmparam.h
--- 2.3.25/include/asm-arm/proc-armo/shmparam.h	Wed Jan 21 01:39:42 1998
+++ 2.3.25-shm3/include/asm-arm/proc-armo/shmparam.h	Wed Nov  3 11:28:11 1999
@@ -9,9 +9,7 @@
 #ifndef __ASM_PROC_SHMPARAM_H
 #define __ASM_PROC_SHMPARAM_H
 
-#ifndef SHM_RANGE_START
-#define SHM_RANGE_START	0x00a00000
-#define SHM_RANGE_END	0x00c00000
+#ifndef SHMMAX
 #define SHMMAX		0x003fa000
 #endif
 
diff -uNr 2.3.25/include/asm-arm/proc-armv/shmparam.h 2.3.25-shm3/include/asm-arm/proc-armv/shmparam.h
--- 2.3.25/include/asm-arm/proc-armv/shmparam.h	Wed Jan 21 01:39:43 1998
+++ 2.3.25-shm3/include/asm-arm/proc-armv/shmparam.h	Wed Nov  3 11:28:11 1999
@@ -10,9 +10,7 @@
 #ifndef __ASM_PROC_SHMPARAM_H
 #define __ASM_PROC_SHMPARAM_H
 
-#ifndef SHM_RANGE_START
-#define SHM_RANGE_START	0x50000000
-#define SHM_RANGE_END	0x60000000
+#ifndef SHMMAX
 #define SHMMAX		0x01000000
 #endif
 
diff -uNr 2.3.25/include/asm-arm/shmparam.h 2.3.25-shm3/include/asm-arm/shmparam.h
--- 2.3.25/include/asm-arm/shmparam.h	Mon Oct 25 09:25:29 1999
+++ 2.3.25-shm3/include/asm-arm/shmparam.h	Wed Nov  3 11:28:11 1999
@@ -3,39 +3,6 @@
 
 #include <asm/proc/shmparam.h>
 
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define	SHMLBA PAGE_SIZE		/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */
+#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */
 
 #endif /* _ASMARM_SHMPARAM_H */
diff -uNr 2.3.25/include/asm-i386/shmparam.h 2.3.25-shm3/include/asm-i386/shmparam.h
--- 2.3.25/include/asm-i386/shmparam.h	Sun Nov  8 23:06:18 1998
+++ 2.3.25-shm3/include/asm-i386/shmparam.h	Wed Nov  3 11:28:11 1999
@@ -1,46 +1,6 @@
 #ifndef _ASMI386_SHMPARAM_H
 #define _ASMI386_SHMPARAM_H
 
-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START	0x50000000
-#define SHM_RANGE_END	0x60000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x2000000		/* max shared seg size (bytes) */
-/* Try not to change the default shipped SHMMAX - people rely on it */
-
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define	SHMLBA PAGE_SIZE		/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */
+#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */
 
 #endif /* _ASMI386_SHMPARAM_H */
diff -uNr 2.3.25/include/asm-m68k/shmparam.h 2.3.25-shm3/include/asm-m68k/shmparam.h
--- 2.3.25/include/asm-m68k/shmparam.h	Mon Oct 18 14:35:02 1999
+++ 2.3.25-shm3/include/asm-m68k/shmparam.h	Wed Nov  3 11:28:16 1999
@@ -1,49 +1,6 @@
 #ifndef _M68K_SHMPARAM_H
 #define _M68K_SHMPARAM_H
 
-/* address range for shared memory attaches if no address passed to shmat() */
-#ifndef CONFIG_SUN3
-#define SHM_RANGE_START	0xC0000000
-#define SHM_RANGE_END	0xD0000000
-#else
-#define SHM_RANGE_START	0x0C000000
-#define SHM_RANGE_END	0x0D000000
-#endif
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x1000000		/* max shared seg size (bytes) */
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define	SHMLBA PAGE_SIZE		/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */
+#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */
 
 #endif /* _M68K_SHMPARAM_H */
diff -uNr 2.3.25/include/asm-mips/shmparam.h 2.3.25-shm3/include/asm-mips/shmparam.h
--- 2.3.25/include/asm-mips/shmparam.h	Sat Jun 26 02:37:53 1999
+++ 2.3.25-shm3/include/asm-mips/shmparam.h	Wed Nov  3 11:28:16 1999
@@ -1,49 +1,9 @@
 #ifndef __ASM_MIPS_SHMPARAM_H
 #define __ASM_MIPS_SHMPARAM_H
 
-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START	0x50000000
-#define SHM_RANGE_END	0x60000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x2000000		/* max shared seg size (bytes) */
-/* Try not to change the default shipped SHMMAX - people rely on it */
-
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
 /*
  * This constant is very large but the ABI in it's wisdom says ...
  */
 #define	SHMLBA 0x40000			/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */
 
 #endif /* __ASM_MIPS_SHMPARAM_H */
diff -uNr 2.3.25/include/asm-ppc/shmparam.h 2.3.25-shm3/include/asm-ppc/shmparam.h
--- 2.3.25/include/asm-ppc/shmparam.h	Mon Oct 18 14:35:24 1999
+++ 2.3.25-shm3/include/asm-ppc/shmparam.h	Wed Nov  3 11:28:16 1999
@@ -1,44 +1,6 @@
 #ifndef _PPC_SHMPARAM_H
 #define _PPC_SHMPARAM_H
 
-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START	0x50000000
-#define SHM_RANGE_END	0x60000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x2000000		/* max shared seg size (bytes) */
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define	SHMLBA PAGE_SIZE		/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */
+#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */
 
 #endif /* _PPC_SHMPARAM_H */
diff -uNr 2.3.25/include/asm-sh/shmparam.h 2.3.25-shm3/include/asm-sh/shmparam.h
--- 2.3.25/include/asm-sh/shmparam.h	Mon Oct 18 14:34:54 1999
+++ 2.3.25-shm3/include/asm-sh/shmparam.h	Wed Nov  3 11:28:16 1999
@@ -1,46 +1,6 @@
 #ifndef __ASM_SH_SHMPARAM_H
 #define __ASM_SH_SHMPARAM_H
 
-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START	0x50000000
-#define SHM_RANGE_END	0x60000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x2000000		/* max shared seg size (bytes) */
-/* Try not to change the default shipped SHMMAX - people rely on it */
-
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define	SHMLBA PAGE_SIZE		/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */
+#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */
 
 #endif /* __ASM_SH_SHMPARAM_H */
diff -uNr 2.3.25/include/asm-sparc/shmparam.h 2.3.25-shm3/include/asm-sparc/shmparam.h
--- 2.3.25/include/asm-sparc/shmparam.h	Sun Oct  4 19:22:44 1998
+++ 2.3.25-shm3/include/asm-sparc/shmparam.h	Wed Nov  3 11:28:16 1999
@@ -2,44 +2,6 @@
 #ifndef _ASMSPARC_SHMPARAM_H
 #define _ASMSPARC_SHMPARAM_H
 
-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START	0x10000000
-#define SHM_RANGE_END	0x20000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x1000000		/* max shared seg size (bytes) */
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define	SHMLBA PAGE_SIZE		/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */
+#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */
 
 #endif /* _ASMSPARC_SHMPARAM_H */
diff -uNr 2.3.25/include/asm-sparc64/shmparam.h 2.3.25-shm3/include/asm-sparc64/shmparam.h
--- 2.3.25/include/asm-sparc64/shmparam.h	Sun Oct  4 19:22:44 1998
+++ 2.3.25-shm3/include/asm-sparc64/shmparam.h	Wed Nov  3 11:28:16 1999
@@ -2,46 +2,6 @@
 #ifndef _ASMSPARC64_SHMPARAM_H
 #define _ASMSPARC64_SHMPARAM_H
 
-/* XXX Redo most of this... */
-
-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START	0x10000000
-#define SHM_RANGE_END	0x20000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x1000000		/* max shared seg size (bytes) */
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
 #define	SHMLBA (PAGE_SIZE<<1)		/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */
 
 #endif /* _ASMSPARC64_SHMPARAM_H */
diff -uNr 2.3.25/include/linux/shm.h 2.3.25-shm3/include/linux/shm.h
--- 2.3.25/include/linux/shm.h	Tue Nov  2 12:46:29 1999
+++ 2.3.25-shm3/include/linux/shm.h	Wed Nov  3 11:28:16 1999
@@ -3,6 +3,17 @@
 
 #include <linux/ipc.h>
 
+/*
+ * SHMMAX, SHMMNI and SHMALL are upper limits and defaults which can
+ * be increased by sysctl
+ */
+
+#define SHMMAX 0x2000000		 /* max shared seg size (bytes) */
+#define SHMMIN 1 /* really PAGE_SIZE */	 /* min shared seg size (bytes) */
+#define SHMMNI 128			 /* max num of segs system wide */
+#define SHMALL (SHMMAX/PAGE_SIZE*SHMMNI) /* max shm system wide (pages) */
+#define SHMSEG SHMMNI			 /* max shared segs per process */
+
 #include <asm/shmparam.h>
 
 struct shmid_ds {
@@ -17,15 +28,6 @@
 	unsigned short 		shm_unused;	/* compatibility */
 	void 			*shm_unused2;	/* ditto - used by DIPC */
 	void			*shm_unused3;	/* unused */
-};
-
-struct shmid_kernel
-{	
-	struct shmid_ds		u;
-	/* the following are private */
-	unsigned long		shm_npages;	/* size of segment (pages) */
-	pte_t			*shm_pages;	/* array of ptrs to frames -> SHMMAX */ 
-	struct vm_area_struct	*attaches;	/* descriptors for attaches */
 };
 
 /* permission flag for shmget */
diff -uNr 2.3.25/include/linux/swap.h 2.3.25-shm3/include/linux/swap.h
--- 2.3.25/include/linux/swap.h	Wed Nov  3 18:00:50 1999
+++ 2.3.25-shm3/include/linux/swap.h	Wed Nov  3 11:55:52 1999
@@ -122,17 +122,6 @@
 asmlinkage long sys_swapoff(const char *);
 asmlinkage long sys_swapon(const char *, int);
 
-/*
- * vm_ops not present page codes for shared memory.
- *
- * Will go away eventually..
- */
-#define SHM_SWP_TYPE 0x20
-
-/*
- * swap cache stuff (in linux/mm/swap_state.c)
- */
-
 #define SWAP_CACHE_INFO
 
 #ifdef SWAP_CACHE_INFO
diff -uNr 2.3.25/include/linux/sysctl.h 2.3.25-shm3/include/linux/sysctl.h
--- 2.3.25/include/linux/sysctl.h	Thu Oct 28 09:26:49 1999
+++ 2.3.25-shm3/include/linux/sysctl.h	Wed Nov  3 19:58:43 1999
@@ -103,8 +103,7 @@
 	KERN_MSGPOOL=37,        /* int: Maximum system message pool size */
 	KERN_SYSRQ=38,		/* int: Sysreq enable */
 	KERN_MAX_THREADS=39,    /* int: Maximum nr of threads in the system */
- 	KERN_RANDOM=40,		/* Random driver */
- 	KERN_SHMALL=41		/* int: Maximum size of shared memory */
+ 	KERN_RANDOM=40		/* Random driver */
 };
 
 
diff -uNr 2.3.25/ipc/shm.c 2.3.25-shm3/ipc/shm.c
--- 2.3.25/ipc/shm.c	Tue Nov  2 12:46:29 1999
+++ 2.3.25-shm3/ipc/shm.c	Wed Nov  3 11:55:39 1999
@@ -9,6 +9,8 @@
  * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
  * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
  * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
+ * avoid vmalloc and make shmmax, shmall, shmmni sysctl'able,
+ *                         Christoph Rohland <hans-christoph.rohland@sap.com>
  */
 
 #include <linux/config.h>
@@ -25,7 +27,17 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
+#include "util.h"
+
+struct shmid_kernel /* extend struct shmid_ds with private fields */
+{	
+	struct shmid_ds		u;
+	unsigned long		shm_npages; /* size of segment (pages) */
+	pte_t			**shm_dir;  /* ptr to array of ptrs to frames -> SHMMAX */ 
+	struct vm_area_struct	*attaches;  /* descriptors for attaches */
+        int                     id; /* backreference to id for shm_close */
+};
+
 static int findkey (key_t key);
 static int newseg (key_t key, int shmflg, size_t size);
 static int shm_map (struct vm_area_struct *shmd);
@@ -38,13 +50,15 @@
 static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
 #endif
 
+unsigned int shm_prm[3] = {SHMMAX, SHMALL, SHMMNI};
+
 static int shm_tot = 0; /* total number of shared memory pages */
 static int shm_rss = 0; /* number of shared memory pages that are in memory */
 static int shm_swp = 0; /* number of shared memory pages that are in swap */
-static int max_shmid = 0; /* every used id is <= max_shmid */
+static int max_shmid = -1; /* every used id is <= max_shmid */
 static DECLARE_WAIT_QUEUE_HEAD(shm_wait); /* calling findkey() may need to wait */
-static struct shmid_kernel *shm_segs[SHMMNI];
-
+static struct shmid_kernel **shm_segs = NULL;
+static unsigned int num_segs = 0;
 static unsigned short shm_seq = 0; /* incremented, for recognizing stale ids */
 
 spinlock_t shm_lock = SPIN_LOCK_UNLOCKED;
@@ -56,22 +70,109 @@
 
 void __init shm_init (void)
 {
-	int id;
-
-	for (id = 0; id < SHMMNI; id++)
-		shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
-	shm_tot = shm_rss = shm_seq = max_shmid = used_segs = 0;
-	init_waitqueue_head(&shm_wait);
 #ifdef CONFIG_PROC_FS
 	create_proc_read_entry("sysvipc/shm", 0, 0, sysvipc_shm_read_proc, NULL);
 #endif
 	return;
 }
 
+#define SHM_ENTRY(shp, index) (shp)->shm_dir[(index)/PTRS_PER_PTE][(index)%PTRS_PER_PTE]
+
+static pte_t **shm_alloc(unsigned long pages)
+{
+        unsigned short dir  = pages / PTRS_PER_PTE;
+        unsigned short last = pages % PTRS_PER_PTE;
+        pte_t **ret, **ptr;
+
+        ret = kmalloc ((dir+1) * sizeof(unsigned long), GFP_KERNEL);
+        if (ret == NULL)
+                return NULL;
+
+        for (ptr = ret; ptr < ret+dir ; ptr++)
+        {
+                *ptr = (pte_t *)__get_free_page (GFP_KERNEL);
+                if (*ptr == NULL)
+                        goto free;
+		memset (*ptr, 0, PAGE_SIZE); 
+        }
+
+        /* The last one is probably not of PAGE_SIZE: we use kmalloc */
+        if (last) {
+                *ptr = kmalloc (last*sizeof(pte_t *), GFP_KERNEL);
+                if (*ptr == NULL)
+                        goto free;
+		memset (*ptr, 0, last*sizeof(pte_t *));
+        }
+        
+        return ret;
+
+free:
+        /* The last failed: we decrement first */
+        while (--ptr >= ret)
+                free_page ((unsigned long)*ptr);
+
+        kfree (ret);
+        return NULL;
+}
+
+
+static void shm_free(pte_t** dir, unsigned long pages)
+{
+        pte_t **ptr = dir+pages/PTRS_PER_PTE;
+
+        /* first the last page */
+        if (pages%PTRS_PER_PTE)
+                kfree (*ptr);
+        /* now the whole pages */
+        while (--ptr >= dir)
+                free_page ((unsigned long)*ptr);
+
+        /* Now the indirect block */
+        kfree (dir);
+}
+
+static int shm_expand (unsigned int size)
+{
+        int id;
+        struct shmid_kernel ** new_array;
+
+        spin_unlock(&shm_lock);
+	new_array = kmalloc (size * sizeof(struct shmid_kernel *), GFP_KERNEL);
+        spin_lock(&shm_lock);
+
+	if (!new_array)
+                return -ENOMEM;
+
+        if (size <= num_segs){ /* We check this after kmalloc so
+                                   nobody changes num_segs afterwards */
+                /*
+                 * We never shrink the segment. If we shrink we have to
+                 * check for stale handles in newseg
+                 */
+                kfree (new_array);
+                return 0;
+        }
+
+        if (num_segs) {
+                memcpy (new_array, shm_segs,
+                        size*sizeof(struct shmid_kernel *));
+                kfree (shm_segs);
+        }
+        for (id = num_segs; id < size; id++) 
+		new_array[id] = (void *) IPC_UNUSED;
+
+        shm_segs = new_array;
+        num_segs = size;
+        return 0;
+}
+
 static int findkey (key_t key)
 {
 	int id;
 	struct shmid_kernel *shp;
+        
+        if (!num_segs)
+                return -1;
 
 	for (id = 0; id <= max_shmid; id++) {
 		if ((shp = shm_segs[id]) == IPC_NOID) {
@@ -89,9 +190,8 @@
 			__set_current_state(TASK_RUNNING);
 			remove_wait_queue(&shm_wait, &wait);
 		}
-		if (shp == IPC_UNUSED)
-			continue;
-		if (key == shp->u.shm_perm.key)
+		if (shp != IPC_UNUSED &&
+                    key == shp->u.shm_perm.key)
 			return id;
 	}
 	return -1;
@@ -99,18 +199,33 @@
 
 /*
  * allocate new shmid_kernel and pgtable. protected by shm_segs[id] = NOID.
+ * This has to be called with the shm_lock held
  */
 static int newseg (key_t key, int shmflg, size_t size)
 {
 	struct shmid_kernel *shp;
 	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
-	int id;
+	int id, err;
+        unsigned int shmall, shmmni;
 
+        lock_kernel();
+        shmall = shm_prm[1];
+        shmmni = shm_prm[2];
+        if (shmmni > IPCMNI) {
+                printk ("shmmni reset to max of %u\n", IPCMNI);
+                shmmni = shm_prm[2] = IPCMNI;
+        }
+        unlock_kernel();
+
+        if (shmmni < used_segs)
+                return -ENOSPC;
+        if ((err = shm_expand (shmmni)))
+                return err;
 	if (size < SHMMIN)
 		return -EINVAL;
-	if (shm_tot + numpages >= SHMALL)
+	if (shm_tot + numpages >= shmall)
 		return -ENOSPC;
-	for (id = 0; id < SHMMNI; id++)
+	for (id = 0; id < num_segs; id++)
 		if (shm_segs[id] == IPC_UNUSED) {
 			shm_segs[id] = (struct shmid_kernel *) IPC_NOID;
 			goto found;
@@ -126,10 +241,8 @@
 		wake_up (&shm_wait);
 		return -ENOMEM;
 	}
-	lock_kernel();
-	shp->shm_pages = (pte_t *) vmalloc (numpages*sizeof(pte_t));
-	unlock_kernel();
-	if (!shp->shm_pages) {
+	shp->shm_dir = shm_alloc (numpages);
+	if (!shp->shm_dir) {
 		kfree(shp);
 		spin_lock(&shm_lock);
 		shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
@@ -137,8 +250,6 @@
 		return -ENOMEM;
 	}
 
-	memset(shp->shm_pages, 0, numpages*sizeof(pte_t));
-
 	shp->u.shm_perm.key = key;
 	shp->u.shm_perm.mode = (shmflg & S_IRWXUGO);
 	shp->u.shm_perm.cuid = shp->u.shm_perm.uid = current->euid;
@@ -150,6 +261,7 @@
 	shp->u.shm_atime = shp->u.shm_dtime = 0;
 	shp->u.shm_ctime = CURRENT_TIME;
 	shp->shm_npages = numpages;
+        shp->id = id;
 
 	spin_lock(&shm_lock);
 
@@ -161,21 +273,25 @@
 	shm_segs[id] = shp;
 	used_segs++;
 	wake_up (&shm_wait);
-	return (unsigned int) shp->u.shm_perm.seq * SHMMNI + id;
+	return (unsigned int) shp->u.shm_perm.seq * IPCMNI + id;
 }
 
-size_t shmmax = SHMMAX;
-
 asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
 {
 	struct shmid_kernel *shp;
 	int err, id = 0;
+        size_t shmmax;
+
+        lock_kernel();
+        shmmax = shm_prm[0];
+        unlock_kernel();
+
+	if (size > shmmax)
+		return -EINVAL;
 
 	down(&current->mm->mmap_sem);
 	spin_lock(&shm_lock);
-	if (size > shmmax) {
-		err = -EINVAL;
-	} else if (key == IPC_PRIVATE) {
+	if (key == IPC_PRIVATE) {
 		err = newseg(key, shmflg, size);
 	} else if ((id = findkey (key)) == -1) {
 		if (!(shmflg & IPC_CREAT))
@@ -193,7 +309,7 @@
 		else if (ipcperms (&shp->u.shm_perm, shmflg))
 			err = -EACCES;
 		else
-			err = (int) shp->u.shm_perm.seq * SHMMNI + id;
+			err = (int) shp->u.shm_perm.seq * IPCMNI + id;
 	}
 	spin_unlock(&shm_lock);
 	up(&current->mm->mmap_sem);
@@ -214,18 +330,18 @@
 	if (shp == IPC_NOID || shp == IPC_UNUSED)
 		BUG();
 	shp->u.shm_perm.seq++;     /* for shmat */
-	shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/SHMMNI); /* increment, but avoid overflow */
+	shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/IPCMNI); /* increment, but avoid overflow */
 	shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
 	used_segs--;
 	if (id == max_shmid)
-		while (max_shmid && (shm_segs[--max_shmid] == IPC_UNUSED));
-	if (!shp->shm_pages)
-		BUG();
+		while (max_shmid-- > 0 && (shm_segs[max_shmid] == IPC_UNUSED));
+	if (!shp->shm_dir)
+                BUG();
 	spin_unlock(&shm_lock);
 	numpages = shp->shm_npages;
 	for (i = 0, rss = 0, swp = 0; i < numpages ; i++) {
 		pte_t pte;
-		pte = shp->shm_pages[i];
+		pte = SHM_ENTRY (shp,i);
 		if (pte_none(pte))
 			continue;
 		if (pte_present(pte)) {
@@ -238,9 +354,7 @@
 			swp++;
 		}
 	}
-	lock_kernel();
-	vfree(shp->shm_pages);
-	unlock_kernel();
+	shm_free (shp->shm_dir, numpages);
 	kfree(shp);
 	spin_lock(&shm_lock);
 	shm_rss -= rss;
@@ -269,19 +383,20 @@
 	case IPC_INFO:
 	{
 		struct shminfo shminfo;
+		spin_unlock(&shm_lock);
 		err = -EFAULT;
 		if (!buf)
 			goto out;
-		shminfo.shmmni = SHMMNI;
-		shminfo.shmmax = shmmax;
+                lock_kernel();
+		shminfo.shmmni = shminfo.shmseg = shm_prm[2];
+		shminfo.shmmax = shm_prm[0];
+		shminfo.shmall = shm_prm[1];
+                unlock_kernel();
 		shminfo.shmmin = SHMMIN;
-		shminfo.shmall = SHMALL;
-		shminfo.shmseg = SHMSEG;
-		spin_unlock(&shm_lock);
 		if(copy_to_user (buf, &shminfo, sizeof(struct shminfo)))
 			goto out_unlocked;
 		spin_lock(&shm_lock);
-		err = max_shmid;
+		err = max_shmid < 0 ? 0 : max_shmid;
 		goto out;
 	}
 	case SHM_INFO:
@@ -298,7 +413,7 @@
 		if(copy_to_user (buf, &shm_info, sizeof(shm_info)))
 			goto out_unlocked;
 		spin_lock(&shm_lock);
-		err = max_shmid;
+		err = max_shmid < 0 ? 0 : max_shmid;
 		goto out;
 	}
 	case SHM_STAT:
@@ -310,7 +425,7 @@
 			goto out;
 		if (ipcperms (&shp->u.shm_perm, S_IRUGO))
 			goto out;
-		id = (unsigned int) shp->u.shm_perm.seq * SHMMNI + shmid;
+		id = (unsigned int) shp->u.shm_perm.seq * IPCMNI + shmid;
 		err = -EFAULT;
 		spin_unlock(&shm_lock);
 		if(copy_to_user (buf, &shp->u, sizeof(*buf)))
@@ -320,12 +435,13 @@
 		goto out;
 	}
 
-	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
 	err = -EINVAL;
-	if (shp == IPC_UNUSED || shp == IPC_NOID)
+	if ((id = (unsigned int) shmid % IPCMNI) > max_shmid)
+		goto out;
+	if ((shp = shm_segs[id]) == IPC_UNUSED || shp == IPC_NOID)
 		goto out;
 	err = -EIDRM;
-	if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
+	if (shp->u.shm_perm.seq != (unsigned int) shmid / IPCMNI)
 		goto out;
 	ipcp = &shp->u.shm_perm;
 
@@ -480,7 +596,7 @@
 	if (shmid < 0)
 		goto out;
 
-	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
+	shp = shm_segs[id = (unsigned int) shmid % IPCMNI];
 	if (shp == IPC_UNUSED || shp == IPC_NOID)
 		goto out;
 
@@ -523,7 +639,7 @@
 	if (ipcperms(&shp->u.shm_perm, shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO))
 		goto out;
 	err = -EIDRM;
-	if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
+	if (shp->u.shm_perm.seq != (unsigned int) shmid / IPCMNI)
 		goto out;
 
 	spin_unlock(&shm_lock);
@@ -532,13 +648,13 @@
 	spin_lock(&shm_lock);
 	if (!shmd)
 		goto out;
-	if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)) {
+	if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / IPCMNI)) {
 		kmem_cache_free(vm_area_cachep, shmd);
 		err = -EIDRM;
 		goto out;
 	}
 
-	shmd->vm_private_data = shm_segs + id;
+	shmd->vm_private_data = shm_segs[id];
 	shmd->vm_start = addr;
 	shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
 	shmd->vm_mm = current->mm;
@@ -584,7 +700,7 @@
 	struct shmid_kernel *shp;
 
 	spin_lock(&shm_lock);
-	shp = *(struct shmid_kernel **) shmd->vm_private_data;
+	shp = (struct shmid_kernel *) shmd->vm_private_data;
 	insert_attach(shp,shmd);  /* insert shmd into shp->attaches */
 	shp->u.shm_nattch++;
 	shp->u.shm_atime = CURRENT_TIME;
@@ -604,14 +720,12 @@
 
 	spin_lock(&shm_lock);
 	/* remove from the list of attaches of the shm segment */
-	shp = *(struct shmid_kernel **) shmd->vm_private_data;
+	shp = (struct shmid_kernel *) shmd->vm_private_data;
 	remove_attach(shp,shmd);  /* remove from shp->attaches */
   	shp->u.shm_lpid = current->pid;
 	shp->u.shm_dtime = CURRENT_TIME;
-	if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST) {
-		unsigned int id = (struct shmid_kernel **)shmd->vm_private_data - shm_segs;
-		killseg (id);
-	}
+	if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST)
+		killseg (shp->id);
 	spin_unlock(&shm_lock);
 }
 
@@ -648,7 +762,7 @@
 }
 
 /*
- * page not present ... go through shm_pages
+ * page not present ... go through shm_dir
  */
 static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
 {
@@ -657,13 +771,13 @@
 	unsigned int idx;
 	struct page * page;
 
-	shp = *(struct shmid_kernel **) shmd->vm_private_data;
+	shp = (struct shmid_kernel *) shmd->vm_private_data;
 	idx = (address - shmd->vm_start) >> PAGE_SHIFT;
 	idx += shmd->vm_pgoff;
 
 	spin_lock(&shm_lock);
 again:
-	pte = shp->shm_pages[idx];
+	pte = SHM_ENTRY(shp,idx);
 	if (!pte_present(pte)) {
 		if (pte_none(pte)) {
 			spin_unlock(&shm_lock);
@@ -672,7 +786,7 @@
 				goto oom;
 			clear_highpage(page);
 			spin_lock(&shm_lock);
-			if (pte_val(pte) != pte_val(shp->shm_pages[idx]))
+			if (pte_val(pte) != pte_val(SHM_ENTRY(shp, idx)))
 				goto changed;
 		} else {
 			swp_entry_t entry = pte_to_swp_entry(pte);
@@ -694,18 +808,18 @@
 			unlock_kernel();
 			spin_lock(&shm_lock);
 			shm_swp--;
-			pte = shp->shm_pages[idx];
+			pte = SHM_ENTRY(shp, idx);
 			if (pte_present(pte))
 				goto present;
 		}
 		shm_rss++;
 		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
-		shp->shm_pages[idx] = pte;
+		SHM_ENTRY(shp, idx) = pte;
 	} else
 		--current->maj_flt;  /* was incremented in do_no_page */
 
 done:
-	/* pte_val(pte) == shp->shm_pages[idx] */
+	/* pte_val(pte) == SHM_ENTRY (shp, idx) */
 	get_page(pte_page(pte));
 	spin_unlock(&shm_lock);
 	current->min_flt++;
@@ -770,7 +884,7 @@
 	if (idx >= shp->shm_npages)
 		goto next_id;
 
-	page = shp->shm_pages[idx];
+	page = SHM_ENTRY(shp, idx);
 	if (!pte_present(page))
 		goto check_table;
 	page_map = pte_page(page);
@@ -792,7 +906,7 @@
 		goto check_table;
 	if (!(page_map = prepare_highmem_swapout(page_map)))
 		goto check_table;
-	shp->shm_pages[idx] = swp_entry_to_pte(swap_entry);
+	SHM_ENTRY (shp, idx) = swp_entry_to_pte(swap_entry);
 	swap_successes++;
 	shm_swp++;
 	shm_rss--;
@@ -812,12 +926,12 @@
  * Free the swap entry and set the new pte for the shm page.
  */
 static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
-			swp_entry_t entry, struct page *page)
+			   swp_entry_t entry, struct page *page)
 {
 	pte_t pte;
 
 	pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
-	shp->shm_pages[idx] = pte;
+	SHM_ENTRY(shp, idx) = pte;
 	get_page(page);
 	shm_rss++;
 
@@ -837,16 +951,16 @@
 	int i, n;
 
 	spin_lock(&shm_lock);
-	for (i = 0; i < SHMMNI; i++) {
+	for (i = 0; i <= max_shmid; i++) {
 		struct shmid_kernel *seg = shm_segs[i];
 		if ((seg == IPC_UNUSED) || (seg == IPC_NOID))
 			continue;
 		for (n = 0; n < seg->shm_npages; n++) {
-			if (pte_none(seg->shm_pages[n]))
+			if (pte_none(SHM_ENTRY(seg,n)))
 				continue;
-			if (pte_present(seg->shm_pages[n]))
+			if (pte_present(SHM_ENTRY(seg,n)))
 				continue;
-			if (pte_to_swp_entry(seg->shm_pages[n]).val == entry.val) {
+			if (pte_to_swp_entry(SHM_ENTRY(seg,n)).val == entry.val) {
 				shm_unuse_page(seg, n, entry, page);
 				return;
 			}
@@ -865,7 +979,7 @@
     	len += sprintf(buffer, "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime\n");
 
 	spin_lock(&shm_lock);
-    	for(i = 0; i < SHMMNI; i++)
+    	for(i = 0; i <= max_shmid; i++)
 		if(shm_segs[i] != IPC_UNUSED) {
 #define SMALL_STRING "%10d %10d  %4o %10u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
 #define BIG_STRING   "%10d %10d  %4o %21u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
@@ -877,7 +991,7 @@
 				format = BIG_STRING;
 	    		len += sprintf(buffer + len, format,
 			shm_segs[i]->u.shm_perm.key,
-			shm_segs[i]->u.shm_perm.seq * SHMMNI + i,
+			shm_segs[i]->u.shm_perm.seq * IPCMNI + i,
 			shm_segs[i]->u.shm_perm.mode,
 			shm_segs[i]->u.shm_segsz,
 			shm_segs[i]->u.shm_cpid,
diff -uNr 2.3.25/ipc/util.c 2.3.25-shm3/ipc/util.c
--- 2.3.25/ipc/util.c	Tue Nov  2 12:46:29 1999
+++ 2.3.25-shm3/ipc/util.c	Wed Nov  3 20:04:41 1999
@@ -14,6 +14,8 @@
 #include <linux/init.h>
 #include <linux/msg.h>
 
+#include "util.h"
+
 #if defined(CONFIG_SYSVIPC)
 
 extern void sem_init (void), msg_init (void), shm_init (void);
diff -uNr 2.3.25/ipc/util.h 2.3.25-shm3/ipc/util.h
--- 2.3.25/ipc/util.h	Thu Jan  1 01:00:00 1970
+++ 2.3.25-shm3/ipc/util.h	Wed Nov  3 11:28:16 1999
@@ -0,0 +1,12 @@
+/*
+ * linux/ipc/util.h
+ * Copyright (C) 1999 Christoph Rohland
+ */
+
+/*
+ * IPCMNI is the absolute maximum for ipc identifier. This is used to
+ * detect stale identifiers
+ */
+#define IPCMNI (1<<15)          
+
+extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
diff -uNr 2.3.25/kernel/sysctl.c 2.3.25-shm3/kernel/sysctl.c
--- 2.3.25/kernel/sysctl.c	Tue Nov  2 12:46:29 1999
+++ 2.3.25-shm3/kernel/sysctl.c	Wed Nov  3 11:32:02 1999
@@ -49,7 +49,7 @@
 extern int sg_big_buff;
 #endif
 #ifdef CONFIG_SYSVIPC
-extern size_t shmmax;
+extern size_t shm_prm[];
 #endif
 
 #ifdef __sparc__
@@ -213,7 +213,7 @@
 	{KERN_RTSIGMAX, "rtsig-max", &max_queued_signals, sizeof(int),
 	 0644, NULL, &proc_dointvec},
 #ifdef CONFIG_SYSVIPC
-	{KERN_SHMMAX, "shmmax", &shmmax, sizeof (size_t),
+	{KERN_SHMMAX, "shmmax", &shm_prm, 3*sizeof (size_t),
 	 0644, NULL, &proc_doulongvec_minmax},
 #endif
 #ifdef CONFIG_MAGIC_SYSRQ
diff -uNr 2.3.25/mm/swap_state.c 2.3.25-shm3/mm/swap_state.c
--- 2.3.25/mm/swap_state.c	Tue Nov  2 12:46:29 1999
+++ 2.3.25-shm3/mm/swap_state.c	Wed Nov  3 11:28:16 1999
@@ -68,8 +68,6 @@
 	if (!entry.val)
 		goto out;
 	type = SWP_TYPE(entry);
-	if (type & SHM_SWP_TYPE)
-		goto out;
 	if (type >= nr_swapfiles)
 		goto bad_file;
 	p = type + swap_info;
@@ -115,8 +113,6 @@
 	if (!entry.val)
 		goto bad_entry;
 	type = SWP_TYPE(entry);
-	if (type & SHM_SWP_TYPE)
-		goto out;
 	if (type >= nr_swapfiles)
 		goto bad_file;
 	p = type + swap_info;
diff -uNr 2.3.25/mm/swapfile.c 2.3.25-shm3/mm/swapfile.c
--- 2.3.25/mm/swapfile.c	Tue Nov  2 12:46:29 1999
+++ 2.3.25-shm3/mm/swapfile.c	Wed Nov  3 11:28:16 1999
@@ -135,8 +135,6 @@
 		goto out;
 
 	type = SWP_TYPE(entry);
-	if (type & SHM_SWP_TYPE)
-		goto out;
 	if (type >= nr_swapfiles)
 		goto bad_nofile;
 	p = & swap_info[type];
@@ -190,8 +188,6 @@
 		goto new_swap_entry;
 	entry.val = page->index;
 	type = SWP_TYPE(entry);
-	if (type & SHM_SWP_TYPE)
-		goto new_swap_entry;
 	if (type >= nr_swapfiles)
 		goto new_swap_entry;
 	p = type + swap_info;

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Patch] shm cleanups
  1999-11-05 16:21                   ` Andrea Arcangeli
@ 1999-11-05 16:28                     ` Christoph Rohland
  0 siblings, 0 replies; 14+ messages in thread
From: Christoph Rohland @ 1999-11-05 16:28 UTC (permalink / raw)
  To: Andrea Arcangeli
  Cc: Christoph Rohland, Ingo Molnar, Rik van Riel, MM mailing list,
	woodman, Linus Torvalds

Andrea Arcangeli <andrea@suse.de> writes:

> On 5 Nov 1999, Christoph Rohland wrote:
> 
> >Yes I know it is questionable, but if prepare_highmem_swapout fails we
> >are in the highmem area and probably most of the rest of shm is also
> >there. So we only consume a lot of CPU if going on and calling
> 
> If prepare_highmem_swapout fails maybe all the regular pages are allocated
> in the rest of the shm segment and to free them and do progresses you
> should continue to properly shrink the VM.
> 
> >prepare_highmem_swapout again and again..
> 
> That will happen in the pathological unlikely to happen case so the
> performance of such path is not an issue.

So we only need the first part of the patch.

        Christoph

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Patch] shm cleanups
  1999-11-05 16:16                 ` Christoph Rohland
@ 1999-11-05 16:21                   ` Andrea Arcangeli
  1999-11-05 16:28                     ` Christoph Rohland
  0 siblings, 1 reply; 14+ messages in thread
From: Andrea Arcangeli @ 1999-11-05 16:21 UTC (permalink / raw)
  To: Christoph Rohland
  Cc: Ingo Molnar, Rik van Riel, MM mailing list, woodman, Linus Torvalds

On 5 Nov 1999, Christoph Rohland wrote:

>Yes I know it is questionable, but if prepare_highmem_swapout fails we
>are in the highmem area and probably most of the rest of shm is also
>there. So we only consume a lot of CPU if going on and calling

If prepare_highmem_swapout fails maybe all the regular pages are allocated
in the rest of the shm segment and to free them and do progresses you
should continue to properly shrink the VM.

>prepare_highmem_swapout again and again..

That will happen in the pathological unlikely to happen case so the
performance of such path is not an issue.

Andrea

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Patch] shm cleanups
  1999-11-05 13:18               ` Andrea Arcangeli
@ 1999-11-05 16:16                 ` Christoph Rohland
  1999-11-05 16:21                   ` Andrea Arcangeli
  0 siblings, 1 reply; 14+ messages in thread
From: Christoph Rohland @ 1999-11-05 16:16 UTC (permalink / raw)
  To: Andrea Arcangeli
  Cc: Ingo Molnar, Rik van Riel, MM mailing list, woodman, Linus Torvalds

Andrea Arcangeli <andrea@suse.de> writes:

> Christoph Rohland <hans-christoph.rohland@sap.com> writes:
> 
> >         if (!(page_map = prepare_highmem_swapout(page_map)))
> > -               goto check_table;
> > +               goto failed;
> 
> This fragment isn't correct. You may fail too early and so you may get
> a task killed due OOM even if you still have lots of regular pages
> queued in a shm segment.

Yes I know it is questionable, but if prepare_highmem_swapout fails we
are in the highmem area and probably most of the rest of shm is also
there. So we only consume a lot of CPU if going on and calling
prepare_highmem_swapout again and again..

prepare_highmem_swapout should not fail (and does not fail very often
in 2.3.26-pre2). In 2.3.25 the machine effectively locked up when it
failed (and it failed very often).

On 2.3.26-pre2 I do not see a difference between the two versions. So
I do not know which way will be better.

        Christoph
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Patch] shm cleanups
  1999-11-05 12:35             ` Christoph Rohland
@ 1999-11-05 13:18               ` Andrea Arcangeli
  1999-11-05 16:16                 ` Christoph Rohland
  0 siblings, 1 reply; 14+ messages in thread
From: Andrea Arcangeli @ 1999-11-05 13:18 UTC (permalink / raw)
  To: Christoph Rohland
  Cc: Ingo Molnar, Rik van Riel, MM mailing list, woodman, Linus Torvalds

Christoph Rohland <hans-christoph.rohland@sap.com> writes:

>         if (!(page_map = prepare_highmem_swapout(page_map)))
> -               goto check_table;
> +               goto failed;

This fragment isn't correct. You may fail too early and so you may get
a task killed due OOM even if you still have lots of regular pages
queued in a shm segment.

-- 
Andrea
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Patch] shm cleanups
  1999-11-05  0:14           ` Andrea Arcangeli
@ 1999-11-05 12:35             ` Christoph Rohland
  1999-11-05 13:18               ` Andrea Arcangeli
  0 siblings, 1 reply; 14+ messages in thread
From: Christoph Rohland @ 1999-11-05 12:35 UTC (permalink / raw)
  To: Andrea Arcangeli
  Cc: Ingo Molnar, Rik van Riel, Christoph Rohland, MM mailing list,
	woodman, Linus Torvalds

Andrea Arcangeli <andrea@suse.de> writes:

> Ingo Molnar <mingo@chiara.csoma.elte.hu> writes:
> 
> > [Christoph, are you still seeing the same kind of bad swapping behavior
> > with pre1-2.3.26?]
> 
> If you still get process killed during heavy swapout (cause OOM of
> an ATOMIC allocation) please try to increase the ATOMIC pool before
> designing a separate pool. We just have a pool for atomic allocation
> it may not be large enough for the increased pressure on the regular pages.
> 
>         echo 1000 2000 4000 >/proc/sys/vm/freepages
> 
> This way you'll basically waste 16mbyte of ram.  It's just to check if
> the ATOMIC allocation shortage is the source of the segfault or not.

O.k. with and without these settings 2.3.26-pre2 looks pretty good
about allocating bounce buffers and swapping highmem pages.

To swap shm the first part of the following patch is definitely
needed. The second part makes the machine probably much more useable
if we have problems allocating bounce buffers. Which now hardly ever
happens when stresstesting my 8GB machine.

Linus, could you please apply this?

        Christoph

--- 2.3.25/ipc/shm.c    Tue Nov  2 12:46:29 1999
+++ make25/ipc/shm.c    Thu Nov  4 12:47:14 1999
@@ -788,10 +789,10 @@
                unlock_kernel();
                return 0;
        }
-       if (page_count(page_map))
+       if (page_count(page_map) != 1)
                goto check_table;
        if (!(page_map = prepare_highmem_swapout(page_map)))
-               goto check_table;
+               goto failed;
        shp->shm_pages[idx] = swp_entry_to_pte(swap_entry);
        swap_successes++;
        shm_swp++;
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Patch] shm cleanups
  1999-11-04 22:30         ` Ingo Molnar
  1999-11-05  0:14           ` Andrea Arcangeli
@ 1999-11-05 10:36           ` Christoph Rohland
  1 sibling, 0 replies; 14+ messages in thread
From: Christoph Rohland @ 1999-11-05 10:36 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: MM mailing list, woodman, Linus Torvalds

Ingo Molnar <mingo@chiara.csoma.elte.hu> writes:

> On Thu, 4 Nov 1999, Rik van Riel wrote:
> 
> > I think I see what is going on here. Kswapd sees that memory is
> > low an "frees" a bunch of high memory pages, causing those pages
> > to be shifted to low memory so the total number of free pages
> > stays just as low as when kswapd started.
> 
> hm, kswapd should really be immune against this.
> 
> > This can result in in-memory swap storms, we should probably
> > limit the number of in-transit async himem pages to 256 or some
> > other even smaller number.
> 
> i introduced some stupid balancing bugs, and i wrongly thought that the
> fixes are already in 2.3.25, but no, it's the pre1-2.3.26 kernel that is
> supposed to have balancing right. basically the fix is to restore the
> original behavior of not counting high memory in memory pressure. This
> might sound an unfair policy, but the real critical resource is low
> memory. If this ever proves to be a problematic approach then we still can
> make it more sophisticated.
> 
> [Christoph, are you still seeing the same kind of bad swapping behavior
> with pre1-2.3.26?]

No, after applying the following patch, it is much better now:

--- 2.3.26-pre/ipc/shm.c        Fri Nov  5 10:25:40 1999
+++ make26/ipc/shm.c    Fri Nov  5 10:54:09 1999
@@ -897,10 +897,10 @@
                unlock_kernel();
                return 0;
        }
-       if (page_count(page_map))
+       if (page_count(page_map) != 1)
                goto check_table;
        if (!(page_map = prepare_highmem_swapout(page_map)))
-               goto check_table;
+               goto failed;
        SHM_ENTRY (shp, idx) = swp_entry_to_pte(swap_entry);
        swap_successes++;
        shm_swp++;


[root@ls3016 src]# ipcs -um

------ Shared Memory Status --------
segments allocated 274
pages allocated 2244608
pages resident  2043405
pages swapped   177175
Swap performance: 251917 attempts        241439 successes

[root@ls3016 src]# cat /proc/meminfo 
        total:    used:    free:  shared: buffers:  cached:
Mem:  4152516608 4133502976 19013632        0   512000 21954560
Swap: 4133269504 725725184 3407544320
MemTotal:   8249496 kB
MemFree:      18568 kB
MemShared:        0 kB
Buffers:        500 kB
Cached:       21440 kB
HighTotal:  7471104 kB
HighFree:         0 kB
SwapTotal:  4036396 kB
SwapFree:   3327680 kB

and output of vmstat 5:

   procs                      memory    swap          io     system         cpu
 r  b  w   swpd   free   buff  cache  si  so    bi    bo   in    cs  us  sy  id
17  3  1 424020   2688    244  25932 407 10280   104  2570  970  1490   0  92   7
20  0  1 461380   1688    240  24464 467 8048   120  2012  758  1188   0  87  13
18  1  1 484676  27728    240  20692 994 5644   250  1411  549   910   0  90  10
19  0  1 497068   2732    260  25700 2690 5055   675  1264  532  1038   0  81  18
11  8  1 529820   4692    272  25192 4232 10643  1063  2661 1191  2126   0  85  15
17  2  1 559572   1472    264  19860 2538 8473   641  2118  919  1653   0  81  19
12  7  1 609780   1944    268  24280 3620 13611   912  3404 1485  2799   0  79  21
15  3  1 648148   1836    272  16648 8061 15666  2025  3916 2227  3731   0  78  22
12  6  1 692208   3044    280  23192 5394 14147  1359  3538 1768  3163   0  75  25
12  5  0 742160   2256    276  38144 6208 16190  1559  4047 1936  3419   0  78  22

So we have very few failures. Probably we never get caught by the second
part of the above patch any more.

> -- mingo
> 
> ps. some people might ask why we want to swap on an 8GB box, but i think
> it's really an issue in production systems to provide some kind of 'rubber
> wall' instead of 'hard concrete' if the system is reaching its limits.
> adding (99% unused) swap space does exactly this.

Yes we need it for ERP applications. You would not believe how much data
is sometimes processed in business applications.

And to have a hard limit for production servers is always a reason to
use something else.

        Christoph
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Patch] shm cleanups
  1999-11-04 22:30         ` Ingo Molnar
@ 1999-11-05  0:14           ` Andrea Arcangeli
  1999-11-05 12:35             ` Christoph Rohland
  1999-11-05 10:36           ` Christoph Rohland
  1 sibling, 1 reply; 14+ messages in thread
From: Andrea Arcangeli @ 1999-11-05  0:14 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rik van Riel, Christoph Rohland, MM mailing list, woodman,
	Linus Torvalds

Ingo Molnar <mingo@chiara.csoma.elte.hu> writes:

> [Christoph, are you still seeing the same kind of bad swapping behavior
> with pre1-2.3.26?]

If you still get process killed during heavy swapout (cause OOM of
an ATOMIC allocation) please try to increase the ATOMIC pool before
designing a separate pool. We just have a pool for atomic allocation
it may not be large enough for the increased pressure on the regular pages.

        echo 1000 2000 4000 >/proc/sys/vm/freepages

This way you'll basically waste 16mbyte of ram.  It's just to check if
the ATOMIC allocation shortage is the source of the segfault or not.

-- 
Andrea
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Patch] shm cleanups
  1999-11-04 19:02       ` Rik van Riel
@ 1999-11-04 22:30         ` Ingo Molnar
  1999-11-05  0:14           ` Andrea Arcangeli
  1999-11-05 10:36           ` Christoph Rohland
  0 siblings, 2 replies; 14+ messages in thread
From: Ingo Molnar @ 1999-11-04 22:30 UTC (permalink / raw)
  To: Rik van Riel; +Cc: Christoph Rohland, MM mailing list, woodman, Linus Torvalds

On Thu, 4 Nov 1999, Rik van Riel wrote:

> I think I see what is going on here. Kswapd sees that memory is
> low and "frees" a bunch of high memory pages, causing those pages
> to be shifted to low memory so the total number of free pages
> stays just as low as when kswapd started.

hm, kswapd should really be immune against this.

> This can result in in-memory swap storms, we should probably
> limit the number of in-transit async himem pages to 256 or some
> other even smaller number.

i introduced some stupid balancing bugs, and i wrongly thought that the
fixes are already in 2.3.25, but no, it's the pre1-2.3.26 kernel that is
supposed to have balancing right. basically the fix is to restore the
original behavior of not counting high memory in memory pressure. This
might sound an unfair policy, but the real critical resource is low
memory. If this ever proves to be a problematic approach then we still can
make it more sophisticated.

[Christoph, are you still seeing the same kind of bad swapping behavior
with pre1-2.3.26?]

-- mingo

ps. some people might ask why we want to swap on an 8GB box, but i think
it's really an issue in production systems to provide some kind of 'rubber
wall' instead of 'hard concrete' if the system is reaching its limits.
adding (99% unused) swap space does exactly this.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Patch] shm cleanups
  1999-11-04 17:58     ` Ingo Molnar
@ 1999-11-04 19:02       ` Rik van Riel
  1999-11-04 22:30         ` Ingo Molnar
  0 siblings, 1 reply; 14+ messages in thread
From: Rik van Riel @ 1999-11-04 19:02 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Christoph Rohland, MM mailing list, woodman, Linus Torvalds

On Thu, 4 Nov 1999, Ingo Molnar wrote:
> On 4 Nov 1999, Christoph Rohland wrote:
> 
> > I do get swapping also with 8GB of RAM, but it runs out of memory
> > before running out of swap space since prepare_highmem_swapout is
> > failing way too often.
> 
> ho humm. I think prepare_highmem_swapout() has a design bug. It's way too
> naive in low memory situations, it should keep a short list of pages for
> emergency swapout. It's the GFP_ATOMIC that is failing too often, right?
> 
> i believe we should have some explicit mechanism that tells vmscan that
> there is 'IO in progress which will result in more memory', to distinguish
> between true out-of-memory and 'wait a little bit to get more RAM' cases?

I think I see what is going on here. Kswapd sees that memory is
low and "frees" a bunch of high memory pages, causing those pages
to be shifted to low memory so the total number of free pages
stays just as low as when kswapd started.

This can result in in-memory swap storms, we should probably
limit the number of in-transit async himem pages to 256 or some
other even smaller number.

regards,

Rik
--
The Internet is not a network of computers. It is a network
of people. That is its real strength.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Patch] shm cleanups
  1999-11-04 12:40   ` Christoph Rohland
@ 1999-11-04 17:58     ` Ingo Molnar
  1999-11-04 19:02       ` Rik van Riel
  0 siblings, 1 reply; 14+ messages in thread
From: Ingo Molnar @ 1999-11-04 17:58 UTC (permalink / raw)
  To: Christoph Rohland; +Cc: MM mailing list, woodman, Linus Torvalds

On 4 Nov 1999, Christoph Rohland wrote:

> I do get swapping also with 8GB of RAM, but it runs out of memory
> before running out of swap space since prepare_highmem_swapout is
> failing way to often.
> 
> (It then locks up since it cannot free the shm segments and so is
> unable to free the memory. This should be perhaps addressed later in
> the oom handler. It cannot handle the case where nearly all memory is
> allocted in shm segments)

ho humm. I think prepare_highmem_swapout() has a design bug. It's way too
naive in low memory situations, it should keep a short list of pages for
emergency swapout. It's the GFP_ATOMIC that is failing too often, right?

i believe we should have some explicit mechanism that tells vmscan that
there is 'IO in progress which will result in more memory', to distinguish
between true out-of-memory and 'wait a little bit to get more RAM' cases?
I think we'd have a lot less to worry about and there would be a much
clearer distinction between true out-of-mem and 'just cannot allocate it
right now but help is on the way' cases.

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Patch] shm cleanups
  1999-11-04  8:10 ` Ingo Molnar
@ 1999-11-04 12:40   ` Christoph Rohland
  1999-11-04 17:58     ` Ingo Molnar
  0 siblings, 1 reply; 14+ messages in thread
From: Christoph Rohland @ 1999-11-04 12:40 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: MM mailing list, woodman

Hi Ingo,

Ingo Molnar <mingo@chiara.csoma.elte.hu> writes:

> On 3 Nov 1999, Christoph Rohland wrote:
> 
> > I did test it a lot on SMP/HIGHMEM. Since 2.3.25 with and without this
> > breaks on swapping shm and other high memory load conditions I could
> > not verify everything. But I would like to see this in the mainstream
> > kernel. I will then proceed debugging the swapping issues.
> 
> (i can see the problems too, but i've got no explanation either, working
> on it as well.)
> 
> -- mingo

After applying the following patch against ipc/shm.c (thanks to Larry
Woodman <woodman@missioncriticallinux.com> for the first one):

--- 2.3.25/ipc/shm.c    Tue Nov  2 12:46:29 1999
+++ make25/ipc/shm.c    Thu Nov  4 12:47:14 1999
@@ -788,10 +789,10 @@
                unlock_kernel();
                return 0;
        }
-       if (page_count(page_map))
+       if (page_count(page_map) != 1)
                goto check_table;
        if (!(page_map = prepare_highmem_swapout(page_map)))
-               goto check_table;
+               goto failed;
        shp->shm_pages[idx] = swp_entry_to_pte(swap_entry);
        swap_successes++;
        shm_swp++;
791c791
< 	if (page_count(page_map))
---
> 	if (page_count(page_map) != 1)

I get the same behaviour for shm swapping as on 2.3.22 as long as I
only use 1G memory (HIGHMEM setting is always 64GB).

I do get swapping also with 8GB of RAM, but it runs out of memory
before running out of swap space since prepare_highmem_swapout is
failing way too often.

(It then locks up since it cannot free the shm segments and so is
unable to free the memory. This should be perhaps addressed later in
the oom handler. It cannot handle the case where nearly all memory is
allocated in shm segments)

Greetings
          Christoph
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Patch] shm cleanups
  1999-11-03 21:30 Christoph Rohland
@ 1999-11-04  8:10 ` Ingo Molnar
  1999-11-04 12:40   ` Christoph Rohland
  0 siblings, 1 reply; 14+ messages in thread
From: Ingo Molnar @ 1999-11-04  8:10 UTC (permalink / raw)
  To: Christoph Rohland; +Cc: Linus Torvalds, MM mailing list, Kanoj Sarcar

On 3 Nov 1999, Christoph Rohland wrote:

> I did test it a lot on SMP/HIGHMEM. Since 2.3.25 with and without this
> breaks on swapping shm and other high memory load conditions I could
> not verify everything. But I would like to see this in the mainstream
> kernel. I will then proceed debugging the swapping issues.

(i can see the problems too, but i've got no explanation either, working
on it as well.)

-- mingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Patch] shm cleanups
@ 1999-11-03 21:30 Christoph Rohland
  1999-11-04  8:10 ` Ingo Molnar
  0 siblings, 1 reply; 14+ messages in thread
From: Christoph Rohland @ 1999-11-03 21:30 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: MM mailing list, Kanoj Sarcar, Ingo Molnar

The following message was crippled somewhere on the way to linux-mm so
I resend it:
 
Hi Linus,

Here is a patch against 2.3.25 which

1) avoids vmalloc in the shm coding.
2) cleans a lot of cruft out of the shm headers
3) makes shm sysctl'able

I did test it a lot on SMP/HIGHMEM. Since 2.3.25 with and without this
breaks on swapping shm and other high memory load conditions I could
not verify everything. But I would like to see this in the mainstream
kernel. I will then proceed debugging the swapping issues.

Greetings
          Christoph

diff -uNr 2.3.25/arch/i386/kernel/sys_i386.c 2.3.25-shm3/arch/i386/kernel/sys_i386.c
--- 2.3.25/arch/i386/kernel/sys_i386.c	Fri Dec 18 01:27:35 1998
+++ 2.3.25-shm3/arch/i386/kernel/sys_i386.c	Wed Nov  3 11:28:11 1999
@@ -116,86 +116,71 @@
 	version = call >> 16; /* hack for backward compatibility */
 	call &= 0xffff;

-	if (call <= SEMCTL)
-		switch (call) {
-		case SEMOP:
-			return sys_semop (first, (struct sembuf *)ptr, second);
-		case SEMGET:
-			return sys_semget (first, second, third);
-		case SEMCTL: {
-			union semun fourth;
-			if (!ptr)
-				return -EINVAL;
-			if (get_user(fourth.__pad, (void **) ptr))
-				return -EFAULT;
-			return sys_semctl (first, second, third, fourth);
-			}
-		default:
-			return -EINVAL;
-		}
+        switch (call) {
+        case SEMOP:
+                return sys_semop (first, (struct sembuf *)ptr, second);
+        case SEMGET:
+                return sys_semget (first, second, third);
+        case SEMCTL: {
+                union semun fourth;
+                if (!ptr)
+                        return -EINVAL;
+                if (get_user(fourth.__pad, (void **) ptr))
+                        return -EFAULT;
+                return sys_semctl (first, second, third, fourth);
+        }

-	if (call <= MSGCTL) 
-		switch (call) {
-		case MSGSND:
-			return sys_msgsnd (first, (struct msgbuf *) ptr, 
-					  second, third);
-		case MSGRCV:
-			switch (version) {
-			case 0: {
-				struct ipc_kludge tmp;
-				if (!ptr)
-					return -EINVAL;
-				
-				if (copy_from_user(&tmp,
-						   (struct ipc_kludge *) ptr, 
-						   sizeof (tmp)))
-					return -EFAULT;
-				return sys_msgrcv (first, tmp.msgp, second,
-						   tmp.msgtyp, third);
-				}
-			default:
-				return sys_msgrcv (first,
-						   (struct msgbuf *) ptr,
-						   second, fifth, third);
-			}
-		case MSGGET:
-			return sys_msgget ((key_t) first, second);
-		case MSGCTL:
-			return sys_msgctl (first, second,
-					   (struct msqid_ds *) ptr);
-		default:
-			return -EINVAL;
-		}
-	if (call <= SHMCTL) 
-		switch (call) {
-		case SHMAT:
-			switch (version) {
-			default: {
-				ulong raddr;
-				ret = sys_shmat (first, (char *) ptr,
-						 second, &raddr);
-				if (ret)
-					return ret;
-				return put_user (raddr, (ulong *) third);
-			}
-			case 1:	/* iBCS2 emulator entry point */
-				if (!segment_eq(get_fs(), get_ds()))
-					return -EINVAL;
-				return sys_shmat (first, (char *) ptr,
-						  second, (ulong *) third);
-			}
-		case SHMDT: 
-			return sys_shmdt ((char *)ptr);
-		case SHMGET:
-			return sys_shmget (first, second, third);
-		case SHMCTL:
-			return sys_shmctl (first, second,
-					   (struct shmid_ds *) ptr);
-		default:
-			return -EINVAL;
-		}
-	
-	return -EINVAL;
+        case MSGSND:
+                return sys_msgsnd (first, (struct msgbuf *) ptr, 
+                                   second, third);
+        case MSGRCV:
+                switch (version) {
+                case 0: {
+                        struct ipc_kludge tmp;
+                        if (!ptr)
+                                return -EINVAL;
+                        
+                        if (copy_from_user(&tmp,
+                                           (struct ipc_kludge *) ptr, 
+                                           sizeof (tmp)))
+                                return -EFAULT;
+                        return sys_msgrcv (first, tmp.msgp, second,
+                                           tmp.msgtyp, third);
+                }
+                default:
+                        return sys_msgrcv (first,
+                                           (struct msgbuf *) ptr,
+                                           second, fifth, third);
+                }
+        case MSGGET:
+                return sys_msgget ((key_t) first, second);
+        case MSGCTL:
+                return sys_msgctl (first, second, (struct msqid_ds *) ptr);
+
+        case SHMAT:
+                switch (version) {
+                default: {
+                        ulong raddr;
+                        ret = sys_shmat (first, (char *) ptr, second, &raddr);
+                        if (ret)
+                                return ret;
+                        return put_user (raddr, (ulong *) third);
+                }
+                case 1:	/* iBCS2 emulator entry point */
+                        if (!segment_eq(get_fs(), get_ds()))
+                                return -EINVAL;
+                        return sys_shmat (first, (char *) ptr, second, (ulong *) third);
+                }
+        case SHMDT: 
+                return sys_shmdt ((char *)ptr);
+        case SHMGET:
+                return sys_shmget (first, second, third);
+        case SHMCTL:
+                return sys_shmctl (first, second,
+                                   (struct shmid_ds *) ptr);
+        default:
+                return -EINVAL;
+        }
 }

 /*
diff -uNr 2.3.25/include/asm-alpha/shmparam.h 2.3.25-shm3/include/asm-alpha/shmparam.h
--- 2.3.25/include/asm-alpha/shmparam.h	Mon Oct  7 14:12:29 1996
+++ 2.3.25-shm3/include/asm-alpha/shmparam.h	Wed Nov  3 11:28:11 1999
@@ -1,47 +1,6 @@
 #ifndef _ASMAXP_SHMPARAM_H
 #define _ASMAXP_SHMPARAM_H

-/*
- * Address range for shared memory attaches if no address passed to shmat().
- */
-#define SHM_RANGE_START	0x14000000000
-#define SHM_RANGE_END	0x15000000000
-
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the Alpha and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x3fa000			/* max shared seg size (bytes) */
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define	SHMLBA PAGE_SIZE		/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */
+#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */

 #endif /* _ASMAXP_SHMPARAM_H */
diff -uNr 2.3.25/include/asm-arm/proc-armo/shmparam.h 2.3.25-shm3/include/asm-arm/proc-armo/shmparam.h
--- 2.3.25/include/asm-arm/proc-armo/shmparam.h	Wed Jan 21 01:39:42 1998
+++ 2.3.25-shm3/include/asm-arm/proc-armo/shmparam.h	Wed Nov  3 11:28:11 1999
@@ -9,9 +9,7 @@
 #ifndef __ASM_PROC_SHMPARAM_H
 #define __ASM_PROC_SHMPARAM_H

-#ifndef SHM_RANGE_START
-#define SHM_RANGE_START	0x00a00000
-#define SHM_RANGE_END	0x00c00000
+#ifndef SHMMAX
 #define SHMMAX		0x003fa000
 #endif

diff -uNr 2.3.25/include/asm-arm/proc-armv/shmparam.h 2.3.25-shm3/include/asm-arm/proc-armv/shmparam.h
--- 2.3.25/include/asm-arm/proc-armv/shmparam.h	Wed Jan 21 01:39:43 1998
+++ 2.3.25-shm3/include/asm-arm/proc-armv/shmparam.h	Wed Nov  3 11:28:11 1999
@@ -10,9 +10,7 @@
 #ifndef __ASM_PROC_SHMPARAM_H
 #define __ASM_PROC_SHMPARAM_H

-#ifndef SHM_RANGE_START
-#define SHM_RANGE_START	0x50000000
-#define SHM_RANGE_END	0x60000000
+#ifndef SHMMAX
 #define SHMMAX		0x01000000
 #endif

diff -uNr 2.3.25/include/asm-arm/shmparam.h 2.3.25-shm3/include/asm-arm/shmparam.h
--- 2.3.25/include/asm-arm/shmparam.h	Mon Oct 25 09:25:29 1999
+++ 2.3.25-shm3/include/asm-arm/shmparam.h	Wed Nov  3 11:28:11 1999
@@ -3,39 +3,6 @@

 #include <asm/proc/shmparam.h>

-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define	SHMLBA PAGE_SIZE		/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */
+#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */

 #endif /* _ASMARM_SHMPARAM_H */
diff -uNr 2.3.25/include/asm-i386/shmparam.h 2.3.25-shm3/include/asm-i386/shmparam.h
--- 2.3.25/include/asm-i386/shmparam.h	Sun Nov  8 23:06:18 1998
+++ 2.3.25-shm3/include/asm-i386/shmparam.h	Wed Nov  3 11:28:11 1999
@@ -1,46 +1,6 @@
 #ifndef _ASMI386_SHMPARAM_H
 #define _ASMI386_SHMPARAM_H

-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START	0x50000000
-#define SHM_RANGE_END	0x60000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x2000000		/* max shared seg size (bytes) */
-/* Try not to change the default shipped SHMMAX - people rely on it */
-
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define	SHMLBA PAGE_SIZE		/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */
+#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */

 #endif /* _ASMI386_SHMPARAM_H */
diff -uNr 2.3.25/include/asm-m68k/shmparam.h 2.3.25-shm3/include/asm-m68k/shmparam.h
--- 2.3.25/include/asm-m68k/shmparam.h	Mon Oct 18 14:35:02 1999
+++ 2.3.25-shm3/include/asm-m68k/shmparam.h	Wed Nov  3 11:28:16 1999
@@ -1,49 +1,6 @@
 #ifndef _M68K_SHMPARAM_H
 #define _M68K_SHMPARAM_H

-/* address range for shared memory attaches if no address passed to shmat() */
-#ifndef CONFIG_SUN3
-#define SHM_RANGE_START	0xC0000000
-#define SHM_RANGE_END	0xD0000000
-#else
-#define SHM_RANGE_START	0x0C000000
-#define SHM_RANGE_END	0x0D000000
-#endif
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x1000000		/* max shared seg size (bytes) */
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define	SHMLBA PAGE_SIZE		/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */
+#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */

 #endif /* _M68K_SHMPARAM_H */
diff -uNr 2.3.25/include/asm-mips/shmparam.h 2.3.25-shm3/include/asm-mips/shmparam.h
--- 2.3.25/include/asm-mips/shmparam.h	Sat Jun 26 02:37:53 1999
+++ 2.3.25-shm3/include/asm-mips/shmparam.h	Wed Nov  3 11:28:16 1999
@@ -1,49 +1,9 @@
 #ifndef __ASM_MIPS_SHMPARAM_H
 #define __ASM_MIPS_SHMPARAM_H

-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START	0x50000000
-#define SHM_RANGE_END	0x60000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x2000000		/* max shared seg size (bytes) */
-/* Try not to change the default shipped SHMMAX - people rely on it */
-
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
 /*
  * This constant is very large but the ABI in it's wisdom says ...
  */
 #define	SHMLBA 0x40000			/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */

 #endif /* __ASM_MIPS_SHMPARAM_H */
diff -uNr 2.3.25/include/asm-ppc/shmparam.h 2.3.25-shm3/include/asm-ppc/shmparam.h
--- 2.3.25/include/asm-ppc/shmparam.h	Mon Oct 18 14:35:24 1999
+++ 2.3.25-shm3/include/asm-ppc/shmparam.h	Wed Nov  3 11:28:16 1999
@@ -1,44 +1,6 @@
 #ifndef _PPC_SHMPARAM_H
 #define _PPC_SHMPARAM_H

-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START	0x50000000
-#define SHM_RANGE_END	0x60000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x2000000		/* max shared seg size (bytes) */
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define	SHMLBA PAGE_SIZE		/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */
+#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */

 #endif /* _PPC_SHMPARAM_H */
diff -uNr 2.3.25/include/asm-sh/shmparam.h 2.3.25-shm3/include/asm-sh/shmparam.h
--- 2.3.25/include/asm-sh/shmparam.h	Mon Oct 18 14:34:54 1999
+++ 2.3.25-shm3/include/asm-sh/shmparam.h	Wed Nov  3 11:28:16 1999
@@ -1,46 +1,6 @@
 #ifndef __ASM_SH_SHMPARAM_H
 #define __ASM_SH_SHMPARAM_H

-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START	0x50000000
-#define SHM_RANGE_END	0x60000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x2000000		/* max shared seg size (bytes) */
-/* Try not to change the default shipped SHMMAX - people rely on it */
-
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define	SHMLBA PAGE_SIZE		/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */
+#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */

 #endif /* __ASM_SH_SHMPARAM_H */
diff -uNr 2.3.25/include/asm-sparc/shmparam.h 2.3.25-shm3/include/asm-sparc/shmparam.h
--- 2.3.25/include/asm-sparc/shmparam.h	Sun Oct  4 19:22:44 1998
+++ 2.3.25-shm3/include/asm-sparc/shmparam.h	Wed Nov  3 11:28:16 1999
@@ -2,44 +2,6 @@
 #ifndef _ASMSPARC_SHMPARAM_H
 #define _ASMSPARC_SHMPARAM_H

-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START	0x10000000
-#define SHM_RANGE_END	0x20000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x1000000		/* max shared seg size (bytes) */
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define	SHMLBA PAGE_SIZE		/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */
+#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */

 #endif /* _ASMSPARC_SHMPARAM_H */
diff -uNr 2.3.25/include/asm-sparc64/shmparam.h 2.3.25-shm3/include/asm-sparc64/shmparam.h
--- 2.3.25/include/asm-sparc64/shmparam.h	Sun Oct  4 19:22:44 1998
+++ 2.3.25-shm3/include/asm-sparc64/shmparam.h	Wed Nov  3 11:28:16 1999
@@ -2,46 +2,6 @@
 #ifndef _ASMSPARC64_SHMPARAM_H
 #define _ASMSPARC64_SHMPARAM_H

-/* XXX Redo most of this... */
-
-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START	0x10000000
-#define SHM_RANGE_END	0x20000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- *  bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- *  bits 7..21: index of page within shared memory segment (SHM_IDX)
- *		(actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS	7
-#define SHM_ID_MASK	((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT	(_SHM_ID_BITS)
-#define _SHM_IDX_BITS	15
-#define SHM_IDX_MASK	((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x1000000		/* max shared seg size (bytes) */
-#define SHMMIN 1 /* really PAGE_SIZE */	/* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS)	/* max num of segs system wide */
-#define SHMALL				/* max shm system wide (pages) */ \
-	(1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
 #define	SHMLBA (PAGE_SIZE<<1)		/* attach addr a multiple of this */
-#define SHMSEG SHMMNI			/* max shared segs per process */

 #endif /* _ASMSPARC64_SHMPARAM_H */
diff -uNr 2.3.25/include/linux/shm.h 2.3.25-shm3/include/linux/shm.h
--- 2.3.25/include/linux/shm.h	Tue Nov  2 12:46:29 1999
+++ 2.3.25-shm3/include/linux/shm.h	Wed Nov  3 11:28:16 1999
@@ -3,6 +3,17 @@

 #include <linux/ipc.h>

+/*
+ * SHMMAX, SHMMNI and SHMALL are upper limits are defaults which can
+ * be increased by sysctl
+ */
+
+#define SHMMAX 0x2000000		 /* max shared seg size (bytes) */
+#define SHMMIN 1 /* really PAGE_SIZE */	 /* min shared seg size (bytes) */
+#define SHMMNI 128			 /* max num of segs system wide */
+#define SHMALL (SHMMAX/PAGE_SIZE*SHMMNI) /* max shm system wide (pages) */
+#define SHMSEG SHMMNI			 /* max shared segs per process */
+
 #include <asm/shmparam.h>

 struct shmid_ds {
@@ -17,15 +28,6 @@
 	unsigned short 		shm_unused;	/* compatibility */
 	void 			*shm_unused2;	/* ditto - used by DIPC */
 	void			*shm_unused3;	/* unused */
-};
-
-struct shmid_kernel
-{	
-	struct shmid_ds		u;
-	/* the following are private */
-	unsigned long		shm_npages;	/* size of segment (pages) */
-	pte_t			*shm_pages;	/* array of ptrs to frames -> SHMMAX */ 
-	struct vm_area_struct	*attaches;	/* descriptors for attaches */
 };

 /* permission flag for shmget */
diff -uNr 2.3.25/include/linux/swap.h 2.3.25-shm3/include/linux/swap.h
--- 2.3.25/include/linux/swap.h	Wed Nov  3 18:00:50 1999
+++ 2.3.25-shm3/include/linux/swap.h	Wed Nov  3 11:55:52 1999
@@ -122,17 +122,6 @@
 asmlinkage long sys_swapoff(const char *);
 asmlinkage long sys_swapon(const char *, int);

-/*
- * vm_ops not present page codes for shared memory.
- *
- * Will go away eventually..
- */
-#define SHM_SWP_TYPE 0x20
-
-/*
- * swap cache stuff (in linux/mm/swap_state.c)
- */
-
 #define SWAP_CACHE_INFO

 #ifdef SWAP_CACHE_INFO
diff -uNr 2.3.25/include/linux/sysctl.h 2.3.25-shm3/include/linux/sysctl.h
--- 2.3.25/include/linux/sysctl.h	Thu Oct 28 09:26:49 1999
+++ 2.3.25-shm3/include/linux/sysctl.h	Wed Nov  3 19:58:43 1999
@@ -103,8 +103,7 @@
 	KERN_MSGPOOL=37,        /* int: Maximum system message pool size */
 	KERN_SYSRQ=38,		/* int: Sysreq enable */
 	KERN_MAX_THREADS=39,    /* int: Maximum nr of threads in the system */
- 	KERN_RANDOM=40,		/* Random driver */
- 	KERN_SHMALL=41		/* int: Maximum size of shared memory */
+ 	KERN_RANDOM=40		/* Random driver */
 };

diff -uNr 2.3.25/ipc/shm.c 2.3.25-shm3/ipc/shm.c
--- 2.3.25/ipc/shm.c	Tue Nov  2 12:46:29 1999
+++ 2.3.25-shm3/ipc/shm.c	Wed Nov  3 11:55:39 1999
@@ -9,6 +9,8 @@
  * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
  * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
  * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
+ * avoid vmalloc and make shmmax, shmall, shmmni sysctl'able,
+ *                         Christoph Rohland <hans-christoph.rohland@sap.com>
  */

 #include <linux/config.h>
@@ -25,7 +27,17 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>

-extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
+#include "util.h"
+
+struct shmid_kernel /* extend struct shmis_ds with private fields */
+{	
+	struct shmid_ds		u;
+	unsigned long		shm_npages; /* size of segment (pages) */
+	pte_t			**shm_dir;  /* ptr to array of ptrs to frames -> SHMMAX */ 
+	struct vm_area_struct	*attaches;  /* descriptors for attaches */
+        int                     id; /* backreference to id for shm_close */
+};
+
 static int findkey (key_t key);
 static int newseg (key_t key, int shmflg, size_t size);
 static int shm_map (struct vm_area_struct *shmd);
@@ -38,13 +50,15 @@
 static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
 #endif

+unsigned int shm_prm[3] = {SHMMAX, SHMALL, SHMMNI};
+
 static int shm_tot = 0; /* total number of shared memory pages */
 static int shm_rss = 0; /* number of shared memory pages that are in memory */
 static int shm_swp = 0; /* number of shared memory pages that are in swap */
-static int max_shmid = 0; /* every used id is <= max_shmid */
+static int max_shmid = -1; /* every used id is <= max_shmid */
 static DECLARE_WAIT_QUEUE_HEAD(shm_wait); /* calling findkey() may need to wait */
-static struct shmid_kernel *shm_segs[SHMMNI];
-
+static struct shmid_kernel **shm_segs = NULL;
+static unsigned int num_segs = 0;
 static unsigned short shm_seq = 0; /* incremented, for recognizing stale ids */

 spinlock_t shm_lock = SPIN_LOCK_UNLOCKED;
@@ -56,22 +70,109 @@

 void __init shm_init (void)
 {
-	int id;
-
-	for (id = 0; id < SHMMNI; id++)
-		shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
-	shm_tot = shm_rss = shm_seq = max_shmid = used_segs = 0;
-	init_waitqueue_head(&shm_wait);
 #ifdef CONFIG_PROC_FS
 	create_proc_read_entry("sysvipc/shm", 0, 0, sysvipc_shm_read_proc, NULL);
 #endif
 	return;
 }

+#define SHM_ENTRY(shp, index) (shp)->shm_dir[(index)/PTRS_PER_PTE][(index)%PTRS_PER_PTE]
+
+static pte_t **shm_alloc(unsigned long pages)
+{
+        unsigned short dir  = pages / PTRS_PER_PTE;
+        unsigned short last = pages % PTRS_PER_PTE;
+        pte_t **ret, **ptr;
+
+        ret = kmalloc ((dir+1) * sizeof(unsigned long), GFP_KERNEL);
+        if (ret == NULL)
+                return NULL;
+
+        for (ptr = ret; ptr < ret+dir ; ptr++)
+        {
+                *ptr = (pte_t *)__get_free_page (GFP_KERNEL);
+                if (*ptr == NULL)
+                        goto free;
+		memset (*ptr, 0, PAGE_SIZE); 
+        }
+
+        /* The last one is probably not of PAGE_SIZE: we use kmalloc */
+        if (last) {
+                *ptr = kmalloc (last*sizeof(pte_t *), GFP_KERNEL);
+                if (*ptr == NULL)
+                        goto free;
+		memset (*ptr, 0, last*sizeof(pte_t *));
+        }
+        
+        return ret;
+
+free:
+        /* The last failed: we decrement first */
+        while (--ptr >= ret)
+                free_page ((unsigned long)*ptr);
+
+        kfree (ret);
+        return NULL;
+}
+
+
+static void shm_free(pte_t** dir, unsigned long pages)
+{
+        pte_t **ptr = dir+pages/PTRS_PER_PTE;
+
+        /* first the last page */
+        if (pages%PTRS_PER_PTE)
+                kfree (*ptr);
+        /* now the whole pages */
+        while (--ptr >= dir)
+                free_page ((unsigned long)*ptr);
+
+        /* Now the indirect block */
+        kfree (dir);
+}
+
+static int shm_expand (unsigned int size)
+{
+        int id;
+        struct shmid_kernel ** new_array;
+
+        spin_unlock(&shm_lock);
+	new_array = kmalloc (size * sizeof(struct shmid_kernel *), GFP_KERNEL);
+        spin_lock(&shm_lock);
+
+	if (!new_array)
+                return -ENOMEM;
+
+        if (size <= num_segs){ /* We check this after kmalloc so
+                                   nobody changes num_segs afterwards */
+                /*
+                 * We never shrink the segment. If we shrink we have to
+                 * check for stale handles in newseg
+                 */
+                kfree (new_array);
+                return 0;
+        }
+
+        if (num_segs) {
+                memcpy (new_array, shm_segs,
+                        size*sizeof(struct shmid_kernel *));
+                kfree (shm_segs);
+        }
+        for (id = num_segs; id < size; id++) 
+		new_array[id] = (void *) IPC_UNUSED;
+
+        shm_segs = new_array;
+        num_segs = size;
+        return 0;
+}
+
 static int findkey (key_t key)
 {
 	int id;
 	struct shmid_kernel *shp;
+        
+        if (!num_segs)
+                return -1;

 	for (id = 0; id <= max_shmid; id++) {
 		if ((shp = shm_segs[id]) == IPC_NOID) {
@@ -89,9 +190,8 @@
 			__set_current_state(TASK_RUNNING);
 			remove_wait_queue(&shm_wait, &wait);
 		}
-		if (shp == IPC_UNUSED)
-			continue;
-		if (key == shp->u.shm_perm.key)
+		if (shp != IPC_UNUSED &&
+                    key == shp->u.shm_perm.key)
 			return id;
 	}
 	return -1;
@@ -99,18 +199,33 @@

 /*
  * allocate new shmid_kernel and pgtable. protected by shm_segs[id] = NOID.
+ * This has to be called with the shm_lock held
  */
 static int newseg (key_t key, int shmflg, size_t size)
 {
 	struct shmid_kernel *shp;
 	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
-	int id;
+	int id, err;
+        unsigned int shmall, shmmni;

+        lock_kernel();
+        shmall = shm_prm[1];
+        shmmni = shm_prm[2];
+        if (shmmni > IPCMNI) {
+                printk ("shmmni reset to max of %u\n", IPCMNI);
+                shmmni = shm_prm[2] = IPCMNI;
+        }
+        unlock_kernel();
+
+        if (shmmni < used_segs)
+                return -ENOSPC;
+        if ((err = shm_expand (shmmni)))
+                return err;
 	if (size < SHMMIN)
 		return -EINVAL;
-	if (shm_tot + numpages >= SHMALL)
+	if (shm_tot + numpages >= shmall)
 		return -ENOSPC;
-	for (id = 0; id < SHMMNI; id++)
+	for (id = 0; id < num_segs; id++)
 		if (shm_segs[id] == IPC_UNUSED) {
 			shm_segs[id] = (struct shmid_kernel *) IPC_NOID;
 			goto found;
@@ -126,10 +241,8 @@
 		wake_up (&shm_wait);
 		return -ENOMEM;
 	}
-	lock_kernel();
-	shp->shm_pages = (pte_t *) vmalloc (numpages*sizeof(pte_t));
-	unlock_kernel();
-	if (!shp->shm_pages) {
+	shp->shm_dir = shm_alloc (numpages);
+	if (!shp->shm_dir) {
 		kfree(shp);
 		spin_lock(&shm_lock);
 		shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
@@ -137,8 +250,6 @@
 		return -ENOMEM;
 	}

-	memset(shp->shm_pages, 0, numpages*sizeof(pte_t));
-
 	shp->u.shm_perm.key = key;
 	shp->u.shm_perm.mode = (shmflg & S_IRWXUGO);
 	shp->u.shm_perm.cuid = shp->u.shm_perm.uid = current->euid;
@@ -150,6 +261,7 @@
 	shp->u.shm_atime = shp->u.shm_dtime = 0;
 	shp->u.shm_ctime = CURRENT_TIME;
 	shp->shm_npages = numpages;
+        shp->id = id;

 	spin_lock(&shm_lock);

@@ -161,21 +273,25 @@
 	shm_segs[id] = shp;
 	used_segs++;
 	wake_up (&shm_wait);
-	return (unsigned int) shp->u.shm_perm.seq * SHMMNI + id;
+	return (unsigned int) shp->u.shm_perm.seq * IPCMNI + id;
 }

-size_t shmmax = SHMMAX;
-
 asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
 {
 	struct shmid_kernel *shp;
 	int err, id = 0;
+        size_t shmmax;
+
+        lock_kernel();
+        shmmax = shm_prm[0];
+        unlock_kernel();
+
+	if (size > shmmax)
+		return -EINVAL;

 	down(&current->mm->mmap_sem);
 	spin_lock(&shm_lock);
-	if (size > shmmax) {
-		err = -EINVAL;
-	} else if (key == IPC_PRIVATE) {
+	if (key == IPC_PRIVATE) {
 		err = newseg(key, shmflg, size);
 	} else if ((id = findkey (key)) == -1) {
 		if (!(shmflg & IPC_CREAT))
@@ -193,7 +309,7 @@
 		else if (ipcperms (&shp->u.shm_perm, shmflg))
 			err = -EACCES;
 		else
-			err = (int) shp->u.shm_perm.seq * SHMMNI + id;
+			err = (int) shp->u.shm_perm.seq * IPCMNI + id;
 	}
 	spin_unlock(&shm_lock);
 	up(&current->mm->mmap_sem);
@@ -214,18 +330,18 @@
 	if (shp == IPC_NOID || shp == IPC_UNUSED)
 		BUG();
 	shp->u.shm_perm.seq++;     /* for shmat */
-	shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/SHMMNI); /* increment, but avoid overflow */
+	shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/IPCMNI); /* increment, but avoid overflow */
 	shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
 	used_segs--;
 	if (id == max_shmid)
-		while (max_shmid && (shm_segs[--max_shmid] == IPC_UNUSED));
-	if (!shp->shm_pages)
-		BUG();
+		while (max_shmid-- > 0 && (shm_segs[max_shmid] == IPC_UNUSED));
+	if (!shp->shm_dir)
+                BUG();
 	spin_unlock(&shm_lock);
 	numpages = shp->shm_npages;
 	for (i = 0, rss = 0, swp = 0; i < numpages ; i++) {
 		pte_t pte;
-		pte = shp->shm_pages[i];
+		pte = SHM_ENTRY (shp,i);
 		if (pte_none(pte))
 			continue;
 		if (pte_present(pte)) {
@@ -238,9 +354,7 @@
 			swp++;
 		}
 	}
-	lock_kernel();
-	vfree(shp->shm_pages);
-	unlock_kernel();
+	shm_free (shp->shm_dir, numpages);
 	kfree(shp);
 	spin_lock(&shm_lock);
 	shm_rss -= rss;
@@ -269,19 +383,20 @@
 	case IPC_INFO:
 	{
 		struct shminfo shminfo;
+		spin_unlock(&shm_lock);
 		err = -EFAULT;
 		if (!buf)
 			goto out;
-		shminfo.shmmni = SHMMNI;
-		shminfo.shmmax = shmmax;
+                lock_kernel();
+		shminfo.shmmni = shminfo.shmseg = shm_prm[2];
+		shminfo.shmmax = shm_prm[0];
+		shminfo.shmall = shm_prm[1];
+                unlock_kernel();
 		shminfo.shmmin = SHMMIN;
-		shminfo.shmall = SHMALL;
-		shminfo.shmseg = SHMSEG;
-		spin_unlock(&shm_lock);
 		if(copy_to_user (buf, &shminfo, sizeof(struct shminfo)))
 			goto out_unlocked;
 		spin_lock(&shm_lock);
-		err = max_shmid;
+		err = max_shmid < 0 ? 0 : max_shmid;
 		goto out;
 	}
 	case SHM_INFO:
@@ -298,7 +413,7 @@
 		if(copy_to_user (buf, &shm_info, sizeof(shm_info)))
 			goto out_unlocked;
 		spin_lock(&shm_lock);
-		err = max_shmid;
+		err = max_shmid < 0 ? 0 : max_shmid;
 		goto out;
 	}
 	case SHM_STAT:
@@ -310,7 +425,7 @@
 			goto out;
 		if (ipcperms (&shp->u.shm_perm, S_IRUGO))
 			goto out;
-		id = (unsigned int) shp->u.shm_perm.seq * SHMMNI + shmid;
+		id = (unsigned int) shp->u.shm_perm.seq * IPCMNI + shmid;
 		err = -EFAULT;
 		spin_unlock(&shm_lock);
 		if(copy_to_user (buf, &shp->u, sizeof(*buf)))
@@ -320,12 +435,13 @@
 		goto out;
 	}

-	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
 	err = -EINVAL;
-	if (shp == IPC_UNUSED || shp == IPC_NOID)
+	if ((id = (unsigned int) shmid % IPCMNI) > max_shmid)
+		goto out;
+	if ((shp = shm_segs[id]) == IPC_UNUSED || shp == IPC_NOID)
 		goto out;
 	err = -EIDRM;
-	if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
+	if (shp->u.shm_perm.seq != (unsigned int) shmid / IPCMNI)
 		goto out;
 	ipcp = &shp->u.shm_perm;

@@ -480,7 +596,7 @@
 	if (shmid < 0)
 		goto out;

-	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
+	shp = shm_segs[id = (unsigned int) shmid % IPCMNI];
 	if (shp == IPC_UNUSED || shp == IPC_NOID)
 		goto out;

@@ -523,7 +639,7 @@
 	if (ipcperms(&shp->u.shm_perm, shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO))
 		goto out;
 	err = -EIDRM;
-	if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
+	if (shp->u.shm_perm.seq != (unsigned int) shmid / IPCMNI)
 		goto out;

 	spin_unlock(&shm_lock);
@@ -532,13 +648,13 @@
 	spin_lock(&shm_lock);
 	if (!shmd)
 		goto out;
-	if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)) {
+	if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / IPCMNI)) {
 		kmem_cache_free(vm_area_cachep, shmd);
 		err = -EIDRM;
 		goto out;
 	}

-	shmd->vm_private_data = shm_segs + id;
+	shmd->vm_private_data = shm_segs[id];
 	shmd->vm_start = addr;
 	shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
 	shmd->vm_mm = current->mm;
@@ -584,7 +700,7 @@
 	struct shmid_kernel *shp;

 	spin_lock(&shm_lock);
-	shp = *(struct shmid_kernel **) shmd->vm_private_data;
+	shp = (struct shmid_kernel *) shmd->vm_private_data;
 	insert_attach(shp,shmd);  /* insert shmd into shp->attaches */
 	shp->u.shm_nattch++;
 	shp->u.shm_atime = CURRENT_TIME;
@@ -604,14 +720,12 @@

 	spin_lock(&shm_lock);
 	/* remove from the list of attaches of the shm segment */
-	shp = *(struct shmid_kernel **) shmd->vm_private_data;
+	shp = (struct shmid_kernel *) shmd->vm_private_data;
 	remove_attach(shp,shmd);  /* remove from shp->attaches */
   	shp->u.shm_lpid = current->pid;
 	shp->u.shm_dtime = CURRENT_TIME;
-	if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST) {
-		unsigned int id = (struct shmid_kernel **)shmd->vm_private_data - shm_segs;
-		killseg (id);
-	}
+	if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST)
+		killseg (shp->id);
 	spin_unlock(&shm_lock);
 }

@@ -648,7 +762,7 @@
 }

 /*
- * page not present ... go through shm_pages
+ * page not present ... go through shm_dir
  */
 static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
 {
@@ -657,13 +771,13 @@
 	unsigned int idx;
 	struct page * page;

-	shp = *(struct shmid_kernel **) shmd->vm_private_data;
+	shp = (struct shmid_kernel *) shmd->vm_private_data;
 	idx = (address - shmd->vm_start) >> PAGE_SHIFT;
 	idx += shmd->vm_pgoff;

 	spin_lock(&shm_lock);
 again:
-	pte = shp->shm_pages[idx];
+	pte = SHM_ENTRY(shp,idx);
 	if (!pte_present(pte)) {
 		if (pte_none(pte)) {
 			spin_unlock(&shm_lock);
@@ -672,7 +786,7 @@
 				goto oom;
 			clear_highpage(page);
 			spin_lock(&shm_lock);
-			if (pte_val(pte) != pte_val(shp->shm_pages[idx]))
+			if (pte_val(pte) != pte_val(SHM_ENTRY(shp, idx)))
 				goto changed;
 		} else {
 			swp_entry_t entry = pte_to_swp_entry(pte);
@@ -694,18 +808,18 @@
 			unlock_kernel();
 			spin_lock(&shm_lock);
 			shm_swp--;
-			pte = shp->shm_pages[idx];
+			pte = SHM_ENTRY(shp, idx);
 			if (pte_present(pte))
 				goto present;
 		}
 		shm_rss++;
 		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
-		shp->shm_pages[idx] = pte;
+		SHM_ENTRY(shp, idx) = pte;
 	} else
 		--current->maj_flt;  /* was incremented in do_no_page */

 done:
-	/* pte_val(pte) == shp->shm_pages[idx] */
+	/* pte_val(pte) == SHM_ENTRY (shp, idx) */
 	get_page(pte_page(pte));
 	spin_unlock(&shm_lock);
 	current->min_flt++;
@@ -770,7 +884,7 @@
 	if (idx >= shp->shm_npages)
 		goto next_id;

-	page = shp->shm_pages[idx];
+	page = SHM_ENTRY(shp, idx);
 	if (!pte_present(page))
 		goto check_table;
 	page_map = pte_page(page);
@@ -792,7 +906,7 @@
 		goto check_table;
 	if (!(page_map = prepare_highmem_swapout(page_map)))
 		goto check_table;
-	shp->shm_pages[idx] = swp_entry_to_pte(swap_entry);
+	SHM_ENTRY (shp, idx) = swp_entry_to_pte(swap_entry);
 	swap_successes++;
 	shm_swp++;
 	shm_rss--;
@@ -812,12 +926,12 @@
  * Free the swap entry and set the new pte for the shm page.
  */
 static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
-			swp_entry_t entry, struct page *page)
+			   swp_entry_t entry, struct page *page)
 {
 	pte_t pte;

 	pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
-	shp->shm_pages[idx] = pte;
+	SHM_ENTRY(shp, idx) = pte;
 	get_page(page);
 	shm_rss++;

@@ -837,16 +951,16 @@
 	int i, n;

 	spin_lock(&shm_lock);
-	for (i = 0; i < SHMMNI; i++) {
+	for (i = 0; i <= max_shmid; i++) {
 		struct shmid_kernel *seg = shm_segs[i];
 		if ((seg == IPC_UNUSED) || (seg == IPC_NOID))
 			continue;
 		for (n = 0; n < seg->shm_npages; n++) {
-			if (pte_none(seg->shm_pages[n]))
+			if (pte_none(SHM_ENTRY(seg,n)))
 				continue;
-			if (pte_present(seg->shm_pages[n]))
+			if (pte_present(SHM_ENTRY(seg,n)))
 				continue;
-			if (pte_to_swp_entry(seg->shm_pages[n]).val == entry.val) {
+			if (pte_to_swp_entry(SHM_ENTRY(seg,n)).val == entry.val) {
 				shm_unuse_page(seg, n, entry, page);
 				return;
 			}
@@ -865,7 +979,7 @@
     	len += sprintf(buffer, "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime\n");

 	spin_lock(&shm_lock);
-    	for(i = 0; i < SHMMNI; i++)
+    	for(i = 0; i <= max_shmid; i++)
 		if(shm_segs[i] != IPC_UNUSED) {
 #define SMALL_STRING "%10d %10d  %4o %10u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
 #define BIG_STRING   "%10d %10d  %4o %21u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
@@ -877,7 +991,7 @@
 				format = BIG_STRING;
 	    		len += sprintf(buffer + len, format,
 			shm_segs[i]->u.shm_perm.key,
-			shm_segs[i]->u.shm_perm.seq * SHMMNI + i,
+			shm_segs[i]->u.shm_perm.seq * IPCMNI + i,
 			shm_segs[i]->u.shm_perm.mode,
 			shm_segs[i]->u.shm_segsz,
 			shm_segs[i]->u.shm_cpid,
diff -uNr 2.3.25/ipc/util.c 2.3.25-shm3/ipc/util.c
--- 2.3.25/ipc/util.c	Tue Nov  2 12:46:29 1999
+++ 2.3.25-shm3/ipc/util.c	Wed Nov  3 20:04:41 1999
@@ -14,6 +14,8 @@
 #include <linux/init.h>
 #include <linux/msg.h>

+#include "util.h"
+
 #if defined(CONFIG_SYSVIPC)

 extern void sem_init (void), msg_init (void), shm_init (void);
diff -uNr 2.3.25/ipc/util.h 2.3.25-shm3/ipc/util.h
--- 2.3.25/ipc/util.h	Thu Jan  1 01:00:00 1970
+++ 2.3.25-shm3/ipc/util.h	Wed Nov  3 11:28:16 1999
@@ -0,0 +1,12 @@
+/*
+ * linux/ipc/util.h
+ * Copyright (C) 1999 Christoph Rohland
+ */
+
+/*
+ * IPCMNI is the absolute maximum for ipc identifier. This is used to
+ * detect stale identifiers
+ */
+#define IPCMNI (1<<15)          
+
+extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
diff -uNr 2.3.25/kernel/sysctl.c 2.3.25-shm3/kernel/sysctl.c
--- 2.3.25/kernel/sysctl.c	Tue Nov  2 12:46:29 1999
+++ 2.3.25-shm3/kernel/sysctl.c	Wed Nov  3 11:32:02 1999
@@ -49,7 +49,7 @@
 extern int sg_big_buff;
 #endif
 #ifdef CONFIG_SYSVIPC
-extern size_t shmmax;
+extern size_t shm_prm[];
 #endif

 #ifdef __sparc__
@@ -213,7 +213,7 @@
 	{KERN_RTSIGMAX, "rtsig-max", &max_queued_signals, sizeof(int),
 	 0644, NULL, &proc_dointvec},
 #ifdef CONFIG_SYSVIPC
-	{KERN_SHMMAX, "shmmax", &shmmax, sizeof (size_t),
+	{KERN_SHMMAX, "shmmax", &shm_prm, 3*sizeof (size_t),
 	 0644, NULL, &proc_doulongvec_minmax},
 #endif
 #ifdef CONFIG_MAGIC_SYSRQ
diff -uNr 2.3.25/mm/swap_state.c 2.3.25-shm3/mm/swap_state.c
--- 2.3.25/mm/swap_state.c	Tue Nov  2 12:46:29 1999
+++ 2.3.25-shm3/mm/swap_state.c	Wed Nov  3 11:28:16 1999
@@ -68,8 +68,6 @@
 	if (!entry.val)
 		goto out;
 	type = SWP_TYPE(entry);
-	if (type & SHM_SWP_TYPE)
-		goto out;
 	if (type >= nr_swapfiles)
 		goto bad_file;
 	p = type + swap_info;
@@ -115,8 +113,6 @@
 	if (!entry.val)
 		goto bad_entry;
 	type = SWP_TYPE(entry);
-	if (type & SHM_SWP_TYPE)
-		goto out;
 	if (type >= nr_swapfiles)
 		goto bad_file;
 	p = type + swap_info;
diff -uNr 2.3.25/mm/swapfile.c 2.3.25-shm3/mm/swapfile.c
--- 2.3.25/mm/swapfile.c	Tue Nov  2 12:46:29 1999
+++ 2.3.25-shm3/mm/swapfile.c	Wed Nov  3 11:28:16 1999
@@ -135,8 +135,6 @@
 		goto out;

 	type = SWP_TYPE(entry);
-	if (type & SHM_SWP_TYPE)
-		goto out;
 	if (type >= nr_swapfiles)
 		goto bad_nofile;
 	p = & swap_info[type];
@@ -190,8 +188,6 @@
 		goto new_swap_entry;
 	entry.val = page->index;
 	type = SWP_TYPE(entry);
-	if (type & SHM_SWP_TYPE)
-		goto new_swap_entry;
 	if (type >= nr_swapfiles)
 		goto new_swap_entry;
 	p = type + swap_info;
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~1999-11-05 16:28 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
1999-11-03 19:17 [Patch] shm cleanups Christoph Rohland
1999-11-03 21:30 Christoph Rohland
1999-11-04  8:10 ` Ingo Molnar
1999-11-04 12:40   ` Christoph Rohland
1999-11-04 17:58     ` Ingo Molnar
1999-11-04 19:02       ` Rik van Riel
1999-11-04 22:30         ` Ingo Molnar
1999-11-05  0:14           ` Andrea Arcangeli
1999-11-05 12:35             ` Christoph Rohland
1999-11-05 13:18               ` Andrea Arcangeli
1999-11-05 16:16                 ` Christoph Rohland
1999-11-05 16:21                   ` Andrea Arcangeli
1999-11-05 16:28                     ` Christoph Rohland
1999-11-05 10:36           ` Christoph Rohland

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox