* [PATCH] kanoj-mm21-2.3.23 alow larger sizes to shmget()
@ 1999-10-28 22:04 Kanoj Sarcar
1999-11-01 9:41 ` Christoph Rohland
0 siblings, 1 reply; 9+ messages in thread
From: Kanoj Sarcar @ 1999-10-28 22:04 UTC (permalink / raw)
To: torvalds; +Cc: linux-mm
Linus,
Per our previous discussion, this is the patch to change the shmget()
api to permit larger shm segments (now that larger user address spaces,
as well as large memory machines are possible).
Note that I have defined shmget() as
shmget(key_t, size_t, int)
instead of as
shmget(key_t, unsigned int, int)
or as
shmget(key_t, unsigned long, int).
This is because the single unix spec sets down the first definition
(http://www.opengroup.org/onlinepubs/007908799/xsh/shmget.html).
This becomes interesting, because size_t is of different sizes on
different architectures, so the shmfs code has to do careful formatting.
(This logic is also probably needed in the ipcs command).
Let me know if the patch looks okay.
Thanks.
Kanoj
--- /usr/tmp/p_rdiff_a002SA/shm.h Thu Oct 28 14:41:41 1999
+++ include/linux/shm.h Wed Oct 27 11:05:49 1999
@@ -7,7 +7,7 @@
struct shmid_ds {
struct ipc_perm shm_perm; /* operation perms */
- int shm_segsz; /* size of segment (bytes) */
+ size_t shm_segsz; /* size of segment (bytes) */
__kernel_time_t shm_atime; /* last attach time */
__kernel_time_t shm_dtime; /* last detach time */
__kernel_time_t shm_ctime; /* last change time */
@@ -46,7 +46,7 @@
#define SHM_INFO 14
struct shminfo {
- int shmmax;
+ size_t shmmax;
int shmmin;
int shmmni;
int shmseg;
@@ -68,7 +68,7 @@
#define SHM_DEST 01000 /* segment will be destroyed on last detach */
#define SHM_LOCKED 02000 /* segment will not be swapped */
-asmlinkage long sys_shmget (key_t key, int size, int flag);
+asmlinkage long sys_shmget (key_t key, size_t size, int flag);
asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, unsigned long *addr);
asmlinkage long sys_shmdt (char *shmaddr);
asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf);
--- /usr/tmp/p_rdiff_a002SJ/shm.c Thu Oct 28 14:41:55 1999
+++ ipc/shm.c Thu Oct 28 13:35:44 1999
@@ -27,7 +27,7 @@
extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
static int findkey (key_t key);
-static int newseg (key_t key, int shmflg, int size);
+static int newseg (key_t key, int shmflg, size_t size);
static int shm_map (struct vm_area_struct *shmd);
static void killseg (int id);
static void shm_open (struct vm_area_struct *shmd);
@@ -104,7 +104,7 @@
/*
* allocate new shmid_kernel and pgtable. protected by shm_segs[id] = NOID.
*/
-static int newseg (key_t key, int shmflg, int size)
+static int newseg (key_t key, int shmflg, size_t size)
{
struct shmid_kernel *shp;
int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
@@ -168,9 +168,9 @@
return (unsigned int) shp->u.shm_perm.seq * SHMMNI + id;
}
-int shmmax = SHMMAX;
+size_t shmmax = SHMMAX;
-asmlinkage long sys_shmget (key_t key, int size, int shmflg)
+asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
{
struct shmid_kernel *shp;
int err, id = 0;
@@ -177,7 +177,7 @@
down(&current->mm->mmap_sem);
spin_lock(&shm_lock);
- if (size < 0 || size > shmmax) {
+ if (size > shmmax) {
err = -EINVAL;
} else if (key == IPC_PRIVATE) {
err = newseg(key, shmflg, size);
@@ -494,7 +494,7 @@
err = -ENOMEM;
addr = 0;
again:
- if (!(addr = get_unmapped_area(addr, shp->u.shm_segsz)))
+ if (!(addr = get_unmapped_area(addr, (unsigned long)shp->u.shm_segsz)))
goto out;
if(addr & (SHMLBA - 1)) {
addr = (addr + (SHMLBA - 1)) & ~(SHMLBA - 1);
@@ -520,7 +520,7 @@
if (addr < current->mm->start_stack &&
addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4))
goto out;
- if (!(shmflg & SHM_REMAP) && find_vma_intersection(current->mm, addr, addr + shp->u.shm_segsz))
+ if (!(shmflg & SHM_REMAP) && find_vma_intersection(current->mm, addr, addr + (unsigned long)shp->u.shm_segsz))
goto out;
err = -EACCES;
@@ -863,7 +863,15 @@
spin_lock(&shm_lock);
for(i = 0; i < SHMMNI; i++)
if(shm_segs[i] != IPC_UNUSED) {
- len += sprintf(buffer + len, "%10d %10d %4o %10d %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n",
+#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
+#define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
+ char *format;
+
+ if (sizeof(size_t) <= sizeof(int))
+ format = SMALL_STRING;
+ else
+ format = BIG_STRING;
+ len += sprintf(buffer + len, format,
shm_segs[i]->u.shm_perm.key,
shm_segs[i]->u.shm_perm.seq * SHMMNI + i,
shm_segs[i]->u.shm_perm.mode,
--- /usr/tmp/p_rdiff_a002SS/sysctl.c Thu Oct 28 14:42:10 1999
+++ kernel/sysctl.c Thu Oct 28 12:47:50 1999
@@ -49,7 +49,7 @@
extern int sg_big_buff;
#endif
#ifdef CONFIG_SYSVIPC
-extern int shmmax;
+extern size_t shmmax;
#endif
#ifdef __sparc__
@@ -213,8 +213,8 @@
{KERN_RTSIGMAX, "rtsig-max", &max_queued_signals, sizeof(int),
0644, NULL, &proc_dointvec},
#ifdef CONFIG_SYSVIPC
- {KERN_SHMMAX, "shmmax", &shmmax, sizeof (int),
- 0644, NULL, &proc_dointvec},
+ {KERN_SHMMAX, "shmmax", &shmmax, sizeof (size_t),
+ 0644, NULL, &proc_doulongvec_minmax},
#endif
#ifdef CONFIG_MAGIC_SYSRQ
{KERN_SYSRQ, "sysrq", &sysrq_enabled, sizeof (int),
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] kanoj-mm21-2.3.23 alow larger sizes to shmget()
1999-10-28 22:04 [PATCH] kanoj-mm21-2.3.23 alow larger sizes to shmget() Kanoj Sarcar
@ 1999-11-01 9:41 ` Christoph Rohland
1999-11-01 17:00 ` Kanoj Sarcar
0 siblings, 1 reply; 9+ messages in thread
From: Christoph Rohland @ 1999-11-01 9:41 UTC (permalink / raw)
To: Kanoj Sarcar; +Cc: torvalds, linux-mm
Hi Kanoj,
This is probably breaking user space applications since shmid_ds is
shared with user space in shmctl(2). On 32bit machines this does not
matter, since sizeof(int) == sizeof(size_t), but on 64bit this will
break.
How do we handle this?
Greetings
Christoph
kanoj@google.engr.sgi.com (Kanoj Sarcar) writes:
> Linus,
>
> Per our previous discussion, this is the patch to change the shmget()
> api to permit larger shm segments (now that larger user address spaces,
> as well as large memory machines are possible).
>
> Note that I have defined shmget() as
> shmget(key_t, size_t, int)
> instead of as
> shmget(key_t, unsigned int, int)
> or as
> shmget(key_t, unsigned long, int).
>
> This is because the single unix spec sets down the first definition
> (http://www.opengroup.org/onlinepubs/007908799/xsh/shmget.html).
> This becomes interesting, because size_t is of different sizes on
> different architectures, so the shmfs code has to do careful formatting.
> (This logic is also probably needed in the ipcs command).
>
> Let me know if the patch looks okay.
>
> Thanks.
>
> Kanoj
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] kanoj-mm21-2.3.23 alow larger sizes to shmget()
1999-11-01 9:41 ` Christoph Rohland
@ 1999-11-01 17:00 ` Kanoj Sarcar
1999-11-02 9:54 ` Christoph Rohland
1999-11-03 9:06 ` Christoph Rohland
0 siblings, 2 replies; 9+ messages in thread
From: Kanoj Sarcar @ 1999-11-01 17:00 UTC (permalink / raw)
To: Christoph Rohland; +Cc: torvalds, linux-mm
>
> Hi Kanoj,
>
> This is probably breaking user space applications since shmid_ds is
> shared with user space in shmctl(2). On 32bit machines this does not
> matter, since sizeof(int) == sizeof(size_t), but on 64bit this will
> break.
>
> How do we handle this?
Unfortunately, I don't think we can prevent this 64bit ABI breakage, if
we want to conform to the single unix spec on those platforms. It's
probably a good idea to have the ia64 port be SUS compliant, even though
sparc64/alpha are currently not.
If it is really important to preserve the 64bit ABI, there's one more
alternative: preserve the shmget() api/abi on the old 64bit platforms, but
be compliant on the 32 bit ones and newer 64 bit ones (mips64/ia64). This
is not the cleanest solution, but can be done with a little header file
reorganization in include/linux/shm.h and include/linux/shmparam.h.
Linus has put this patch into pre-25; let's talk if it is important to
do the above ... it shouldn't take me more than a couple of hours to
do it, if we so decide.
Thanks.
Kanoj
>
> Greetings
> Christoph
>
> kanoj@google.engr.sgi.com (Kanoj Sarcar) writes:
>
> > Linus,
> >
> > Per our previous discussion, this is the patch to change the shmget()
> > api to permit larger shm segments (now that larger user address spaces,
> > as well as large memory machines are possible).
> >
> > Note that I have defined shmget() as
> > shmget(key_t, size_t, int)
> > instead of as
> > shmget(key_t, unsigned int, int)
> > or as
> > shmget(key_t, unsigned long, int).
> >
> > This is because the single unix spec sets down the first definition
> > (http://www.opengroup.org/onlinepubs/007908799/xsh/shmget.html).
> > This becomes interesting, because size_t is of different sizes on
> > different architectures, so the shmfs code has to do careful formatting.
> > (This logic is also probably needed in the ipcs command).
> >
> > Let me know if the patch looks okay.
> >
> > Thanks.
> >
> > Kanoj
>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] kanoj-mm21-2.3.23 alow larger sizes to shmget()
1999-11-01 17:00 ` Kanoj Sarcar
@ 1999-11-02 9:54 ` Christoph Rohland
1999-11-02 21:24 ` Kanoj Sarcar
1999-11-03 9:06 ` Christoph Rohland
1 sibling, 1 reply; 9+ messages in thread
From: Christoph Rohland @ 1999-11-02 9:54 UTC (permalink / raw)
To: Kanoj Sarcar; +Cc: torvalds, linux-mm
[-- Attachment #1: Type: text/plain, Size: 1930 bytes --]
> >
> > Hi Kanoj,
> >
> > This is probably breaking user space applications since shmid_ds is
> > shared with user space in shmctl(2). On 32bit machines this does not
> > matter, since sizeof(int) == sizeof(size_t), but on 64bit this will
> > break.
> >
> > How do we handle this?
>
> Unfortunately, I don't think we can prevent this 64bit ABI breakage, if
> we want to conform to the single unix spec on those platforms. Its
> probably a good idea to have the ia64 port be SUS compliant, even though
> sparc64/alpha are currently not.
>
> If it is really important to preserve the 64bit ABI, there's one more
> alternative: preserve the shmget() api/abi on the old 64bit platforms, but
> be compliant on the 32 bit ones and newer 64 bit ones (mips64/ia64). This
> is not the cleanest solution, but can be done with a little header file
> reorganization in include/linux/shm.h and include/linux/shmparam.h.
>
> Linus has put this patch into pre-25, lets talk if it is important to
> do the above ... it shouldn't take me more than a couple of hours to
> do it, if we so decided.
Since glibc is encapsulating these calls and headers, we could perhaps
work with a compatibility version, e.g. making shmget and shmctl real
system calls and converting the structures in sys_ipc to the old ones
for old libraries?
BTW, I did some work to clean up the shm code and make the limits
settable via sysctl. It also avoids vmalloc for the page tables. The
latter is really important for big servers: we run out of vm-space on
some benchmarks. I appended the patch against 2.3.24. I could not
fully test this patch, since shm swapping apparently has a race
condition on segment deletion that was introduced with the SMP version.
I am still investigating that. But perhaps we could incorporate this
patch anyway. It did survive stress testing of shm swapping as long as
I did not remove segments.
Greetings
Christoph
[-- Attachment #2: patch-24-shm1 --]
[-- Type: application/octet-stream, Size: 44809 bytes --]
diff -uNr 2.3.24/arch/i386/kernel/sys_i386.c 2.3.24-shm1/arch/i386/kernel/sys_i386.c
--- 2.3.24/arch/i386/kernel/sys_i386.c Fri Dec 18 01:27:35 1998
+++ 2.3.24-shm1/arch/i386/kernel/sys_i386.c Mon Nov 1 15:51:36 1999
@@ -116,86 +116,71 @@
version = call >> 16; /* hack for backward compatibility */
call &= 0xffff;
- if (call <= SEMCTL)
- switch (call) {
- case SEMOP:
- return sys_semop (first, (struct sembuf *)ptr, second);
- case SEMGET:
- return sys_semget (first, second, third);
- case SEMCTL: {
- union semun fourth;
- if (!ptr)
- return -EINVAL;
- if (get_user(fourth.__pad, (void **) ptr))
- return -EFAULT;
- return sys_semctl (first, second, third, fourth);
- }
- default:
- return -EINVAL;
- }
+ switch (call) {
+ case SEMOP:
+ return sys_semop (first, (struct sembuf *)ptr, second);
+ case SEMGET:
+ return sys_semget (first, second, third);
+ case SEMCTL: {
+ union semun fourth;
+ if (!ptr)
+ return -EINVAL;
+ if (get_user(fourth.__pad, (void **) ptr))
+ return -EFAULT;
+ return sys_semctl (first, second, third, fourth);
+ }
- if (call <= MSGCTL)
- switch (call) {
- case MSGSND:
- return sys_msgsnd (first, (struct msgbuf *) ptr,
- second, third);
- case MSGRCV:
- switch (version) {
- case 0: {
- struct ipc_kludge tmp;
- if (!ptr)
- return -EINVAL;
-
- if (copy_from_user(&tmp,
- (struct ipc_kludge *) ptr,
- sizeof (tmp)))
- return -EFAULT;
- return sys_msgrcv (first, tmp.msgp, second,
- tmp.msgtyp, third);
- }
- default:
- return sys_msgrcv (first,
- (struct msgbuf *) ptr,
- second, fifth, third);
- }
- case MSGGET:
- return sys_msgget ((key_t) first, second);
- case MSGCTL:
- return sys_msgctl (first, second,
- (struct msqid_ds *) ptr);
- default:
- return -EINVAL;
- }
- if (call <= SHMCTL)
- switch (call) {
- case SHMAT:
- switch (version) {
- default: {
- ulong raddr;
- ret = sys_shmat (first, (char *) ptr,
- second, &raddr);
- if (ret)
- return ret;
- return put_user (raddr, (ulong *) third);
- }
- case 1: /* iBCS2 emulator entry point */
- if (!segment_eq(get_fs(), get_ds()))
- return -EINVAL;
- return sys_shmat (first, (char *) ptr,
- second, (ulong *) third);
- }
- case SHMDT:
- return sys_shmdt ((char *)ptr);
- case SHMGET:
- return sys_shmget (first, second, third);
- case SHMCTL:
- return sys_shmctl (first, second,
- (struct shmid_ds *) ptr);
- default:
- return -EINVAL;
- }
-
- return -EINVAL;
+ case MSGSND:
+ return sys_msgsnd (first, (struct msgbuf *) ptr,
+ second, third);
+ case MSGRCV:
+ switch (version) {
+ case 0: {
+ struct ipc_kludge tmp;
+ if (!ptr)
+ return -EINVAL;
+
+ if (copy_from_user(&tmp,
+ (struct ipc_kludge *) ptr,
+ sizeof (tmp)))
+ return -EFAULT;
+ return sys_msgrcv (first, tmp.msgp, second,
+ tmp.msgtyp, third);
+ }
+ default:
+ return sys_msgrcv (first,
+ (struct msgbuf *) ptr,
+ second, fifth, third);
+ }
+ case MSGGET:
+ return sys_msgget ((key_t) first, second);
+ case MSGCTL:
+ return sys_msgctl (first, second, (struct msqid_ds *) ptr);
+
+ case SHMAT:
+ switch (version) {
+ default: {
+ ulong raddr;
+ ret = sys_shmat (first, (char *) ptr, second, &raddr);
+ if (ret)
+ return ret;
+ return put_user (raddr, (ulong *) third);
+ }
+ case 1: /* iBCS2 emulator entry point */
+ if (!segment_eq(get_fs(), get_ds()))
+ return -EINVAL;
+ return sys_shmat (first, (char *) ptr, second, (ulong *) third);
+ }
+ case SHMDT:
+ return sys_shmdt ((char *)ptr);
+ case SHMGET:
+ return sys_shmget (first, second, third);
+ case SHMCTL:
+ return sys_shmctl (first, second,
+ (struct shmid_ds *) ptr);
+ default:
+ return -EINVAL;
+ }
}
/*
diff -uNr 2.3.24/include/asm-alpha/shmparam.h 2.3.24-shm1/include/asm-alpha/shmparam.h
--- 2.3.24/include/asm-alpha/shmparam.h Mon Oct 7 14:12:29 1996
+++ 2.3.24-shm1/include/asm-alpha/shmparam.h Mon Nov 1 15:51:36 1999
@@ -1,47 +1,6 @@
#ifndef _ASMAXP_SHMPARAM_H
#define _ASMAXP_SHMPARAM_H
-/*
- * Address range for shared memory attaches if no address passed to shmat().
- */
-#define SHM_RANGE_START 0x14000000000
-#define SHM_RANGE_END 0x15000000000
-
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- * bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- * bits 7..21: index of page within shared memory segment (SHM_IDX)
- * (actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS 7
-#define SHM_ID_MASK ((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT (_SHM_ID_BITS)
-#define _SHM_IDX_BITS 15
-#define SHM_IDX_MASK ((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the Alpha and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x3fa000 /* max shared seg size (bytes) */
-#define SHMMIN 1 /* really PAGE_SIZE */ /* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS) /* max num of segs system wide */
-#define SHMALL /* max shm system wide (pages) */ \
- (1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
-#define SHMSEG SHMMNI /* max shared segs per process */
+#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
#endif /* _ASMAXP_SHMPARAM_H */
diff -uNr 2.3.24/include/asm-arm/proc-armo/shmparam.h 2.3.24-shm1/include/asm-arm/proc-armo/shmparam.h
--- 2.3.24/include/asm-arm/proc-armo/shmparam.h Wed Jan 21 01:39:42 1998
+++ 2.3.24-shm1/include/asm-arm/proc-armo/shmparam.h Mon Nov 1 15:51:36 1999
@@ -9,9 +9,7 @@
#ifndef __ASM_PROC_SHMPARAM_H
#define __ASM_PROC_SHMPARAM_H
-#ifndef SHM_RANGE_START
-#define SHM_RANGE_START 0x00a00000
-#define SHM_RANGE_END 0x00c00000
+#ifndef SHMMAX
#define SHMMAX 0x003fa000
#endif
diff -uNr 2.3.24/include/asm-arm/proc-armv/shmparam.h 2.3.24-shm1/include/asm-arm/proc-armv/shmparam.h
--- 2.3.24/include/asm-arm/proc-armv/shmparam.h Wed Jan 21 01:39:43 1998
+++ 2.3.24-shm1/include/asm-arm/proc-armv/shmparam.h Mon Nov 1 15:51:36 1999
@@ -10,9 +10,7 @@
#ifndef __ASM_PROC_SHMPARAM_H
#define __ASM_PROC_SHMPARAM_H
-#ifndef SHM_RANGE_START
-#define SHM_RANGE_START 0x50000000
-#define SHM_RANGE_END 0x60000000
+#ifndef SHMMAX
#define SHMMAX 0x01000000
#endif
diff -uNr 2.3.24/include/asm-arm/shmparam.h 2.3.24-shm1/include/asm-arm/shmparam.h
--- 2.3.24/include/asm-arm/shmparam.h Sun Oct 24 11:22:45 1999
+++ 2.3.24-shm1/include/asm-arm/shmparam.h Mon Nov 1 15:51:36 1999
@@ -3,39 +3,6 @@
#include <asm/proc/shmparam.h>
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- * bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- * bits 7..21: index of page within shared memory segment (SHM_IDX)
- * (actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS 7
-#define SHM_ID_MASK ((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT (_SHM_ID_BITS)
-#define _SHM_IDX_BITS 15
-#define SHM_IDX_MASK ((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMIN 1 /* really PAGE_SIZE */ /* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS) /* max num of segs system wide */
-#define SHMALL /* max shm system wide (pages) */ \
- (1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
-#define SHMSEG SHMMNI /* max shared segs per process */
+#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
#endif /* _ASMARM_SHMPARAM_H */
diff -uNr 2.3.24/include/asm-i386/shmparam.h 2.3.24-shm1/include/asm-i386/shmparam.h
--- 2.3.24/include/asm-i386/shmparam.h Sun Nov 8 23:06:18 1998
+++ 2.3.24-shm1/include/asm-i386/shmparam.h Mon Nov 1 15:51:36 1999
@@ -1,46 +1,6 @@
#ifndef _ASMI386_SHMPARAM_H
#define _ASMI386_SHMPARAM_H
-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START 0x50000000
-#define SHM_RANGE_END 0x60000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- * bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- * bits 7..21: index of page within shared memory segment (SHM_IDX)
- * (actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS 7
-#define SHM_ID_MASK ((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT (_SHM_ID_BITS)
-#define _SHM_IDX_BITS 15
-#define SHM_IDX_MASK ((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x2000000 /* max shared seg size (bytes) */
-/* Try not to change the default shipped SHMMAX - people rely on it */
-
-#define SHMMIN 1 /* really PAGE_SIZE */ /* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS) /* max num of segs system wide */
-#define SHMALL /* max shm system wide (pages) */ \
- (1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
-#define SHMSEG SHMMNI /* max shared segs per process */
+#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
#endif /* _ASMI386_SHMPARAM_H */
diff -uNr 2.3.24/include/asm-i386/softirq.h 2.3.24-shm1/include/asm-i386/softirq.h
--- 2.3.24/include/asm-i386/softirq.h Mon Nov 1 13:10:42 1999
+++ 2.3.24-shm1/include/asm-i386/softirq.h Mon Nov 1 15:51:36 1999
@@ -3,7 +3,6 @@
#include <asm/atomic.h>
#include <asm/hardirq.h>
-#include <linux/spinlock.h>
extern unsigned int local_bh_count[NR_CPUS];
diff -uNr 2.3.24/include/asm-m68k/shm.h 2.3.24-shm1/include/asm-m68k/shm.h
--- 2.3.24/include/asm-m68k/shm.h Sat Sep 18 20:49:55 1999
+++ 2.3.24-shm1/include/asm-m68k/shm.h Mon Nov 1 15:51:36 1999
@@ -4,7 +4,6 @@
/* format of page table entries that correspond to shared memory pages
currently out in swap space (see also mm/swap.c):
bits 0-1 (PAGE_PRESENT) is = 0
- bits 8..2 (SWP_TYPE) are = SHM_SWP_TYPE
bits 31..9 are used like this:
bits 15..9 (SHM_ID) the id of the shared memory segment
bits 30..16 (SHM_IDX) the index of the page within the shared memory segment
diff -uNr 2.3.24/include/asm-m68k/shmparam.h 2.3.24-shm1/include/asm-m68k/shmparam.h
--- 2.3.24/include/asm-m68k/shmparam.h Sat Sep 18 20:49:55 1999
+++ 2.3.24-shm1/include/asm-m68k/shmparam.h Mon Nov 1 15:51:36 1999
@@ -1,49 +1,6 @@
#ifndef _M68K_SHMPARAM_H
#define _M68K_SHMPARAM_H
-/* address range for shared memory attaches if no address passed to shmat() */
-#ifndef CONFIG_SUN3
-#define SHM_RANGE_START 0xC0000000
-#define SHM_RANGE_END 0xD0000000
-#else
-#define SHM_RANGE_START 0x0C000000
-#define SHM_RANGE_END 0x0D000000
-#endif
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- * bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- * bits 7..21: index of page within shared memory segment (SHM_IDX)
- * (actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS 7
-#define SHM_ID_MASK ((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT (_SHM_ID_BITS)
-#define _SHM_IDX_BITS 15
-#define SHM_IDX_MASK ((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x1000000 /* max shared seg size (bytes) */
-#define SHMMIN 1 /* really PAGE_SIZE */ /* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS) /* max num of segs system wide */
-#define SHMALL /* max shm system wide (pages) */ \
- (1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
-#define SHMSEG SHMMNI /* max shared segs per process */
+#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
#endif /* _M68K_SHMPARAM_H */
diff -uNr 2.3.24/include/asm-mips/shmparam.h 2.3.24-shm1/include/asm-mips/shmparam.h
--- 2.3.24/include/asm-mips/shmparam.h Sat Jun 26 02:37:53 1999
+++ 2.3.24-shm1/include/asm-mips/shmparam.h Mon Nov 1 15:51:36 1999
@@ -1,49 +1,9 @@
#ifndef __ASM_MIPS_SHMPARAM_H
#define __ASM_MIPS_SHMPARAM_H
-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START 0x50000000
-#define SHM_RANGE_END 0x60000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- * bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- * bits 7..21: index of page within shared memory segment (SHM_IDX)
- * (actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS 7
-#define SHM_ID_MASK ((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT (_SHM_ID_BITS)
-#define _SHM_IDX_BITS 15
-#define SHM_IDX_MASK ((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x2000000 /* max shared seg size (bytes) */
-/* Try not to change the default shipped SHMMAX - people rely on it */
-
-#define SHMMIN 1 /* really PAGE_SIZE */ /* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS) /* max num of segs system wide */
-#define SHMALL /* max shm system wide (pages) */ \
- (1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
/*
* This constant is very large but the ABI in it's wisdom says ...
*/
#define SHMLBA 0x40000 /* attach addr a multiple of this */
-#define SHMSEG SHMMNI /* max shared segs per process */
#endif /* __ASM_MIPS_SHMPARAM_H */
diff -uNr 2.3.24/include/asm-ppc/shmparam.h 2.3.24-shm1/include/asm-ppc/shmparam.h
--- 2.3.24/include/asm-ppc/shmparam.h Sun Oct 10 14:02:31 1999
+++ 2.3.24-shm1/include/asm-ppc/shmparam.h Mon Nov 1 15:51:36 1999
@@ -1,44 +1,6 @@
#ifndef _PPC_SHMPARAM_H
#define _PPC_SHMPARAM_H
-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START 0x50000000
-#define SHM_RANGE_END 0x60000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- * bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- * bits 7..21: index of page within shared memory segment (SHM_IDX)
- * (actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS 7
-#define SHM_ID_MASK ((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT (_SHM_ID_BITS)
-#define _SHM_IDX_BITS 15
-#define SHM_IDX_MASK ((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x2000000 /* max shared seg size (bytes) */
-#define SHMMIN 1 /* really PAGE_SIZE */ /* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS) /* max num of segs system wide */
-#define SHMALL /* max shm system wide (pages) */ \
- (1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
-#define SHMSEG SHMMNI /* max shared segs per process */
+#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
#endif /* _PPC_SHMPARAM_H */
diff -uNr 2.3.24/include/asm-sh/shmparam.h 2.3.24-shm1/include/asm-sh/shmparam.h
--- 2.3.24/include/asm-sh/shmparam.h Sat Sep 18 20:49:36 1999
+++ 2.3.24-shm1/include/asm-sh/shmparam.h Mon Nov 1 15:51:36 1999
@@ -1,46 +1,6 @@
#ifndef __ASM_SH_SHMPARAM_H
#define __ASM_SH_SHMPARAM_H
-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START 0x50000000
-#define SHM_RANGE_END 0x60000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- * bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- * bits 7..21: index of page within shared memory segment (SHM_IDX)
- * (actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS 7
-#define SHM_ID_MASK ((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT (_SHM_ID_BITS)
-#define _SHM_IDX_BITS 15
-#define SHM_IDX_MASK ((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x2000000 /* max shared seg size (bytes) */
-/* Try not to change the default shipped SHMMAX - people rely on it */
-
-#define SHMMIN 1 /* really PAGE_SIZE */ /* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS) /* max num of segs system wide */
-#define SHMALL /* max shm system wide (pages) */ \
- (1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
-#define SHMSEG SHMMNI /* max shared segs per process */
+#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
#endif /* __ASM_SH_SHMPARAM_H */
diff -uNr 2.3.24/include/asm-sparc/shmparam.h 2.3.24-shm1/include/asm-sparc/shmparam.h
--- 2.3.24/include/asm-sparc/shmparam.h Sun Oct 4 19:22:44 1998
+++ 2.3.24-shm1/include/asm-sparc/shmparam.h Mon Nov 1 15:51:36 1999
@@ -2,44 +2,6 @@
#ifndef _ASMSPARC_SHMPARAM_H
#define _ASMSPARC_SHMPARAM_H
-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START 0x10000000
-#define SHM_RANGE_END 0x20000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- * bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- * bits 7..21: index of page within shared memory segment (SHM_IDX)
- * (actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS 7
-#define SHM_ID_MASK ((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT (_SHM_ID_BITS)
-#define _SHM_IDX_BITS 15
-#define SHM_IDX_MASK ((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x1000000 /* max shared seg size (bytes) */
-#define SHMMIN 1 /* really PAGE_SIZE */ /* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS) /* max num of segs system wide */
-#define SHMALL /* max shm system wide (pages) */ \
- (1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
-#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
-#define SHMSEG SHMMNI /* max shared segs per process */
+#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
#endif /* _ASMSPARC_SHMPARAM_H */
diff -uNr 2.3.24/include/asm-sparc64/shmparam.h 2.3.24-shm1/include/asm-sparc64/shmparam.h
--- 2.3.24/include/asm-sparc64/shmparam.h Sun Oct 4 19:22:44 1998
+++ 2.3.24-shm1/include/asm-sparc64/shmparam.h Mon Nov 1 15:51:36 1999
@@ -2,46 +2,6 @@
#ifndef _ASMSPARC64_SHMPARAM_H
#define _ASMSPARC64_SHMPARAM_H
-/* XXX Redo most of this... */
-
-/* address range for shared memory attaches if no address passed to shmat() */
-#define SHM_RANGE_START 0x10000000
-#define SHM_RANGE_END 0x20000000
-
-/*
- * Format of a swap-entry for shared memory pages currently out in
- * swap space (see also mm/swap.c).
- *
- * SWP_TYPE = SHM_SWP_TYPE
- * SWP_OFFSET is used as follows:
- *
- * bits 0..6 : id of shared memory segment page belongs to (SHM_ID)
- * bits 7..21: index of page within shared memory segment (SHM_IDX)
- * (actually fewer bits get used since SHMMAX is so low)
- */
-
-/*
- * Keep _SHM_ID_BITS as low as possible since SHMMNI depends on it and
- * there is a static array of size SHMMNI.
- */
-#define _SHM_ID_BITS 7
-#define SHM_ID_MASK ((1<<_SHM_ID_BITS)-1)
-
-#define SHM_IDX_SHIFT (_SHM_ID_BITS)
-#define _SHM_IDX_BITS 15
-#define SHM_IDX_MASK ((1<<_SHM_IDX_BITS)-1)
-
-/*
- * _SHM_ID_BITS + _SHM_IDX_BITS must be <= 24 on the i386 and
- * SHMMAX <= (PAGE_SIZE << _SHM_IDX_BITS).
- */
-
-#define SHMMAX 0x1000000 /* max shared seg size (bytes) */
-#define SHMMIN 1 /* really PAGE_SIZE */ /* min shared seg size (bytes) */
-#define SHMMNI (1<<_SHM_ID_BITS) /* max num of segs system wide */
-#define SHMALL /* max shm system wide (pages) */ \
- (1<<(_SHM_IDX_BITS+_SHM_ID_BITS))
#define SHMLBA (PAGE_SIZE<<1) /* attach addr a multiple of this */
-#define SHMSEG SHMMNI /* max shared segs per process */
#endif /* _ASMSPARC64_SHMPARAM_H */
diff -uNr 2.3.24/include/linux/shm.h 2.3.24-shm1/include/linux/shm.h
--- 2.3.24/include/linux/shm.h Sun Oct 24 11:22:49 1999
+++ 2.3.24-shm1/include/linux/shm.h Mon Nov 1 15:51:36 1999
@@ -3,6 +3,17 @@
#include <linux/ipc.h>
+/*
+ * SHMMAX, SHMMNI and SHMALL are default upper limits which can be
+ * increased by sysctl
+ */
+
+#define SHMMAX 0x2000000 /* max shared seg size (bytes) */
+#define SHMMIN 1 /* really PAGE_SIZE */ /* min shared seg size (bytes) */
+#define SHMMNI 128 /* max num of segs system wide */
+#define SHMALL (SHMMAX/PAGE_SIZE*SHMMNI) /* max shm system wide (pages) */
+#define SHMSEG SHMMNI /* max shared segs per process */
+
#include <asm/shmparam.h>
struct shmid_ds {
@@ -17,15 +28,6 @@
unsigned short shm_unused; /* compatibility */
void *shm_unused2; /* ditto - used by DIPC */
void *shm_unused3; /* unused */
-};
-
-struct shmid_kernel
-{
- struct shmid_ds u;
- /* the following are private */
- unsigned long shm_npages; /* size of segment (pages) */
- pte_t *shm_pages; /* array of ptrs to frames -> SHMMAX */
- struct vm_area_struct *attaches; /* descriptors for attaches */
};
/* permission flag for shmget */
diff -uNr 2.3.24/include/linux/swap.h 2.3.24-shm1/include/linux/swap.h
--- 2.3.24/include/linux/swap.h Mon Nov 1 13:10:44 1999
+++ 2.3.24-shm1/include/linux/swap.h Mon Nov 1 15:51:36 1999
@@ -122,17 +122,6 @@
asmlinkage long sys_swapoff(const char *);
asmlinkage long sys_swapon(const char *, int);
-/*
- * vm_ops not present page codes for shared memory.
- *
- * Will go away eventually..
- */
-#define SHM_SWP_TYPE 0x20
-
-/*
- * swap cache stuff (in linux/mm/swap_state.c)
- */
-
#define SWAP_CACHE_INFO
#ifdef SWAP_CACHE_INFO
diff -uNr 2.3.24/include/linux/sysctl.h 2.3.24-shm1/include/linux/sysctl.h
--- 2.3.24/include/linux/sysctl.h Sat Oct 30 19:38:12 1999
+++ 2.3.24-shm1/include/linux/sysctl.h Mon Nov 1 15:51:36 1999
@@ -104,7 +104,7 @@
KERN_SYSRQ=38, /* int: Sysreq enable */
KERN_MAX_THREADS=39, /* int: Maximum nr of threads in the system */
KERN_RANDOM=40, /* Random driver */
- KERN_SHMALL=41 /* int: Maximum size of shared memory */
+ KERN_SEMMAX=41, /* int: limits for semaphores */
};
diff -uNr 2.3.24/ipc/shm.c 2.3.24-shm1/ipc/shm.c
--- 2.3.24/ipc/shm.c Sat Oct 30 19:38:13 1999
+++ 2.3.24-shm1/ipc/shm.c Mon Nov 1 15:51:36 1999
@@ -9,6 +9,8 @@
* BIGMEM support, Andrea Arcangeli <andrea@suse.de>
* SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
* HIGHMEM support, Ingo Molnar <mingo@redhat.com>
+ * avoid vmalloc and make shmmax, shmall, shmmni sysctl'able,
+ * Christoph Rohland <hans-christoph.rohland@sap.com>
*/
#include <linux/config.h>
@@ -25,7 +27,17 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
+#include "util.h"
+
+struct shmid_kernel /* extend struct shmid_ds with private fields */
+{
+ struct shmid_ds u; /* part that shmctl copies out to user space */
+ unsigned long shm_npages; /* size of segment (pages) */
+ pte_t **shm_dir; /* ptr to array of ptrs to frames -> SHMMAX */
+ struct vm_area_struct *attaches; /* descriptors for attaches */
+ int id; /* backreference to id for shm_close */
+};
+
static int findkey (key_t key);
static int newseg (key_t key, int shmflg, int size);
static int shm_map (struct vm_area_struct *shmd);
@@ -38,13 +50,15 @@
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
#endif
+/* tunable limits: [0] = shmmax (bytes), [1] = shmall (pages), [2] = shmmni */
+unsigned int shm_prm[3] = {SHMMAX, SHMALL, SHMMNI};
+
static int shm_tot = 0; /* total number of shared memory pages */
static int shm_rss = 0; /* number of shared memory pages that are in memory */
static int shm_swp = 0; /* number of shared memory pages that are in swap */
-static int max_shmid = 0; /* every used id is <= max_shmid */
+static int max_shmid = -1; /* every used id is <= max_shmid */
static DECLARE_WAIT_QUEUE_HEAD(shm_wait); /* calling findkey() may need to wait */
-static struct shmid_kernel *shm_segs[SHMMNI];
-
+static struct shmid_kernel **shm_segs = NULL;
+static unsigned int num_segs = 0;
static unsigned short shm_seq = 0; /* incremented, for recognizing stale ids */
spinlock_t shm_lock = SPIN_LOCK_UNLOCKED;
@@ -56,15 +70,12 @@
void __init shm_init (void)
{
- int id;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *ent;
#endif
- for (id = 0; id < SHMMNI; id++)
- shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
- shm_tot = shm_rss = shm_seq = max_shmid = used_segs = 0;
init_waitqueue_head(&shm_wait);
+
#ifdef CONFIG_PROC_FS
ent = create_proc_entry("sysvipc/shm", 0, 0);
ent->read_proc = sysvipc_shm_read_proc;
@@ -72,10 +83,103 @@
return;
}
+/*
+ * pte number `index' of segment `shp': shm_dir[] is a two-level table,
+ * index/PTRS_PER_PTE selects the sub-array and index%PTRS_PER_PTE the
+ * slot inside it.  Usable as an lvalue.
+ */
+#define SHM_ENTRY(shp, index) (shp)->shm_dir[(index)/PTRS_PER_PTE][(index)%PTRS_PER_PTE]
+
+/*
+ * Allocate the two-level pte table for a segment of `pages' pages.
+ *
+ * The result is a kmalloc'ed directory.  Its first pages/PTRS_PER_PTE
+ * slots each point to one zeroed page of ptes; if pages is not a
+ * multiple of PTRS_PER_PTE, one more slot points to a zeroed kmalloc'ed
+ * array holding the remaining pages%PTRS_PER_PTE ptes.
+ *
+ * Returns the directory, or NULL if any allocation failed (everything
+ * allocated up to that point is released again).  Free with shm_free().
+ */
+static pte_t **shm_alloc(unsigned long pages)
+{
+	/* same width as the arithmetic in shm_free, so both always agree */
+	unsigned long dir = pages / PTRS_PER_PTE;
+	unsigned long last = pages % PTRS_PER_PTE;
+	pte_t **ret, **ptr;
+
+	ret = kmalloc ((dir+1) * sizeof(pte_t *), GFP_KERNEL);
+	if (ret == NULL)
+		return NULL;
+
+	for (ptr = ret; ptr < ret+dir ; ptr++)
+	{
+		*ptr = (pte_t *)__get_free_page (GFP_KERNEL);
+		if (*ptr == NULL)
+			goto free;
+		memset (*ptr, 0, PAGE_SIZE);
+	}
+
+	/* The last one is probably not of PAGE_SIZE: we use kmalloc */
+	if (last) {
+		/*
+		 * The tail holds pte_t values, not pointers: size it by
+		 * sizeof(pte_t), which may be larger than a pointer.
+		 */
+		*ptr = kmalloc (last*sizeof(pte_t), GFP_KERNEL);
+		if (*ptr == NULL)
+			goto free;
+		memset (*ptr, 0, last*sizeof(pte_t));
+	}
+
+	return ret;
+
+free:
+	/* The last failed: we decrement first */
+	while (--ptr >= ret)
+		free_page ((unsigned long)*ptr);
+
+	kfree (ret);
+	return NULL;
+}
+
+
+/*
+ * Release a pte table built by shm_alloc for a segment of `pages' pages:
+ * the kmalloc'ed tail array (present only when pages is not a multiple
+ * of PTRS_PER_PTE), then the full pte pages from the highest slot down,
+ * and finally the directory itself.
+ */
+static void shm_free(pte_t** dir, unsigned long pages)
+{
+	unsigned long full = pages/PTRS_PER_PTE;
+	unsigned long slot;
+
+	/* partial tail array, if any, sits right after the full pages */
+	if (pages%PTRS_PER_PTE)
+		kfree (dir[full]);
+
+	/* whole pages, last one first */
+	for (slot = full; slot > 0; slot--)
+		free_page ((unsigned long)dir[slot-1]);
+
+	/* and the directory block */
+	kfree (dir);
+}
+
+/*
+ * Grow the shm_segs[] array of segment pointers to `size' slots.
+ *
+ * Entered with shm_lock held.  The lock is dropped around kmalloc and
+ * re-taken before num_segs is examined, so the size check below also
+ * sees any growth done by someone else in the meantime.  The array is
+ * never shrunk.  Existing entries are carried over and the new slots
+ * are marked IPC_UNUSED.
+ *
+ * Returns 0 on success (also when the array is already large enough)
+ * or -ENOMEM if the new array could not be allocated.
+ */
+static int shm_expand (unsigned int size)
+{
+	unsigned int id;
+	struct shmid_kernel ** new_array;
+
+	spin_unlock(&shm_lock);
+	new_array = kmalloc (size * sizeof(struct shmid_kernel *), GFP_KERNEL);
+	spin_lock(&shm_lock);
+
+	if (!new_array)
+		return -ENOMEM;
+
+	if (size <= num_segs){ /* We check this after kmalloc so
+				  nobody changes num_segs afterwards */
+		/*
+		 * We never shrink the segment. If we shrink we have to
+		 * check for stale handles in newseg
+		 */
+		kfree (new_array);
+		return 0;
+	}
+
+	if (num_segs) {
+		/* the old array holds only num_segs entries: copy no more */
+		memcpy (new_array, shm_segs,
+			num_segs*sizeof(struct shmid_kernel *));
+		kfree (shm_segs);
+	}
+	for (id = num_segs; id < size; id++)
+		new_array[id] = (void *) IPC_UNUSED;
+
+	shm_segs = new_array;
+	num_segs = size;
+	return 0;
+}
+
static int findkey (key_t key)
{
int id;
struct shmid_kernel *shp;
+
+ if (!num_segs)
+ return -1;
for (id = 0; id <= max_shmid; id++) {
if ((shp = shm_segs[id]) == IPC_NOID) {
@@ -93,9 +197,8 @@
__set_current_state(TASK_RUNNING);
remove_wait_queue(&shm_wait, &wait);
}
- if (shp == IPC_UNUSED)
- continue;
- if (key == shp->u.shm_perm.key)
+ if (shp != IPC_UNUSED &&
+ key == shp->u.shm_perm.key)
return id;
}
return -1;
@@ -103,22 +206,38 @@
/*
* allocate new shmid_kernel and pgtable. protected by shm_segs[id] = NOID.
+ * This has to be called with the shm_lock held
*/
static int newseg (key_t key, int shmflg, int size)
{
struct shmid_kernel *shp;
int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
- int id;
+ int id, err;
+ unsigned int shmall, shmmni;
+ lock_kernel();
+ shmall = shm_prm[1];
+ shmmni = shm_prm[2];
+ if (shmmni > IPCMNI) {
+ printk ("shmmni reset to max of %u\n", IPCMNI);
+ shmmni = shm_prm[2] = IPCMNI;
+ }
+ unlock_kernel();
+
+ if (shmmni < used_segs)
+ return -ENOSPC;
+ if ((err = shm_expand (shmmni)))
+ return err;
if (size < SHMMIN)
return -EINVAL;
- if (shm_tot + numpages >= SHMALL)
+ if (shm_tot + numpages >= shmall)
return -ENOSPC;
- for (id = 0; id < SHMMNI; id++)
+ for (id = 0; id < num_segs; id++)
if (shm_segs[id] == IPC_UNUSED) {
shm_segs[id] = (struct shmid_kernel *) IPC_NOID;
goto found;
}
+
return -ENOSPC;
found:
@@ -130,10 +249,8 @@
wake_up (&shm_wait);
return -ENOMEM;
}
- lock_kernel();
- shp->shm_pages = (pte_t *) vmalloc (numpages*sizeof(pte_t));
- unlock_kernel();
- if (!shp->shm_pages) {
+ shp->shm_dir = shm_alloc (numpages);
+ if (!shp->shm_dir) {
kfree(shp);
spin_lock(&shm_lock);
shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
@@ -141,8 +258,6 @@
return -ENOMEM;
}
- memset(shp->shm_pages, 0, numpages*sizeof(pte_t));
-
shp->u.shm_perm.key = key;
shp->u.shm_perm.mode = (shmflg & S_IRWXUGO);
shp->u.shm_perm.cuid = shp->u.shm_perm.uid = current->euid;
@@ -154,6 +269,7 @@
shp->u.shm_atime = shp->u.shm_dtime = 0;
shp->u.shm_ctime = CURRENT_TIME;
shp->shm_npages = numpages;
+ shp->id = id;
spin_lock(&shm_lock);
@@ -162,24 +278,29 @@
if (id > max_shmid)
max_shmid = id;
+
shm_segs[id] = shp;
used_segs++;
wake_up (&shm_wait);
- return (unsigned int) shp->u.shm_perm.seq * SHMMNI + id;
+ return (unsigned int) shp->u.shm_perm.seq * IPCMNI + id;
}
-int shmmax = SHMMAX;
-
asmlinkage long sys_shmget (key_t key, int size, int shmflg)
{
struct shmid_kernel *shp;
int err, id = 0;
+ unsigned int shmmax;
+
+ lock_kernel();
+ shmmax = shm_prm[0];
+ unlock_kernel();
+
+ if (size < 0 || size > shmmax)
+ return -EINVAL;
down(¤t->mm->mmap_sem);
spin_lock(&shm_lock);
- if (size < 0 || size > shmmax) {
- err = -EINVAL;
- } else if (key == IPC_PRIVATE) {
+ if (key == IPC_PRIVATE) {
err = newseg(key, shmflg, size);
} else if ((id = findkey (key)) == -1) {
if (!(shmflg & IPC_CREAT))
@@ -197,7 +318,7 @@
else if (ipcperms (&shp->u.shm_perm, shmflg))
err = -EACCES;
else
- err = (int) shp->u.shm_perm.seq * SHMMNI + id;
+ err = (int) shp->u.shm_perm.seq * IPCMNI + id;
}
spin_unlock(&shm_lock);
up(¤t->mm->mmap_sem);
@@ -218,18 +339,18 @@
if (shp == IPC_NOID || shp == IPC_UNUSED)
BUG();
shp->u.shm_perm.seq++; /* for shmat */
- shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/SHMMNI); /* increment, but avoid overflow */
+ shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/IPCMNI); /* increment, but avoid overflow */
shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
used_segs--;
if (id == max_shmid)
- while (max_shmid && (shm_segs[--max_shmid] == IPC_UNUSED));
- if (!shp->shm_pages)
- BUG();
+ while (max_shmid-- > 0 && (shm_segs[max_shmid] == IPC_UNUSED));
+ if (!shp->shm_dir)
+ BUG();
spin_unlock(&shm_lock);
numpages = shp->shm_npages;
for (i = 0, rss = 0, swp = 0; i < numpages ; i++) {
pte_t pte;
- pte = shp->shm_pages[i];
+ pte = SHM_ENTRY (shp,i);
if (pte_none(pte))
continue;
if (pte_present(pte)) {
@@ -242,9 +363,7 @@
swp++;
}
}
- lock_kernel();
- vfree(shp->shm_pages);
- unlock_kernel();
+ shm_free (shp->shm_dir, numpages);
kfree(shp);
spin_lock(&shm_lock);
shm_rss -= rss;
@@ -273,19 +392,20 @@
case IPC_INFO:
{
struct shminfo shminfo;
+ spin_unlock(&shm_lock);
err = -EFAULT;
if (!buf)
goto out;
- shminfo.shmmni = SHMMNI;
- shminfo.shmmax = shmmax;
+ lock_kernel();
+ shminfo.shmmni = shminfo.shmseg = shm_prm[2];
+ shminfo.shmmax = shm_prm[0];
+ shminfo.shmall = shm_prm[1];
+ unlock_kernel();
shminfo.shmmin = SHMMIN;
- shminfo.shmall = SHMALL;
- shminfo.shmseg = SHMSEG;
- spin_unlock(&shm_lock);
if(copy_to_user (buf, &shminfo, sizeof(struct shminfo)))
goto out_unlocked;
spin_lock(&shm_lock);
- err = max_shmid;
+ err = max_shmid < 0 ? 0 : max_shmid;
goto out;
}
case SHM_INFO:
@@ -302,7 +422,7 @@
if(copy_to_user (buf, &shm_info, sizeof(shm_info)))
goto out_unlocked;
spin_lock(&shm_lock);
- err = max_shmid;
+ err = max_shmid < 0 ? 0 : max_shmid;
goto out;
}
case SHM_STAT:
@@ -314,7 +434,7 @@
goto out;
if (ipcperms (&shp->u.shm_perm, S_IRUGO))
goto out;
- id = (unsigned int) shp->u.shm_perm.seq * SHMMNI + shmid;
+ id = (unsigned int) shp->u.shm_perm.seq * IPCMNI + shmid;
err = -EFAULT;
spin_unlock(&shm_lock);
if(copy_to_user (buf, &shp->u, sizeof(*buf)))
@@ -324,12 +444,13 @@
goto out;
}
- shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
err = -EINVAL;
- if (shp == IPC_UNUSED || shp == IPC_NOID)
+ if ((id = (unsigned int) shmid % IPCMNI) > max_shmid)
+ goto out;
+ if ((shp = shm_segs[id]) == IPC_UNUSED || shp == IPC_NOID)
goto out;
err = -EIDRM;
- if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
+ if (shp->u.shm_perm.seq != (unsigned int) shmid / IPCMNI)
goto out;
ipcp = &shp->u.shm_perm;
@@ -484,7 +605,7 @@
if (shmid < 0)
goto out;
- shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
+ shp = shm_segs[id = (unsigned int) shmid % IPCMNI];
if (shp == IPC_UNUSED || shp == IPC_NOID)
goto out;
@@ -520,14 +641,15 @@
if (addr < current->mm->start_stack &&
addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4))
goto out;
- if (!(shmflg & SHM_REMAP) && find_vma_intersection(current->mm, addr, addr + shp->u.shm_segsz))
+ if (!(shmflg & SHM_REMAP) &&
+ find_vma_intersection(current->mm, addr, addr + shp->u.shm_segsz))
goto out;
err = -EACCES;
if (ipcperms(&shp->u.shm_perm, shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO))
goto out;
err = -EIDRM;
- if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
+ if (shp->u.shm_perm.seq != (unsigned int) shmid / IPCMNI)
goto out;
spin_unlock(&shm_lock);
@@ -536,13 +658,13 @@
spin_lock(&shm_lock);
if (!shmd)
goto out;
- if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)) {
+ if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / IPCMNI)) {
kmem_cache_free(vm_area_cachep, shmd);
err = -EIDRM;
goto out;
}
- shmd->vm_private_data = shm_segs + id;
+ shmd->vm_private_data = shm_segs[id];
shmd->vm_start = addr;
shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
shmd->vm_mm = current->mm;
@@ -588,7 +710,7 @@
struct shmid_kernel *shp;
spin_lock(&shm_lock);
- shp = *(struct shmid_kernel **) shmd->vm_private_data;
+ shp = (struct shmid_kernel *) shmd->vm_private_data;
insert_attach(shp,shmd); /* insert shmd into shp->attaches */
shp->u.shm_nattch++;
shp->u.shm_atime = CURRENT_TIME;
@@ -608,14 +730,12 @@
spin_lock(&shm_lock);
/* remove from the list of attaches of the shm segment */
- shp = *(struct shmid_kernel **) shmd->vm_private_data;
+ shp = (struct shmid_kernel *) shmd->vm_private_data;
remove_attach(shp,shmd); /* remove from shp->attaches */
shp->u.shm_lpid = current->pid;
shp->u.shm_dtime = CURRENT_TIME;
- if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST) {
- unsigned int id = (struct shmid_kernel **)shmd->vm_private_data - shm_segs;
- killseg (id);
- }
+ if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST)
+ killseg (shp->id);
spin_unlock(&shm_lock);
}
@@ -652,7 +772,7 @@
}
/*
- * page not present ... go through shm_pages
+ * page not present ... go through shm_dir
*/
static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
@@ -661,12 +781,12 @@
unsigned int idx;
struct page * page;
- shp = *(struct shmid_kernel **) shmd->vm_private_data;
+ shp = (struct shmid_kernel *) shmd->vm_private_data;
idx = (address - shmd->vm_start + shmd->vm_offset) >> PAGE_SHIFT;
spin_lock(&shm_lock);
again:
- pte = shp->shm_pages[idx];
+ pte = SHM_ENTRY(shp,idx);
if (!pte_present(pte)) {
if (pte_none(pte)) {
spin_unlock(&shm_lock);
@@ -675,7 +795,7 @@
goto oom;
clear_highpage(page);
spin_lock(&shm_lock);
- if (pte_val(pte) != pte_val(shp->shm_pages[idx]))
+ if (pte_val(pte) != pte_val(SHM_ENTRY(shp, idx)))
goto changed;
} else {
pte_t entry = pte;
@@ -697,18 +817,18 @@
unlock_kernel();
spin_lock(&shm_lock);
shm_swp--;
- pte = shp->shm_pages[idx];
+ pte = SHM_ENTRY(shp, idx);
if (pte_present(pte))
goto present;
}
shm_rss++;
pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
- shp->shm_pages[idx] = pte;
+ SHM_ENTRY(shp, idx) = pte;
} else
--current->maj_flt; /* was incremented in do_no_page */
done:
- /* pte_val(pte) == shp->shm_pages[idx] */
+ /* pte_val(pte) == SHM_ENTRY (shp, idx) */
get_page(pte_page(pte));
spin_unlock(&shm_lock);
current->min_flt++;
@@ -770,7 +890,7 @@
if (idx >= shp->shm_npages)
goto next_id;
- page = shp->shm_pages[idx];
+ page = SHM_ENTRY(shp, idx);
if (!pte_present(page))
goto check_table;
page_map = pte_page(page);
@@ -792,7 +912,7 @@
goto check_table;
if (!(page_map = prepare_highmem_swapout(page_map)))
goto check_table;
- shp->shm_pages[idx] = swap_entry;
+ SHM_ENTRY(shp, idx) = swap_entry;
swap_successes++;
shm_swp++;
shm_rss--;
@@ -812,12 +932,12 @@
* Free the swap entry and set the new pte for the shm page.
*/
static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
- pte_t entry, struct page *page)
+ pte_t entry, struct page *page)
{
pte_t pte;
pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
- shp->shm_pages[idx] = pte;
+ SHM_ENTRY(shp, idx) = pte;
get_page(page);
shm_rss++;
@@ -837,12 +957,12 @@
int i, n;
spin_lock(&shm_lock);
- for (i = 0; i < SHMMNI; i++) {
+ for (i = 0; i <= max_shmid; i++) {
struct shmid_kernel *seg = shm_segs[i];
if ((seg == IPC_UNUSED) || (seg == IPC_NOID))
continue;
for (n = 0; n < seg->shm_npages; n++)
- if (pte_val(seg->shm_pages[n]) == pte_val(entry)) {
+ if (pte_val(SHM_ENTRY(seg,n)) == pte_val(entry)) {
shm_unuse_page(seg, n, entry, page);
return;
}
@@ -860,11 +980,11 @@
len += sprintf(buffer, " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n");
spin_lock(&shm_lock);
- for(i = 0; i < SHMMNI; i++)
+ for(i = 0; i <= max_shmid; i++)
if(shm_segs[i] != IPC_UNUSED) {
len += sprintf(buffer + len, "%10d %10d %4o %10d %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n",
shm_segs[i]->u.shm_perm.key,
- shm_segs[i]->u.shm_perm.seq * SHMMNI + i,
+ shm_segs[i]->u.shm_perm.seq * IPCMNI + i,
shm_segs[i]->u.shm_perm.mode,
shm_segs[i]->u.shm_segsz,
shm_segs[i]->u.shm_cpid,
diff -uNr 2.3.24/ipc/util.c 2.3.24-shm1/ipc/util.c
--- 2.3.24/ipc/util.c Sat Oct 30 19:38:13 1999
+++ 2.3.24-shm1/ipc/util.c Mon Nov 1 15:51:36 1999
@@ -13,6 +13,9 @@
#include <linux/shm.h>
#include <linux/init.h>
#include <linux/msg.h>
+#include <linux/malloc.h>
+
+#include "util.h"
#if defined(CONFIG_SYSVIPC)
diff -uNr 2.3.24/ipc/util.h 2.3.24-shm1/ipc/util.h
--- 2.3.24/ipc/util.h Thu Jan 1 01:00:00 1970
+++ 2.3.24-shm1/ipc/util.h Mon Nov 1 15:51:36 1999
@@ -0,0 +1,12 @@
+/*
+ * linux/ipc/util.h
+ * Copyright (C) 1999 Christoph Rohland
+ */
+
+/*
+ * IPCMNI is the absolute maximum for ipc identifier. This is used to
+ * detect stale identifiers
+ */
+#define IPCMNI (1<<15)
+
+extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
diff -uNr 2.3.24/kernel/sysctl.c 2.3.24-shm1/kernel/sysctl.c
--- 2.3.24/kernel/sysctl.c Sun Oct 10 14:02:32 1999
+++ 2.3.24-shm1/kernel/sysctl.c Mon Nov 1 15:51:36 1999
@@ -49,7 +49,7 @@
extern int sg_big_buff;
#endif
#ifdef CONFIG_SYSVIPC
-extern int shmmax;
+extern int shm_prm[];
#endif
#ifdef __sparc__
@@ -213,8 +213,8 @@
{KERN_RTSIGMAX, "rtsig-max", &max_queued_signals, sizeof(int),
0644, NULL, &proc_dointvec},
#ifdef CONFIG_SYSVIPC
- {KERN_SHMMAX, "shmmax", &shmmax, sizeof (int),
- 0644, NULL, &proc_dointvec},
+ {KERN_SHMMAX, "shmmax", &shm_prm, 3*sizeof(unsigned int),
+ 0644, NULL, &proc_dointvec},
#endif
#ifdef CONFIG_MAGIC_SYSRQ
{KERN_SYSRQ, "sysrq", &sysrq_enabled, sizeof (int),
diff -uNr 2.3.24/mm/swap_state.c 2.3.24-shm1/mm/swap_state.c
--- 2.3.24/mm/swap_state.c Sat Oct 30 19:38:13 1999
+++ 2.3.24-shm1/mm/swap_state.c Mon Nov 1 15:51:36 1999
@@ -67,8 +67,6 @@
if (!pte_val(entry))
goto out;
type = SWP_TYPE(entry);
- if (type & SHM_SWP_TYPE)
- goto out;
if (type >= nr_swapfiles)
goto bad_file;
p = type + swap_info;
@@ -113,8 +111,6 @@
if (!pte_val(entry))
goto bad_entry;
type = SWP_TYPE(entry);
- if (type & SHM_SWP_TYPE)
- goto out;
if (type >= nr_swapfiles)
goto bad_file;
p = type + swap_info;
diff -uNr 2.3.24/mm/swapfile.c 2.3.24-shm1/mm/swapfile.c
--- 2.3.24/mm/swapfile.c Sat Oct 30 19:38:13 1999
+++ 2.3.24-shm1/mm/swapfile.c Mon Nov 1 15:51:36 1999
@@ -134,8 +134,6 @@
goto out;
type = SWP_TYPE(entry);
- if (type & SHM_SWP_TYPE)
- goto out;
if (type >= nr_swapfiles)
goto bad_nofile;
p = & swap_info[type];
@@ -189,8 +187,6 @@
goto new_swap_entry;
entry = get_pagecache_pte(page);
type = SWP_TYPE(entry);
- if (type & SHM_SWP_TYPE)
- goto new_swap_entry;
if (type >= nr_swapfiles)
goto new_swap_entry;
p = type + swap_info;
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] kanoj-mm21-2.3.23 alow larger sizes to shmget()
1999-11-02 9:54 ` Christoph Rohland
@ 1999-11-02 21:24 ` Kanoj Sarcar
1999-11-02 21:45 ` Christoph Rohland
0 siblings, 1 reply; 9+ messages in thread
From: Kanoj Sarcar @ 1999-11-02 21:24 UTC (permalink / raw)
To: Christoph Rohland; +Cc: torvalds, linux-mm
>
> Since glibc is encapsulating these calls and headers, we could perhaps
> work with compatibility version. E.g. making shmget and shmctl a real
> system call and converting the structures in sys_ipc to the old ones
> for old libraries?
>
> BTW I did some work to make the clean up the shm coding and make the
> limites sysctleable. It also avoids vmalloc for the page tables. The
> latter is really important for big servers. We run out of vm-space on
> some benchmarks. I appended the patch against 2.3.24. I could not
> finally test this patch since shm swapping has apparently a race
> condition on segment deletion introduced with the smp version. I am
> still investigating on that. But perhaps we could incorporate this
> patch anyways. It did survive stress testing shm-swapping as long as I
> do not remove segments.
>
The clean up code is similar to what I posted at
http://humbolt.geo.uu.nl/lists/linux-mm/1999-06/msg00071.html
previously. Although, I would point out that SHMMAX probably belongs
to the asm/* header file (especially with the size_t size parameter
to shmget()).
The sysctl idea is good, although you need to clean up the code, and
make 2 new nodes /proc/sys/kernel/* for ease of use.
The removal of struct shmid_kernel from shm.h to a private header
file, or to shm.c is a very good idea. This has no business being
user visible. Cleanups like this go a long way in creating a clean
ddi/dki ...
The removal of vmalloc() from the shm.c sounds good in principle,
although I haven't really reviewed your code in any detail ...
Thanks.
Kanoj
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] kanoj-mm21-2.3.23 alow larger sizes to shmget()
1999-11-02 21:24 ` Kanoj Sarcar
@ 1999-11-02 21:45 ` Christoph Rohland
1999-11-02 21:56 ` Kanoj Sarcar
0 siblings, 1 reply; 9+ messages in thread
From: Christoph Rohland @ 1999-11-02 21:45 UTC (permalink / raw)
To: Kanoj Sarcar; +Cc: Christoph Rohland, torvalds, linux-mm
kanoj@google.engr.sgi.com (Kanoj Sarcar) writes:
> >
> > Since glibc is encapsulating these calls and headers, we could perhaps
> > work with compatibility version. E.g. making shmget and shmctl a real
> > system call and converting the structures in sys_ipc to the old ones
> > for old libraries?
> >
> > BTW I did some work to make the clean up the shm coding and make the
> > limites sysctleable. It also avoids vmalloc for the page tables. The
> > latter is really important for big servers. We run out of vm-space on
> > some benchmarks. I appended the patch against 2.3.24. I could not
> > finally test this patch since shm swapping has apparently a race
> > condition on segment deletion introduced with the smp version. I am
> > still investigating on that. But perhaps we could incorporate this
> > patch anyways. It did survive stress testing shm-swapping as long as I
> > do not remove segments.
> >
>
> The clean up code is similar to what I posted at
>
> http://humbolt.geo.uu.nl/lists/linux-mm/1999-06/msg00071.html
>
> previously. Although, I would point out that SHMMAX probably belongs
> to the asm/* header file (specially, with the size_t size parameter
> to shmget()).
Why should we make it arch dependent if we can tune it at runtime?
> The sysctl idea is good, although you need to clean up the code, and
> make 2 new nodes /proc/sys/kernel/* for ease of use.
I preferred not to clutter the proc/sys/kernel namespace, but this is
arguable. I think tuning these parameters belongs together and can
easily be done in one file. Especially since you can tune the most
important one alone. (SHMMAX is the first)
> The removal of struct shmid_kernel from shm.h to a private header
> file, or to shm.c is a very good idea. This has no business being
> user visible. Cleanups like this go a long way in creating a clean
> ddi/dki ...
>
> The removal of vmalloc() from the shm.c sounds good in principle,
> although I haven't really reviewed your code in any detail ...
It is crucial for big machines especially with highmem support. I
still try to get 2.3.25 to swap out shm pages. There was something
introduced after 2.3.22 which prevents shm pages to swap out (w/o my
patch). At least 2.3.25 survives running out of memory with high
memory so I can investigate.
Christoph
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] kanoj-mm21-2.3.23 alow larger sizes to shmget()
1999-11-02 21:45 ` Christoph Rohland
@ 1999-11-02 21:56 ` Kanoj Sarcar
1999-11-02 22:09 ` Christoph Rohland
0 siblings, 1 reply; 9+ messages in thread
From: Kanoj Sarcar @ 1999-11-02 21:56 UTC (permalink / raw)
To: Christoph Rohland; +Cc: torvalds, linux-mm
> > The clean up code is similar to what I posted at
> >
> > http://humbolt.geo.uu.nl/lists/linux-mm/1999-06/msg00071.html
> >
> > previously. Although, I would point out that SHMMAX probably belongs
> > to the asm/* header file (specially, with the size_t size parameter
> > to shmget()).
>
> Why should we make it arch dependend if we can tune it at runtime?
>
Probably 95% of people who run Linux have no idea what /proc/sys/kernel/shmmax
is, and end up recompiling the kernel with a bumped up SHMMAX, if they find
SHMMAX too low for their app. On sparc64/alpha and yet to come mips64/ia64,
SHMMAX can be pretty huge, compared to the ia32 0x2000000. Think out
of the box, and you will see that keeping SHMMAX asm dependent will work
better for most people ...
Kanoj
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] kanoj-mm21-2.3.23 alow larger sizes to shmget()
1999-11-02 21:56 ` Kanoj Sarcar
@ 1999-11-02 22:09 ` Christoph Rohland
0 siblings, 0 replies; 9+ messages in thread
From: Christoph Rohland @ 1999-11-02 22:09 UTC (permalink / raw)
To: Kanoj Sarcar; +Cc: torvalds, linux-mm
kanoj@google.engr.sgi.com (Kanoj Sarcar) writes:
> > > The clean up code is similar to what I posted at
> > >
> > > http://humbolt.geo.uu.nl/lists/linux-mm/1999-06/msg00071.html
> > >
> > > previously. Although, I would point out that SHMMAX probably belongs
> > > to the asm/* header file (specially, with the size_t size parameter
> > > to shmget()).
> >
> > Why should we make it arch dependend if we can tune it at runtime?
> >
>
> Probably 95% of people who run Linux have no idea what
> /proc/sys/kernel/shmmax is, and end up recompiling the kernel with a
> bumped up SHMMAX, if they find SHMMAX too low for their app. On
> sparc64/alpha and yet to come mips64/ia64, SHMMAX can be pretty
> huge, compared to the ia32 0x2000000. Think out of the box, and you
> will see that keeping SHMMAX asm dependent will work better for most
> people ...
So we should include a comment in shm.h to do it via sysctl. For
bigger machines you have many parameters to tune. So the people have
to get used to sysctl and we will need a frontend to tune them
persistently.
At least that's what I would do. In general I think it is good to keep
the arch dependent part as small as possible. But I have no real
objection to doing it the other way besides this.
Christoph
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] kanoj-mm21-2.3.23 alow larger sizes to shmget()
1999-11-01 17:00 ` Kanoj Sarcar
1999-11-02 9:54 ` Christoph Rohland
@ 1999-11-03 9:06 ` Christoph Rohland
1 sibling, 0 replies; 9+ messages in thread
From: Christoph Rohland @ 1999-11-03 9:06 UTC (permalink / raw)
To: Kanoj Sarcar; +Cc: linux-mm
Hi Kanoj,
apparently my mail somehow got broken up during transfer. At least I
could not read my comments in the version from the list, so I am
resending my comments.
Greetings
Christoph
kanoj@google.engr.sgi.com (Kanoj Sarcar) writes:
> >
> > Hi Kanoj,
> >
> > This is probably breaking user space applications since shmid_ds is
> > shared with user space in shmctl(2). On 32bit machines this does not
> > matter, since sizeof(int) == sizeof(size_t), but on 64bit this will
> > break.
> >
> > How do we handle this?
>
> Unfortunately, I don't think we can prevent this 64bit ABI breakage, if
> we want to conform to the single unix spec on those platforms. It's
> probably a good idea to have the ia64 port be SUS compliant, even though
> sparc64/alpha are currently not.
>
> If it is really important to preserve the 64bit ABI, there's one more
> alternative: preserve the shmget() api/abi on the old 64bit platforms, but
> be compliant on the 32 bit ones and newer 64 bit ones (mips64/ia64). This
> is not the cleanest solution, but can be done with a little header file
> reorganization in include/linux/shm.h and include/linux/shmparam.h.
>
> Linus has put this patch into pre-25, let's talk if it is important to
> do the above ... it shouldn't take me more than a couple of hours to
> do it, if we so decided.
Since glibc is encapsulating these calls and headers, we could perhaps
work with a compatibility version. E.g. making shmget and shmctl real
system calls and converting the structures in sys_ipc to the old ones
for old libraries?
BTW I did some work to clean up the shm code and make the
limits tunable via sysctl. It also avoids vmalloc for the page tables. The
latter is really important for big servers. We run out of vm-space on
some benchmarks. I appended the patch against 2.3.24. I could not
fully test this patch, since shm swapping apparently has a race
condition on segment deletion that was introduced with the smp version. I am
still investigating that. But perhaps we could incorporate this
patch anyway. It did survive stress testing of shm-swapping as long as I
do not remove segments.
Greetings
Christoph
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://humbolt.geo.uu.nl/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~1999-11-03 9:06 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
1999-10-28 22:04 [PATCH] kanoj-mm21-2.3.23 alow larger sizes to shmget() Kanoj Sarcar
1999-11-01 9:41 ` Christoph Rohland
1999-11-01 17:00 ` Kanoj Sarcar
1999-11-02 9:54 ` Christoph Rohland
1999-11-02 21:24 ` Kanoj Sarcar
1999-11-02 21:45 ` Christoph Rohland
1999-11-02 21:56 ` Kanoj Sarcar
1999-11-02 22:09 ` Christoph Rohland
1999-11-03 9:06 ` Christoph Rohland
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox