* shm_alloc and friends
@ 2000-05-25 14:24 Russell King
2000-05-25 14:31 ` Alan Cox
0 siblings, 1 reply; 9+ messages in thread
From: Russell King @ 2000-05-25 14:24 UTC (permalink / raw)
To: riel; +Cc: linux-kernel, linux-mm
Rik,
Ok, I re-reviewed the code (been looking at this code too long), and I
think the change is orthogonal wrt the rest of the code. Here is a patch
of my changes to -pre8 thus far (including the --count < 0 at the end).
If you'd like to give your blessing on this code, I'll pass it to Linus
this evening for integration into the next prepre patch.
With these changes, my box has survived about 20 minutes thus far running
hdparm -t's, which is the most its survived with this.
The pluses are:
- uses less memory than the original to store the same information -
we're no longer wasting memory to store pointers to pointers to the
pte and swap entries. Instead, they're in one big array allocated
by vmalloc().
- code is cleaner as a result - no loops within loops to clear
and scan arrays of values within arrays of pointers.
- code should be faster - no longer have to divide and modulus
the number of pages
- no longer reliant on PAGE_SIZE >= (sizeof(pte) * PTRS_PER_PTE)
Problems:
- memsetting the vmalloced area to initialise the pte's.
(Note: pte_clear can't be used, because that is expected to be used
on page tables allocated by pte_alloc - the old code made this
mistake).
You may have "pte_none" signified by a non-zero value on some other
architecture, in which case, the memsetting will break this. I don't
see a way around this without introducing an "empty_pte()" construct
which always satisfies (pte_none(empty_pte()) != 0)
I just noticed in this patch - the second to last hunk had my
"check_free_lists()" calls still in, which I've just deleted. Hopefully
the patch still works. I'll re-generate it for Linus.
--- linux.orig/ipc/shm.c Sat May 13 01:25:16 2000
+++ linux/ipc/shm.c Thu May 25 15:04:52 2000
@@ -81,7 +81,7 @@
size_t shm_segsz;
unsigned long shm_nattch;
unsigned long shm_npages; /* size of segment (pages) */
- pte_t **shm_dir; /* ptr to arr of ptrs to frames */
+ pte_t *shm_dir; /* ptrs to frames */
int id;
union permap {
struct shmem {
@@ -546,38 +546,21 @@
return 0;
}
-#define SHM_ENTRY(shp, index) (shp)->shm_dir[(index)/PTRS_PER_PTE][(index)%PTRS_PER_PTE]
+#define SHM_ENTRY(shp, index) (shp)->shm_dir[index]
-static pte_t **shm_alloc(unsigned long pages, int doacc)
+static pte_t *shm_alloc(unsigned long pages, int doacc)
{
- unsigned short dir = pages / PTRS_PER_PTE;
- unsigned short last = pages % PTRS_PER_PTE;
- pte_t **ret, **ptr, *pte;
+ pte_t *ret;
if (pages == 0)
return NULL;
- ret = kmalloc ((dir+1) * sizeof(pte_t *), GFP_KERNEL);
+ ret = (pte_t *)vmalloc(pages * sizeof(pte_t));
if (!ret)
goto nomem;
- for (ptr = ret; ptr < ret+dir ; ptr++)
- {
- *ptr = (pte_t *)__get_free_page (GFP_KERNEL);
- if (!*ptr)
- goto free;
- for (pte = *ptr; pte < *ptr + PTRS_PER_PTE; pte++)
- pte_clear (pte);
- }
+ memset(ret, 0, pages * sizeof(pte_t));
- /* The last one is probably not of PAGE_SIZE: we use kmalloc */
- if (last) {
- *ptr = kmalloc (last*sizeof(pte_t), GFP_KERNEL);
- if (!*ptr)
- goto free;
- for (pte = *ptr; pte < *ptr + last; pte++)
- pte_clear (pte);
- }
if (doacc) {
shm_lockall();
shm_tot += pages;
@@ -586,27 +569,19 @@
}
return ret;
-free:
- /* The last failed: we decrement first */
- while (--ptr >= ret)
- free_page ((unsigned long)*ptr);
-
- kfree (ret);
nomem:
return ERR_PTR(-ENOMEM);
}
-static void shm_free(pte_t** dir, unsigned long pages, int doacc)
+static void shm_free(pte_t *dir, unsigned long pages, int doacc)
{
int i, rss, swp;
- pte_t **ptr = dir+pages/PTRS_PER_PTE;
if (!dir)
return;
for (i = 0, rss = 0, swp = 0; i < pages ; i++) {
- pte_t pte;
- pte = dir[i/PTRS_PER_PTE][i%PTRS_PER_PTE];
+ pte_t pte = dir[i];
if (pte_none(pte))
continue;
if (pte_present(pte)) {
@@ -618,16 +593,7 @@
}
}
- /* first the last page */
- if (pages%PTRS_PER_PTE)
- kfree (*ptr);
- /* now the whole pages */
- while (--ptr >= dir)
- if (*ptr)
- free_page ((unsigned long)*ptr);
-
- /* Now the indirect block */
- kfree (dir);
+ vfree(dir);
if (doacc) {
shm_lockall();
@@ -645,7 +611,7 @@
struct inode *inode = dentry->d_inode;
struct shmid_kernel *shp;
unsigned long new_pages, old_pages;
- pte_t **new_dir, **old_dir;
+ pte_t *new_dir, *old_dir;
error = inode_change_ok(inode, attr);
if (error)
@@ -673,18 +639,15 @@
old_dir = shp->shm_dir;
old_pages = shp->shm_npages;
if (old_dir){
- pte_t *swap;
- int i,j;
- i = old_pages < new_pages ? old_pages : new_pages;
- j = i % PTRS_PER_PTE;
- i /= PTRS_PER_PTE;
- if (j)
- memcpy (new_dir[i], old_dir[i], j * sizeof (pte_t));
- while (i--) {
- swap = new_dir[i];
- new_dir[i] = old_dir[i];
- old_dir[i] = swap;
- }
+ int pages = old_pages < new_pages ? old_pages : new_pages;
+
+ /*
+ * Copy the pte pointers from the old dir to the new dir,
+ * remembering to remove the copied pointers from the old
+ * dir.
+ */
+ memcpy(new_dir, old_dir, pages * sizeof(pte_t));
+ memset(old_dir, 0, pages * sizeof(pte_t));
}
shp->shm_dir = new_dir;
shp->shm_npages = new_pages;
@@ -700,13 +663,14 @@
static struct shmid_kernel *seg_alloc(int numpages, size_t namelen)
{
struct shmid_kernel *shp;
- pte_t **dir;
+ pte_t *dir;
shp = (struct shmid_kernel *) kmalloc (sizeof (*shp) + namelen, GFP_KERNEL);
if (!shp)
return ERR_PTR(-ENOMEM);
-
+
dir = shm_alloc (numpages, namelen);
+
if (IS_ERR(dir)) {
kfree(shp);
return ERR_PTR(PTR_ERR(dir));
@@ -1424,7 +1388,7 @@
return RETRY;
if (shp->id != zero_id) swap_attempts++;
- if (--counter < 0) /* failed */
+ if (--*counter < 0) /* failed */
return FAILED;
if (page_count(page_map) != 1)
return RETRY;
_____
|_____| ------------------------------------------------- ---+---+-
| | Russell King rmk@arm.linux.org.uk --- ---
| | | | http://www.arm.linux.org.uk/~rmk/aboutme.html / / |
| +-+-+ --- -+-
/ | THE developer of ARM Linux |+| /|\
/ | | | --- |
+-+-+ ------------------------------------------------- /\\\ |
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: shm_alloc and friends
2000-05-25 14:24 shm_alloc and friends Russell King
@ 2000-05-25 14:31 ` Alan Cox
2000-05-25 14:36 ` Russell King
0 siblings, 1 reply; 9+ messages in thread
From: Alan Cox @ 2000-05-25 14:31 UTC (permalink / raw)
To: Russell King; +Cc: riel, linux-kernel, linux-mm
> Problems:
> - memsetting the vmalloced area to initialise the pte's.
> (Note: pte_clear can't be used, because that is expected to be used
Sorry that breaks S/390. You cannot use memset here.
Alan
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: shm_alloc and friends
2000-05-25 14:31 ` Alan Cox
@ 2000-05-25 14:36 ` Russell King
2000-05-25 14:59 ` Alan Cox
2000-05-26 14:07 ` Eric W. Biederman
0 siblings, 2 replies; 9+ messages in thread
From: Russell King @ 2000-05-25 14:36 UTC (permalink / raw)
To: Alan Cox; +Cc: riel, linux-kernel, linux-mm
Alan Cox writes:
> > Problems:
> > - memsetting the vmalloced area to initialise the pte's.
> > (Note: pte_clear can't be used, because that is expected to be used
>
> Sorry that breaks S/390. You cannot use memset here.
Suggestions on a fix for that?
_____
|_____| ------------------------------------------------- ---+---+-
| | Russell King rmk@arm.linux.org.uk --- ---
| | | | http://www.arm.linux.org.uk/~rmk/aboutme.html / / |
| +-+-+ --- -+-
/ | THE developer of ARM Linux |+| /|\
/ | | | --- |
+-+-+ ------------------------------------------------- /\\\ |
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: shm_alloc and friends
2000-05-25 14:36 ` Russell King
@ 2000-05-25 14:59 ` Alan Cox
2000-05-25 15:20 ` Russell King
2000-05-26 14:07 ` Eric W. Biederman
1 sibling, 1 reply; 9+ messages in thread
From: Alan Cox @ 2000-05-25 14:59 UTC (permalink / raw)
To: Russell King; +Cc: Alan Cox, riel, linux-kernel, linux-mm
> > > Problems:
> > > - memsetting the vmalloced area to initialise the pte's.
> > > (Note: pte_clear can't be used, because that is expected to be used
> >
> > Sorry that breaks S/390. You cannot use memset here.
>
> Suggestions on a fix for that?
Use pte_clear. That is the only valid way to do it. Im not sure I follow why
you cant use pte_clear in this case
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: shm_alloc and friends
2000-05-25 14:59 ` Alan Cox
@ 2000-05-25 15:20 ` Russell King
2000-05-25 15:59 ` Alan Cox
2000-05-25 16:01 ` Benjamin C.R. LaHaise
0 siblings, 2 replies; 9+ messages in thread
From: Russell King @ 2000-05-25 15:20 UTC (permalink / raw)
To: Alan Cox; +Cc: riel, linux-kernel, linux-mm
Alan Cox writes:
> Use pte_clear. That is the only valid way to do it. Im not sure I follow why
> you cant use pte_clear in this case
pte_clear has other side effects on ARM, since we don't have enough bits in the
page tables to store all the bits that Linux needs. In fact, there are NO bits
in the page table entries which are not CPU defined.
Therefore, we have to store the CPU's version of the page pointers at ptep[0]
through to ptep[1023], and the kernel's version with all the associated flags
in ptep[-1024] through to ptep[-1].
pte_alloc and friends are aware of this because they are allocating page tables,
and pte_clear is supposed to be used on page tables.
SHM uses it on *pages* allocated from __get_free_page() and kmalloc, which are
not page tables.
Therefore, really SHM's use of pte_clear is a hack in the extreme, breaking the
architecture independence of the page table macros.
_____
|_____| ------------------------------------------------- ---+---+-
| | Russell King rmk@arm.linux.org.uk --- ---
| | | | http://www.arm.linux.org.uk/~rmk/aboutme.html / / |
| +-+-+ --- -+-
/ | THE developer of ARM Linux |+| /|\
/ | | | --- |
+-+-+ ------------------------------------------------- /\\\ |
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: shm_alloc and friends
2000-05-25 15:20 ` Russell King
@ 2000-05-25 15:59 ` Alan Cox
2000-05-25 16:01 ` Benjamin C.R. LaHaise
1 sibling, 0 replies; 9+ messages in thread
From: Alan Cox @ 2000-05-25 15:59 UTC (permalink / raw)
To: Russell King; +Cc: Alan Cox, riel, linux-kernel, linux-mm
> > Use pte_clear. That is the only valid way to do it. Im not sure I follow why
> > you cant use pte_clear in this case
>
> pte_clear has other side effects on ARM, since we don't have enough bits in the
> page tables to store all the bits that Linux needs. In fact, there are NO bits
> in the page table entries which are not CPU defined.
How about adding a seperate pte_init() then ?
> Therefore, really SHM's use of pte_clear is a hack in the extreme, breaking the
> architecture independence of the page table macros.
Agreed.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: shm_alloc and friends
2000-05-25 15:20 ` Russell King
2000-05-25 15:59 ` Alan Cox
@ 2000-05-25 16:01 ` Benjamin C.R. LaHaise
2000-05-25 16:04 ` Russell King
1 sibling, 1 reply; 9+ messages in thread
From: Benjamin C.R. LaHaise @ 2000-05-25 16:01 UTC (permalink / raw)
To: Russell King; +Cc: Alan Cox, riel, linux-kernel, linux-mm
On Thu, 25 May 2000, Russell King wrote:
> SHM uses it on *pages* allocated from __get_free_page() and kmalloc, which are
> not page tables.
>
> Therefore, really SHM's use of pte_clear is a hack in the extreme, breaking the
> architecture independence of the page table macros.
Okay, so how about changing the SHM code to make use of pte_alloc and co?
If we do that, then we can also make the optimisation of sharing ptes for
really big SHM segments.
-ben
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: shm_alloc and friends
2000-05-25 16:01 ` Benjamin C.R. LaHaise
@ 2000-05-25 16:04 ` Russell King
0 siblings, 0 replies; 9+ messages in thread
From: Russell King @ 2000-05-25 16:04 UTC (permalink / raw)
To: blah; +Cc: linux-kernel, linux-mm
Benjamin C.R. LaHaise writes:
> Okay, so how about changing the SHM code to make use of pte_alloc and co?
> If we do that, then we can also make the optimisation of sharing ptes for
> really big SHM segments.
It's unneeded that its using indirect pointers - the code in no way is
reliant on a two level scheme at all.
Note that this array is only used so that SHM can keep track of the ptes
its allocated for paging them in/out of memory. It's not used as actual
page tables.
_____
|_____| ------------------------------------------------- ---+---+-
| | Russell King rmk@arm.linux.org.uk --- ---
| | | | http://www.arm.linux.org.uk/~rmk/aboutme.html / / |
| +-+-+ --- -+-
/ | THE developer of ARM Linux |+| /|\
/ | | | --- |
+-+-+ ------------------------------------------------- /\\\ |
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: shm_alloc and friends
2000-05-25 14:36 ` Russell King
2000-05-25 14:59 ` Alan Cox
@ 2000-05-26 14:07 ` Eric W. Biederman
1 sibling, 0 replies; 9+ messages in thread
From: Eric W. Biederman @ 2000-05-26 14:07 UTC (permalink / raw)
To: linux-mm
Russell King <rmk@arm.linux.org.uk> writes:
> Alan Cox writes:
> > > Problems:
> > > - memsetting the vmalloced area to initialise the pte's.
> > > (Note: pte_clear can't be used, because that is expected to be used
> >
> > Sorry that breaks S/390. You cannot use memset here.
>
> Suggestions on a fix for that?
Purge pte_t from the code. Just use swp_entry_t.
The page cache has all of the necessary mechanisms for testing
to see if a page is present or not already.
That should make memset safe as well...
Eric
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2000-05-26 14:07 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2000-05-25 14:24 shm_alloc and friends Russell King
2000-05-25 14:31 ` Alan Cox
2000-05-25 14:36 ` Russell King
2000-05-25 14:59 ` Alan Cox
2000-05-25 15:20 ` Russell King
2000-05-25 15:59 ` Alan Cox
2000-05-25 16:01 ` Benjamin C.R. LaHaise
2000-05-25 16:04 ` Russell King
2000-05-26 14:07 ` Eric W. Biederman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox