Subject: [PATCH,incomplete] shm integration into shrink_mmap
From: Christoph Rohland
Date: 08 Jun 2000 17:04:24 +0200
To: Rik van Riel
Cc: "Stephen C. Tweedie", linux-mm@kvack.org
In-Reply-To: "Stephen C. Tweedie"'s message of "Wed, 7 Jun 2000 15:43:50 +0100"
References: <20000607154350.N30951@redhat.com>

Hi Rik,

Here is my first proposal for integrating shm into shrink_mmap. It
gives you a function 'int shm_write_swap (struct page *page)' which
writes a page out to swap and replaces the pte in the shm structures.

I tested the patch without swapping and it seems stable so far, but
shm_write_swap itself is completely untested. It is also incomplete:
the pages allocated in shm_nopage_core probably still have to be
added to your lru queues, and of course shrink_mmap has to make the
calls to shm_write_swap.

I think it would be nicer to have only a notify function instead of
shm_write_swap: it would get the page and the swap_entry and could
simply put the swap_entry into the shm structures without handling
the swapping at all. A rough sketch of that idea follows below.
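To make that concrete, such a notify hook could look roughly like
this (a completely untested sketch: the name shm_notify_swapout is
made up, and shrink_mmap would own the swap entry allocation, the
swap cache insertion and the actual write-out):

int shm_notify_swapout (struct page *page, swp_entry_t swap_entry)
{
	struct shmid_kernel *shp;
	unsigned long idx;

	if (!PageSHM (page))
		BUG();

	/* shm_nopage_core stored the segment and the index into it
	   in page->mapping and page->index */
	shp = (struct shmid_kernel *) page->mapping->host;
	idx = page->index;

	/* replace the present pte by the swap entry; this assumes
	   the caller already accounted our reference on swap_entry,
	   the way __get_swap_page(2) does in shm_write_swap */
	shm_lockall();
	SHM_ENTRY (shp, idx) = swp_entry_to_pte(swap_entry);
	shm_swp++;
	shm_rss--;
	shm_unlockall();
	return 1;
}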
What do you think?

Christoph

[Attachment: patch-shm_shrink_mmap]

diff -uNr 4-1-ac10/include/linux/mm.h c/include/linux/mm.h
--- 4-1-ac10/include/linux/mm.h	Wed Jun  7 11:47:52 2000
+++ c/include/linux/mm.h	Thu Jun  8 10:20:52 2000
@@ -176,6 +176,7 @@
 #define PG_skip 10
 #define PG_unused_03 11
 #define PG_highmem 12
+#define PG_shm 13
 
 /* bits 21-30 unused */
 #define PG_reserved 31
@@ -220,6 +221,9 @@
 #define PageClearSwapCache(page) clear_bit(PG_swap_cache, &(page)->flags)
 #define PageTestandClearSwapCache(page) test_and_clear_bit(PG_swap_cache, &(page)->flags)
+
+#define PageSHM(page) test_bit(PG_shm, &(page)->flags)
+#define SetPageSHM(page) set_bit(PG_shm, &(page)->flags)
 
 #ifdef CONFIG_HIGHMEM
 #define PageHighMem(page) test_bit(PG_highmem, &(page)->flags)
diff -uNr 4-1-ac10/ipc/shm.c c/ipc/shm.c
--- 4-1-ac10/ipc/shm.c	Wed Jun  7 11:43:52 2000
+++ c/ipc/shm.c	Thu Jun  8 15:12:00 2000
@@ -81,6 +81,7 @@
 	unsigned long shm_npages;	/* size of segment (pages) */
 	pte_t **shm_dir;		/* ptr to arr of ptrs to frames */
 	int id;
+	struct address_space mapping;
 	union permap {
 		struct shmem {
 			time_t atime;
@@ -130,7 +131,6 @@
 static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset,
 				int length, int *eof, void *data);
 #endif
-static void zshm_swap (int prio, int gfp_mask);
 static void zmap_unuse(swp_entry_t entry, struct page *page);
 static void shmzero_open(struct vm_area_struct *shmd);
 static void shmzero_close(struct vm_area_struct *shmd);
@@ -628,7 +628,9 @@
 		if (pte_none(pte))
 			continue;
 		if (pte_present(pte)) {
-			__free_page (pte_page(pte));
+			struct page *page = pte_page(pte);
+			page->mapping = NULL; /* make __free_pages_ok happy */
+			__free_page (page);
 			rss++;
 		} else {
 			swap_free(pte_to_swp_entry(pte));
@@ -744,6 +746,12 @@
 	shp->shm_npages = numpages;
 	shp->shm_nattch = 0;
 	shp->shm_namelen = namelen;
+	INIT_LIST_HEAD (&shp->mapping.pages);
+	shp->mapping.nrpages = 0;
+	shp->mapping.a_ops = NULL;
+	shp->mapping.host = (void *) shp;
+	shp->mapping.i_mmap = NULL;
+	spin_lock_init(&shp->mapping.i_shared_lock);
 	return(shp);
 }
 
@@ -1441,6 +1449,9 @@
 			(*swp)--;
 		}
 		(*rss)++;
+		SetPageSHM(page);
+		page->mapping = &shp->mapping;
+		page->index = idx;
 		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
 		SHM_ENTRY(shp, idx) = pte;
 	}
@@ -1473,124 +1484,55 @@
 	return(page);
 }
 
-#define OKAY 0
-#define RETRY 1
-#define FAILED 2
-
-static int
-shm_swap_core(struct shmid_kernel *shp, unsigned long idx,
-	      swp_entry_t swap_entry, int *counter, struct page **outpage)
-{
-	pte_t page;
-	struct page *page_map;
-
-	page = SHM_ENTRY(shp, idx);
-	if (!pte_present(page))
-		return RETRY;
-	page_map = pte_page(page);
-	if (page_map->zone->free_pages > page_map->zone->pages_high)
-		return RETRY;
-	if (shp->id != zero_id) swap_attempts++;
-
-	if (--counter < 0) /* failed */
-		return FAILED;
-	if (page_count(page_map) != 1)
-		return RETRY;
-
-	lock_page(page_map);
-	if (!(page_map = prepare_highmem_swapout(page_map)))
-		return FAILED;
-	SHM_ENTRY (shp, idx) = swp_entry_to_pte(swap_entry);
-
-	/* add the locked page to the swap cache before allowing
-	   the swapin path to run lookup_swap_cache(). This avoids
-	   reading a not yet uptodate block from disk.
-	   NOTE: we just accounted the swap space reference for this
-	   swap cache page at __get_swap_page() time. */
-	add_to_swap_cache(*outpage = page_map, swap_entry);
-	return OKAY;
-}
-
-static void shm_swap_postop(struct page *page)
+int shm_write_swap (struct page *page)
 {
-	lock_kernel();
-	rw_swap_page(WRITE, page, 0);
-	unlock_kernel();
-	page_cache_release(page);
-}
+	struct shmid_kernel *shp;
+	swp_entry_t swap_entry;
+	unsigned long idx;
+
+	if (!PageSHM (page))
+		BUG();
 
-static int shm_swap_preop(swp_entry_t *swap_entry)
-{
 	lock_kernel();
 	/* subtle: preload the swap count for the swap cache. We can't
	   increase the count inside the critical section as we can't release
	   the shm_lock there. And we can't acquire the big lock with the
	   shm_lock held (otherwise we would deadlock too easily). */
-	*swap_entry = __get_swap_page(2);
-	if (!(*swap_entry).val) {
+	swap_entry = __get_swap_page(2);
+	if (!swap_entry.val) {
 		unlock_kernel();
-		return 1;
+		return 0;
 	}
 	unlock_kernel();
-	return 0;
-}
-
-/*
- * Goes through counter = (shm_rss / (prio + 1)) present shm pages.
- */
-static unsigned long swap_id; /* currently being swapped */
-static unsigned long swap_idx; /* next to swap */
-int shm_swap (int prio, int gfp_mask)
-{
-	struct shmid_kernel *shp;
-	swp_entry_t swap_entry;
-	unsigned long id, idx;
-	int loop = 0;
-	int counter;
-	struct page * page_map;
-
-	zshm_swap(prio, gfp_mask);
-	counter = shm_rss / (prio + 1);
-	if (!counter)
-		return 0;
-	if (shm_swap_preop(&swap_entry))
-		return 0;
+	shp = (struct shmid_kernel *) page->mapping->host;
+	idx = page->index;
+	if (shp->id != zero_id) swap_attempts++;
 
+	lock_page(page);
+	if (!(page = prepare_highmem_swapout(page)))
+		goto err;
 	shm_lockall();
-check_id:
-	shp = shm_get(swap_id);
-	if(shp==NULL || shp->shm_flags & PRV_LOCKED) {
-next_id:
-		swap_idx = 0;
-		if (++swap_id > shm_ids.max_id) {
-			swap_id = 0;
-			if (loop) {
-failed:
-				shm_unlockall();
-				__swap_free(swap_entry, 2);
-				return 0;
-			}
-			loop = 1;
-		}
-		goto check_id;
-	}
-	id = swap_id;
-
-check_table:
-	idx = swap_idx++;
-	if (idx >= shp->shm_npages)
-		goto next_id;
-
-	switch (shm_swap_core(shp, idx, swap_entry, &counter, &page_map)) {
-		case RETRY:	goto check_table;
-		case FAILED:	goto failed;
-	}
+	SHM_ENTRY (shp, idx) = swp_entry_to_pte(swap_entry);
 	swap_successes++;
 	shm_swp++;
 	shm_rss--;
 	shm_unlockall();
 
-	shm_swap_postop(page_map);
+	/* add the locked page to the swap cache before allowing
	   the swapin path to run lookup_swap_cache(). This avoids
	   reading a not yet uptodate block from disk.
	   NOTE: we just accounted the swap space reference for this
	   swap cache page at __get_swap_page() time. */
+	add_to_swap_cache(page, swap_entry);
+	lock_kernel();
+	rw_swap_page(WRITE, page, 0);
+	unlock_kernel();
+	page_cache_release(page);
 	return 1;
+err:
+	__swap_free(swap_entry, 2);
+	return 0;
 }
 
 /*
@@ -1718,7 +1660,6 @@
 #define VMA_TO_SHP(vma) ((vma)->vm_file->private_data)
 
 static spinlock_t zmap_list_lock = SPIN_LOCK_UNLOCKED;
-static unsigned long zswap_idx; /* next to swap */
 static struct shmid_kernel *zswap_shp = &zshmid_kernel;
 static int zshm_rss;
@@ -1864,63 +1805,5 @@
 	}
 	shm_unlock(zero_id);
 	spin_unlock(&zmap_list_lock);
-}
-
-static void zshm_swap (int prio, int gfp_mask)
-{
-	struct shmid_kernel *shp;
-	swp_entry_t swap_entry;
-	unsigned long idx;
-	int loop = 0;
-	int counter;
-	struct page * page_map;
-
-	counter = zshm_rss / (prio + 1);
-	if (!counter)
-		return;
-next:
-	if (shm_swap_preop(&swap_entry))
-		return;
-
-	spin_lock(&zmap_list_lock);
-	shm_lock(zero_id);
-	if (zshmid_kernel.zero_list.next == 0)
-		goto failed;
-next_id:
-	if (zswap_shp == &zshmid_kernel) {
-		if (loop) {
-failed:
-			shm_unlock(zero_id);
-			spin_unlock(&zmap_list_lock);
-			__swap_free(swap_entry, 2);
-			return;
-		}
-		zswap_shp = list_entry(zshmid_kernel.zero_list.next,
-					struct shmid_kernel, zero_list);
-		zswap_idx = 0;
-		loop = 1;
-	}
-	shp = zswap_shp;
-
-check_table:
-	idx = zswap_idx++;
-	if (idx >= shp->shm_npages) {
-		zswap_shp = list_entry(zswap_shp->zero_list.next,
-					struct shmid_kernel, zero_list);
-		zswap_idx = 0;
-		goto next_id;
-	}
-
-	switch (shm_swap_core(shp, idx, swap_entry, &counter, &page_map)) {
-		case RETRY:	goto check_table;
-		case FAILED:	goto failed;
-	}
-	shm_unlock(zero_id);
-	spin_unlock(&zmap_list_lock);
-
-	shm_swap_postop(page_map);
-	if (counter)
-		goto next;
-	return;
 }
diff -uNr 4-1-ac10/ipc/util.c c/ipc/util.c
--- 4-1-ac10/ipc/util.c	Mon Jun  5 11:12:29 2000
+++ c/ipc/util.c	Thu Jun  8 13:27:27 2000
@@ -243,11 +243,6 @@
 	return;
 }
 
-int shm_swap (int prio, int gfp_mask)
-{
-	return 0;
-}
-
 asmlinkage long sys_semget (key_t key, int nsems, int semflg)
 {
 	return -ENOSYS;
diff -uNr 4-1-ac10/mm/vmscan.c c/mm/vmscan.c
--- 4-1-ac10/mm/vmscan.c	Mon Jun  5 11:12:29 2000
+++ c/mm/vmscan.c	Thu Jun  8 13:28:17 2000
@@ -471,11 +471,6 @@
 			ret = 1;
 			goto done;
 		}
-		while (shm_swap(priority, gfp_mask)) {
-			ret = 1;
-			if (!--count)
-				goto done;
-		}
 	}
 
 	/*