diff -uaNr linux-2.4.21/Documentation/Configure.help linux-2.4.21-1APTUS/Documentation/Configure.help --- linux-2.4.21/Documentation/Configure.help 2003-06-22 18:34:45.000000000 +0800 +++ linux-2.4.21-1APTUS/Documentation/Configure.help 2003-06-22 19:57:04.000000000 +0800 @@ -12784,6 +12784,16 @@ If you are running Linux on an IBM iSeries system and you want to read a CD drive owned by OS/400, say Y here. +Swapping to block device and local filesystems +CONFIG_BLOCKDEV_SWAP + Say yes to enable virtual memory swap to block devices. If you have + a local disk drive and wish to use swap say 'Y', otherwise say 'N'. + + If local swaping is option to your system, say 'M' and compile it + as a module. + + If unsure, choose 'M' to compile it as a module for safe. + Quota support CONFIG_QUOTA If you say Y here, you will be able to set per user limits for disk diff -uaNr linux-2.4.21/fs/blkdev_swap.c linux-2.4.21-1APTUS/fs/blkdev_swap.c --- linux-2.4.21/fs/blkdev_swap.c 1970-01-01 08:00:00.000000000 +0800 +++ linux-2.4.21-1APTUS/fs/blkdev_swap.c 2003-06-23 01:55:01.000000000 +0800 @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2000-2002 Shaolin Microsystems Ltd. + * + * Swapping to partitions or files on local disk partitions. + * + * David Chow + * + * Copied from the original fs/buffer.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG_BLKDEV_SWAP +# define dprintk(fmt...) printk(##fmt) +#else +# define dprintk(fmt...) do { /* */ } while (0) +#endif + +#define BLKDEV_SWAP_ID "blockdev" +#define BLKDEV_FILE_SWAP_ID "blockdev file" + +/* + * Helper function, copied here from buffer.c + */ + +/* + * Start I/O on a page. + * This function expects the page to be locked and may return + * before I/O is complete. You then have to check page->locked, + * page->uptodate, and maybe wait on page->wait. + * + * brw_swap_page() is SMP-safe, although it's being called with the + * kernel lock held - but the code is ready. + * + * FIXME: we need a swapper_inode->get_block function to remove + * some of the bmap kludges and interface ugliness here. + */ +int brw_swap_page(int rw, struct page *page, kdev_t dev, int b[], int size) +{ + struct buffer_head *head, *bh; + + if (!PageLocked(page)) + panic("brw_swap_page: page not locked for I/O"); + + if (!page->buffers) + create_empty_buffers(page, dev, size); + head = bh = page->buffers; + + /* Stage 1: lock all the buffers */ + do { + lock_buffer(bh); + bh->b_blocknr = *(b++); + set_bit(BH_Mapped, &bh->b_state); + set_buffer_async_io(bh); + bh = bh->b_this_page; + } while (bh != head); + + /* Stage 2: start the IO */ + do { + struct buffer_head *next = bh->b_this_page; + submit_bh(rw, bh); + bh = next; + } while (bh != head); + return 0; +} + +/* + * We implement to methods: swapping to partitions, and swapping to files + * located on partitions. + */ + +struct blkdev_swap_data { + kdev_t dev; +}; + +struct test_data { + struct file * filp; + kdev_t dev; +}; + +static int is_blkdev_swapping(unsigned int flags, + struct file * swapf, + void *data) +{ + struct test_data *testdata = (struct test_data *) data; + struct file * filp = testdata->filp; + kdev_t dev = testdata->dev; + + /* Only check filp's that don't match the one already opened + * for us by sys_swapon(). Otherwise, we will always flag a + * busy swap file. + */ + + if (swapf != filp) { + if (dev == swapf->f_dentry->d_inode->i_rdev) + return 1; + } + return 0; +} + +static int blkdev_swap_open(struct file * filp, void **dptr) +{ + int swapfilesize; + kdev_t dev; + struct blkdev_swap_data *data; + int error; + struct test_data testdata; + + MOD_INC_USE_COUNT; + + if (!S_ISBLK(filp->f_dentry->d_inode->i_mode)) { + dprintk(__FUNCTION__": can't handle this swap file: %s\n", + swapf->d_name.name); + error = 0; /* not for us */ + goto bad_swap; + } + + dev = filp->f_dentry->d_inode->i_rdev; + set_blocksize(dev, PAGE_SIZE); + error = -ENODEV; + if (!dev || + (blk_size[MAJOR(dev)] && !blk_size[MAJOR(dev)][MINOR(dev)])) { + printk("blkdev_swap_open: blkdev weirdness for %s\n", + filp->f_dentry->d_name.name); + goto bad_swap; + } + + error = -EBUSY; + /* Check to see if the device is mounted */ + if (S_ISBLK(filp->f_dentry->d_inode->i_mode) && is_mounted(dev)) { + printk("blkdev_swap_open: device already mounted, please unmount %s\n", + filp->f_dentry->d_name.name); + goto bad_swap; + } + + /* Check to make sure that we aren't already swapping. */ + testdata.filp = filp; + testdata.dev = dev; + if (swap_run_test(is_blkdev_swapping, &testdata)) { + printk("blkdev_swap_open: already swapping to %s\n", + filp->f_dentry->d_name.name); + goto bad_swap; + } + + /* Check to make sure that we aren't already swapping. */ + testdata.filp = filp; + testdata.dev = dev; + if (swap_run_test(is_blkdev_swapping, &testdata)) { + printk("blkdev_swap_open: already swapping to %s\n", + filp->f_dentry->d_name.name); + goto bad_swap; + } + + swapfilesize = 0; + if (blk_size[MAJOR(dev)]) + swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)] + >> (PAGE_SHIFT - 10); + + if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) { + printk("blkdev_swap_open: can't allocate data for %s\n", + filp->f_dentry->d_name.name); + error = -ENOMEM; + goto bad_swap; + } + data->dev = dev; + *dptr = data; + + dprintk("blkdev_swap_open: returning %d\n", swapfilesize); + return swapfilesize; + + bad_swap: + MOD_DEC_USE_COUNT; + return error; /* this swap thing is not for us */ +} + +static int blkdev_swap_release(struct file * filp, void *data) +{ + dprintk("blkdev_swap_release: releasing swap device %s\n", + filp->f_dentry->d_name.name); + kfree(data); + MOD_DEC_USE_COUNT; + return 0; +} + +static int blkdev_rw_page(int rw, struct page *page, unsigned long offset, + void *ptr) +{ + struct blkdev_swap_data *data = (struct blkdev_swap_data *)ptr; + brw_swap_page(rw, page, data->dev, (int *)&offset, PAGE_SIZE); + return 1; +} + +static struct swap_ops blkdev_swap_ops = { + blkdev_swap_open, + blkdev_swap_release, + blkdev_rw_page +}; + +struct blkdevfile_swap_data { + struct inode *swapf; +}; + +static int is_blkdevfile_swapping(unsigned int flags, + struct file * swapf, + void * data) +{ + struct file * filp = (struct file *) data; + + /* Only check filp's that don't match the one already opened + * for us by sys_swapon(). Otherwise, we will always flag a + * busy swap file. + */ + + if (swapf != filp) { + if (filp->f_dentry->d_inode == swapf->f_dentry->d_inode) + return 1; + } + return 0; +} + +static int blkdevfile_swap_open(struct file *swapf, void **dptr) +{ + int error = 0; + int swapfilesize; + struct blkdevfile_swap_data *data; + + MOD_INC_USE_COUNT; + + /* first check whether this is a regular file located on a local + * hard disk + */ + if (!S_ISREG(swapf->f_dentry->d_inode->i_mode)) { + dprintk("blkdevfile_swap_open: " + "can't handle this swap file: %s\n", + swapf->d_name.name); + error = 0; /* not for us */ + goto bad_swap; + } + if (!swapf->f_dentry->d_inode->i_mapping->a_ops->bmap) { + dprintk("blkdevfile_swap_open: no bmap for file: %s\n", + swapf->d_name.name); + error = 0; /* not for us */ + goto bad_swap; + } + + if (swap_run_test(is_blkdevfile_swapping, swapf)) { + dprintk("blkdevfile_swap_open: already swapping to %s\n", + swapf->d_name.name); + error = -EBUSY; + goto bad_swap; + } + swapfilesize = swapf->f_dentry->d_inode->i_size >> PAGE_SHIFT; + if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) { + error = -ENOMEM; + goto bad_swap; + } + data->swapf = swapf->f_dentry->d_inode; + *dptr = data; + return swapfilesize; + + bad_swap: + MOD_DEC_USE_COUNT; + return error; +} + +static int blkdevfile_swap_release(struct file *swapf, void *data) +{ + kfree(data); + MOD_DEC_USE_COUNT; + return 0; +} + +static int blkdevfile_rw_page(int rw, struct page *page, unsigned long offset, + void *ptr) +{ + struct blkdevfile_swap_data *data = (struct blkdevfile_swap_data *)ptr; + struct inode * swapf = data->swapf; + int i, j; + unsigned int block = offset + << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits); + kdev_t dev = swapf->i_dev; + int block_size; + int zones[PAGE_SIZE/512]; + int zones_used; + + block_size = swapf->i_sb->s_blocksize; + for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size) + if (!(zones[i] = bmap(swapf,block++))) { + printk("blkdevfile_rw_page: bad swap file\n"); + return 0; + } + zones_used = i; + + /* block_size == PAGE_SIZE/zones_used */ + brw_swap_page(rw, page, dev, zones, block_size); + return 1; +} + +static struct swap_ops blkdevfile_swap_ops = { + blkdevfile_swap_open, + blkdevfile_swap_release, + blkdevfile_rw_page + }; + +int __init blkdev_swap_init(void) +{ + (void)register_swap_method(BLKDEV_SWAP_ID, &blkdev_swap_ops); + (void)register_swap_method(BLKDEV_FILE_SWAP_ID, &blkdevfile_swap_ops); + return 0; +} + +void __exit blkdev_swap_exit(void) +{ + unregister_swap_method(BLKDEV_SWAP_ID); + unregister_swap_method(BLKDEV_FILE_SWAP_ID); +} + +module_init(blkdev_swap_init) +module_exit(blkdev_swap_exit) diff -uaNr linux-2.4.21/fs/buffer.c linux-2.4.21-1APTUS/fs/buffer.c --- linux-2.4.21/fs/buffer.c 2003-06-22 17:35:00.000000000 +0800 +++ linux-2.4.21-1APTUS/fs/buffer.c 2003-06-23 01:57:44.000000000 +0800 @@ -737,7 +737,7 @@ bh->b_private = private; } -static void end_buffer_io_async(struct buffer_head * bh, int uptodate) +void end_buffer_io_async(struct buffer_head * bh, int uptodate) { static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED; unsigned long flags; diff -uaNr linux-2.4.21/fs/Config.in linux-2.4.21-1APTUS/fs/Config.in --- linux-2.4.21/fs/Config.in 2002-11-29 07:53:15.000000000 +0800 +++ linux-2.4.21-1APTUS/fs/Config.in 2003-06-23 11:08:34.000000000 +0800 @@ -4,6 +4,8 @@ mainmenu_option next_comment comment 'File systems' +tristate 'Swapping to block devices and local filesystems' CONFIG_BLOCKDEV_SWAP + bool 'Quota support' CONFIG_QUOTA tristate 'Kernel automounter support' CONFIG_AUTOFS_FS tristate 'Kernel automounter version 4 support (also supports v3)' CONFIG_AUTOFS4_FS diff -uaNr linux-2.4.21/fs/Makefile linux-2.4.21-1APTUS/fs/Makefile --- linux-2.4.21/fs/Makefile 2002-11-29 07:53:15.000000000 +0800 +++ linux-2.4.21-1APTUS/fs/Makefile 2003-06-22 19:53:12.000000000 +0800 @@ -82,5 +82,6 @@ # persistent filesystems obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) +obj-$(CONFIG_BLOCKDEV_SWAP) += blkdev_swap.o include $(TOPDIR)/Rules.make diff -uaNr linux-2.4.21/include/linux/fs.h linux-2.4.21-1APTUS/include/linux/fs.h --- linux-2.4.21/include/linux/fs.h 2003-06-22 17:35:06.000000000 +0800 +++ linux-2.4.21-1APTUS/include/linux/fs.h 2003-06-23 01:15:04.000000000 +0800 @@ -1128,6 +1128,7 @@ extern void refile_buffer(struct buffer_head * buf); extern void create_empty_buffers(struct page *, kdev_t, unsigned long); extern void end_buffer_io_sync(struct buffer_head *bh, int uptodate); +extern void end_buffer_io_async(struct buffer_head * bh, int uptodate); /* reiserfs_writepage needs this */ extern void set_buffer_async_io(struct buffer_head *bh) ; diff -uaNr linux-2.4.21/include/linux/swap.h linux-2.4.21-1APTUS/include/linux/swap.h --- linux-2.4.21/include/linux/swap.h 2003-06-22 17:35:07.000000000 +0800 +++ linux-2.4.21-1APTUS/include/linux/swap.h 2003-06-23 01:38:41.000000000 +0800 @@ -58,15 +58,29 @@ #define SWAP_MAP_MAX 0x7fff #define SWAP_MAP_BAD 0x8000 +struct swap_ops { + int (*open)(struct file *swapf, void **data); + int (*release)(struct file *swapf, void *data); + int (*rw_page)(int rw, + struct page *page, unsigned long offset, void *data); +}; + +struct swap_method { + struct swap_method *next; + char * name; + struct swap_ops *ops; + int use_count; +}; + /* * The in-memory structure used to track swap areas. */ struct swap_info_struct { unsigned int flags; - kdev_t swap_device; + struct file *swap_file; + struct swap_method *method; + void *data; spinlock_t sdev_lock; - struct dentry * swap_file; - struct vfsmount *swap_vfsmnt; unsigned short * swap_map; unsigned int lowest_bit; unsigned int highest_bit; @@ -142,10 +156,15 @@ extern unsigned int nr_swapfiles; extern struct swap_info_struct swap_info[]; extern int is_swap_partition(kdev_t); +extern int register_swap_method(char *name, struct swap_ops *ops); +extern int unregister_swap_method(char *name); +extern int swap_run_test(int (*test_fct)(unsigned int flags, + struct file *swap_file, + void *testdata), void *testdata); extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); -extern void get_swaphandle_info(swp_entry_t, unsigned long *, kdev_t *, - struct inode **); +struct swap_method *get_swaphandle_info(swp_entry_t entry, + unsigned long *offset, void **data); extern int swap_duplicate(swp_entry_t); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern void swap_free(swp_entry_t); diff -uaNr linux-2.4.21/kernel/ksyms.c linux-2.4.21-1APTUS/kernel/ksyms.c --- linux-2.4.21/kernel/ksyms.c 2003-06-22 18:34:45.000000000 +0800 +++ linux-2.4.21-1APTUS/kernel/ksyms.c 2003-06-23 01:02:29.000000000 +0800 @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -93,6 +94,10 @@ EXPORT_SYMBOL(kallsyms_symbol_to_address); EXPORT_SYMBOL(kallsyms_address_to_symbol); #endif +EXPORT_SYMBOL(nr_free_pages); +EXPORT_SYMBOL(register_swap_method); +EXPORT_SYMBOL(unregister_swap_method); +EXPORT_SYMBOL(swap_run_test); /* process memory management */ EXPORT_SYMBOL(do_mmap_pgoff); diff -uaNr linux-2.4.21/mm/page_io.c linux-2.4.21-1APTUS/mm/page_io.c --- linux-2.4.21/mm/page_io.c 2002-11-29 07:53:15.000000000 +0800 +++ linux-2.4.21-1APTUS/mm/page_io.c 2003-06-23 00:32:31.000000000 +0800 @@ -32,15 +32,15 @@ * atomic, which is particularly important when we are trying to ensure * that shared pages stay shared while being swapped. */ - +/* David: Don't know is this is corrct or not, since 2.4.14, the rw never + * checked for WRITE. Not sure will we cause a problem here if we check for + * WRITE and start I/O and wait for completion. + */ static int rw_swap_page_base(int rw, swp_entry_t entry, struct page *page) { unsigned long offset; - int zones[PAGE_SIZE/512]; - int zones_used; - kdev_t dev = 0; - int block_size; - struct inode *swapf = 0; + struct swap_method *method; + void *data; if (rw == READ) { ClearPageUptodate(page); @@ -48,31 +48,21 @@ } else kstat.pswpout++; - get_swaphandle_info(entry, &offset, &dev, &swapf); - if (dev) { - zones[0] = offset; - zones_used = 1; - block_size = PAGE_SIZE; - } else if (swapf) { - int i, j; - unsigned int block = offset - << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits); - - block_size = swapf->i_sb->s_blocksize; - for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size) - if (!(zones[i] = bmap(swapf,block++))) { - printk("rw_swap_page: bad swap file\n"); - return 0; - } - zones_used = i; - dev = swapf->i_dev; - } else { - return 0; + method = get_swaphandle_info(entry, &offset, &data); + + if (method) { + if (!method->ops->rw_page(rw, page, offset, data)) { + return 0; + } + /* Note! For consistency we do all of the logic, + * decrementing the page count, and unlocking the page in the + * swap lock map - in the IO completion handler. + */ + return 1; + } - /* block_size == PAGE_SIZE/zones_used */ - brw_page(rw, page, dev, zones, block_size); - return 1; + return 0; } /* diff -uaNr linux-2.4.21/mm/swapfile.c linux-2.4.21-1APTUS/mm/swapfile.c --- linux-2.4.21/mm/swapfile.c 2003-06-22 17:35:08.000000000 +0800 +++ linux-2.4.21-1APTUS/mm/swapfile.c 2003-06-23 11:09:17.000000000 +0800 @@ -14,9 +14,15 @@ #include #include #include - +#include +#include +#include #include +#ifdef CONFIG_KMOD +#include +#endif + spinlock_t swaplock = SPIN_LOCK_UNLOCKED; unsigned int nr_swapfiles; int total_swap_pages; @@ -31,8 +37,78 @@ struct swap_info_struct swap_info[MAX_SWAPFILES]; +static struct swap_method *swap_methods = NULL; + #define SWAPFILE_CLUSTER 256 +int register_swap_method(char *name, struct swap_ops *ops) +{ + struct swap_method *pos; + struct swap_method *new; + int result = 0; + + lock_kernel(); + + for (pos = swap_methods; pos; pos = pos->next) { + if (strcmp(pos->name, name) == 0) { + printk(KERN_ERR "register_swap_method: " + "method %s already registered\n", name); + result = -EBUSY; + goto out; + } + } + + if (!(new = kmalloc(sizeof(*new), GFP_KERNEL))) { + printk(KERN_ERR "register_swap_method: " + "no memory for new method \"%s\"\n", name); + result = -ENOMEM; + goto out; + } + + new->name = name; + new->ops = ops; + new->use_count = 0; + + /* ok, insert at top of list */ + printk("register_swap_method: method %s\n", name); + new->next = swap_methods; + swap_methods = new; + out: + unlock_kernel(); + return result; +} + +int unregister_swap_method(char *name) +{ + struct swap_method **method, *next; + int result = 0; + + lock_kernel(); + + for (method = &swap_methods; *method; method = &(*method)->next) { + if (strcmp((*method)->name, name) == 0) { + if ((*method)->use_count > 0) { + printk(KERN_ERR "unregister_swap_method: " + "method \"%s\" is in use\n", name); + result = -EBUSY; + goto out; + } + + next = (*method)->next; + kfree(*method); + *method = next; + printk("unregister_swap_method: method %s\n", name); + goto out; + } + } + /* not found */ + printk("unregister_swap_method: no such method %s\n", name); + result = -ENOENT; + out: + unlock_kernel(); + return result; +} + static inline int scan_swap_map(struct swap_info_struct *si) { unsigned long offset; @@ -717,7 +793,7 @@ err = user_path_walk(specialfile, &nd); if (err) - goto out; + return err; lock_kernel(); prev = -1; @@ -725,15 +801,20 @@ for (type = swap_list.head; type >= 0; type = swap_info[type].next) { p = swap_info + type; if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) { - if (p->swap_file == nd.dentry) - break; + if (p->swap_file && + p->swap_file->f_dentry == nd.dentry) + break; } prev = type; } err = -EINVAL; + /* p->swap_file contains all needed info, no need to keep nd, so + * release it now. + */ + path_release(&nd); if (type < 0) { swap_list_unlock(); - goto out_dput; + goto out; } if (prev < 0) { @@ -767,19 +848,18 @@ total_swap_pages += p->pages; p->flags = SWP_WRITEOK; swap_list_unlock(); - goto out_dput; + goto out; } - if (p->swap_device) - blkdev_put(p->swap_file->d_inode->i_bdev, BDEV_SWAP); - path_release(&nd); + if (p->method->ops->release) + p->method->ops->release(p->swap_file, p->data); swap_list_lock(); swap_device_lock(p); - nd.mnt = p->swap_vfsmnt; - nd.dentry = p->swap_file; - p->swap_vfsmnt = NULL; + p->method->use_count --; + p->method = NULL; + p->data = NULL; + filp_close(p->swap_file, NULL); p->swap_file = NULL; - p->swap_device = 0; p->max = 0; swap_map = p->swap_map; p->swap_map = NULL; @@ -789,10 +869,8 @@ vfree(swap_map); err = 0; -out_dput: - unlock_kernel(); - path_release(&nd); out: + unlock_kernel(); return err; } @@ -805,18 +883,17 @@ if (!page) return -ENOMEM; - len += sprintf(buf, "Filename\t\t\tType\t\tSize\tUsed\tPriority\n"); + len += sprintf(buf, "%-32s%-16s%-8s%-8sPriority\n", + "Filename", "Type", "Size", "Used"); for (i = 0 ; i < nr_swapfiles ; i++, ptr++) { if ((ptr->flags & SWP_USED) && ptr->swap_map) { - char * path = d_path(ptr->swap_file, ptr->swap_vfsmnt, - page, PAGE_SIZE); + char * path = d_path(ptr->swap_file->f_dentry, + ptr->swap_file->f_vfsmnt, + page, PAGE_SIZE); len += sprintf(buf + len, "%-31s ", path); - if (!ptr->swap_device) - len += sprintf(buf + len, "file\t\t"); - else - len += sprintf(buf + len, "partition\t"); + len += sprintf(buf + len, "%-15s ", ptr->method->name); usedswap = 0; for (j = 0; j < ptr->max; ++j) @@ -827,7 +904,7 @@ default: usedswap++; } - len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10), + len += sprintf(buf + len, "%-8d%-8d%d\n", ptr->pages << (PAGE_SHIFT - 10), usedswap << (PAGE_SHIFT - 10), ptr->prio); } } @@ -835,28 +912,89 @@ return len; } -int is_swap_partition(kdev_t dev) { +/* apply a test function to all active swap objects. E.g. for checking + * whether a partition is used for swapping + */ +int swap_run_test(int (*test_fct)(unsigned int flags, + struct file * swap_file, + void *testdata), void *testdata) +{ struct swap_info_struct *ptr = swap_info; int i; for (i = 0 ; i < nr_swapfiles ; i++, ptr++) { - if (ptr->flags & SWP_USED) - if (ptr->swap_device == dev) - return 1; + if (ptr->swap_file && + test_fct(ptr->flags, ptr->swap_file, testdata)) + return 1; } return 0; } +/* Walk through the list of known swap method until somebody wants to + * handle this file. Pick the first one which claims to be able to + * swap to this kind of file. + * + * return value: < 0: error, 0: not found, > 0: swapfilesize + */ +int find_swap_method(struct file *swap_file, + struct swap_info_struct *p) +{ + int swapfilesize = 0; + struct swap_method *method; + + p->method = NULL; + for (method = swap_methods; method; method = method->next) { + swapfilesize = method->ops->open(swap_file, &p->data); + if (swapfilesize == 0) { + continue; + } + if (swapfilesize > 0) { + p->method = method; + p->method->use_count ++; + p->swap_file = swap_file; + break; + } + if (swapfilesize < 0) { + break; + } + } + return swapfilesize; +} + +/* swap_run_test() applies this hook to all swapfiles until it returns + * "1". If it never return "1", the result of swap_run_test() is "0", + * otherwise "1". + */ +static int is_swap_partition_hook(unsigned int flags, struct file *swap_file, + void *testdata) +{ + kdev_t swap_dev = S_ISBLK(swap_file->f_dentry->d_inode->i_mode) + ? swap_file->f_dentry->d_inode->i_rdev : 0; + kdev_t dev = *((kdev_t *)testdata); + + if (flags & SWP_USED && dev == swap_dev) { + return 1; + } else { + return 0; + } +} + +/* A wrapper to the swap_run_test() which test for swap methods + */ +int is_swap_partition(kdev_t dev) { + return swap_run_test(is_swap_partition_hook, &dev); +} + /* * Written 01/25/92 by Simmule Turner, heavily changed by Linus. + * 22nd June 2003 David Chow + * Modified to enable the swap method hooks * * The swapon system call */ asmlinkage long sys_swapon(const char * specialfile, int swap_flags) { struct swap_info_struct * p; - struct nameidata nd; - struct inode * swap_inode; unsigned int type; int i, j, prev; int error; @@ -866,9 +1004,9 @@ int nr_good_pages = 0; unsigned long maxpages = 1; int swapfilesize; - struct block_device *bdev = NULL; unsigned short *swap_map; - + char * tmp_specialfile; + if (!capable(CAP_SYS_ADMIN)) return -EPERM; lock_kernel(); @@ -886,8 +1024,7 @@ nr_swapfiles = type+1; p->flags = SWP_USED; p->swap_file = NULL; - p->swap_vfsmnt = NULL; - p->swap_device = 0; + p->method = NULL; p->swap_map = NULL; p->lowest_bit = 0; p->highest_bit = 0; @@ -901,58 +1038,69 @@ p->prio = --least_priority; } swap_list_unlock(); - error = user_path_walk(specialfile, &nd); - if (error) - goto bad_swap_2; - - p->swap_file = nd.dentry; - p->swap_vfsmnt = nd.mnt; - swap_inode = nd.dentry->d_inode; - error = -EINVAL; + /* Open the swap using filp_open. Bail out on any errors. */ + tmp_specialfile = getname(specialfile); + if (IS_ERR(tmp_specialfile)) { + error = PTR_ERR(tmp_specialfile); + goto bad_swap_2; + } + p->swap_file = filp_open(tmp_specialfile, O_RDWR, 0600); + putname(tmp_specialfile); + if (IS_ERR(p->swap_file)) { + error = PTR_ERR(p->swap_file); + goto bad_swap_1; + } - if (S_ISBLK(swap_inode->i_mode)) { - kdev_t dev = swap_inode->i_rdev; - struct block_device_operations *bdops; - devfs_handle_t de; - - if (is_mounted(dev)) { - error = -EBUSY; - goto bad_swap_2; - } + error = -EINVAL; - p->swap_device = dev; - set_blocksize(dev, PAGE_SIZE); - - bd_acquire(swap_inode); - bdev = swap_inode->i_bdev; - de = devfs_get_handle_from_inode(swap_inode); - bdops = devfs_get_ops(de); /* Increments module use count */ - if (bdops) bdev->bd_op = bdops; - - error = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_SWAP); - devfs_put_ops(de);/*Decrement module use count now we're safe*/ - if (error) - goto bad_swap_2; - set_blocksize(dev, PAGE_SIZE); - error = -ENODEV; - if (!dev || (blk_size[MAJOR(dev)] && - !blk_size[MAJOR(dev)][MINOR(dev)])) - goto bad_swap; - swapfilesize = 0; - if (blk_size[MAJOR(dev)]) - swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)] - >> (PAGE_SHIFT - 10); - } else if (S_ISREG(swap_inode->i_mode)) - swapfilesize = swap_inode->i_size >> PAGE_SHIFT; - else - goto bad_swap; + swapfilesize = find_swap_method(p->swap_file, p); + if (swapfilesize < 0) { + error = swapfilesize; + filp_close(p->swap_file, NULL); + goto bad_swap_1; + } + +#ifdef CONFIG_KMOD + if (swapfilesize == 0) { + if ((p->swap_file->f_dentry->d_sb + && (p->swap_file->f_dentry->d_sb->s_type->fs_flags & FS_REQUIRES_DEV)) + || (p->swap_file->f_dentry->d_inode + && S_ISBLK(p->swap_file->f_dentry->d_inode->i_mode))) { + /* It looks like a block device filesystem */ + (void)request_module("blkdev_swap"); + } else { + /* User may specify which swap module it use */ + (void)request_module("swapfile-mod"); + } + + swapfilesize = find_swap_method(p->swap_file, p); + if (swapfilesize < 0) { + error = swapfilesize; + filp_close(p->swap_file, NULL); + goto bad_swap_1; + } + } +#endif + if (swapfilesize == 0) { + printk("Invalid swap file\n"); + filp_close(p->swap_file, NULL); + goto bad_swap_1; /* free swap map */ + } + + /* After this point, the swap-file has been opened by the swap + * method. We must make sure to use the bad_swap label for any + * errors. + */ error = -EBUSY; + for (i = 0 ; i < nr_swapfiles ; i++) { struct swap_info_struct *q = &swap_info[i]; if (i == type || !q->swap_file) continue; - if (swap_inode->i_mapping == q->swap_file->d_inode->i_mapping) + if (p->swap_file->f_dentry->d_inode->i_mapping + == + q->swap_file->f_dentry->d_inode->i_mapping) goto bad_swap; } @@ -1088,17 +1236,25 @@ swap_list_unlock(); error = 0; goto out; + bad_swap: - if (bdev) - blkdev_put(bdev, BDEV_SWAP); + if (p->method->ops->release) + p->method->ops->release(p->swap_file, p->data); + swap_list_lock(); + p->method->use_count --; + p->method = NULL; + p->data = NULL; + swap_list_unlock(); + +bad_swap_1: + swap_list_lock(); + p->swap_file = NULL; + swap_list_unlock(); + bad_swap_2: + swap_list_lock(); swap_map = p->swap_map; - nd.mnt = p->swap_vfsmnt; - nd.dentry = p->swap_file; - p->swap_device = 0; - p->swap_file = NULL; - p->swap_vfsmnt = NULL; p->swap_map = NULL; p->flags = 0; if (!(swap_flags & SWAP_FLAG_PREFER)) @@ -1106,7 +1262,7 @@ swap_list_unlock(); if (swap_map) vfree(swap_map); - path_release(&nd); + out: if (swap_header) free_page((long) swap_header); @@ -1181,8 +1337,8 @@ /* * Prior swap_duplicate protects against swap device deletion. */ -void get_swaphandle_info(swp_entry_t entry, unsigned long *offset, - kdev_t *dev, struct inode **swapf) +struct swap_method *get_swaphandle_info(swp_entry_t entry, + unsigned long *offset, void **data) { unsigned long type; struct swap_info_struct *p; @@ -1190,32 +1346,26 @@ type = SWP_TYPE(entry); if (type >= nr_swapfiles) { printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_file, entry.val); - return; + return NULL; } p = &swap_info[type]; *offset = SWP_OFFSET(entry); if (*offset >= p->max && *offset != 0) { printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_offset, entry.val); - return; + return NULL; } if (p->swap_map && !p->swap_map[*offset]) { printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_offset, entry.val); - return; + return NULL; } if (!(p->flags & SWP_USED)) { printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_file, entry.val); - return; + return NULL; } - if (p->swap_device) { - *dev = p->swap_device; - } else if (p->swap_file) { - *swapf = p->swap_file->d_inode; - } else { - printk(KERN_ERR "rw_swap_page: no swap file or device\n"); - } - return; + *data = p->data; + return p->method; } /*