* [RFC][PATCH 1/6] compcache: compressed RAM block device
2008-03-20 19:59 [RFC][PATCH 0/6] compcache: Compressed Caching Nitin Gupta
@ 2008-03-20 20:04 ` Nitin Gupta
2008-03-20 20:06 ` [RFC][PATCH 2/6] compcache: block device - internal defs Nitin Gupta
` (5 subsequent siblings)
6 siblings, 0 replies; 10+ messages in thread
From: Nitin Gupta @ 2008-03-20 20:04 UTC (permalink / raw)
To: linux-mm
This creates a RAM-based block device (called ramzswap0) which is used as a swap disk.
On write (swap-out):
- Compress the page (using LZO)
- Allocate the required amount of memory (using TLSF)
- Store a reference to its location in a simple array
On read (swap-in):
- Get the compressed page location from the array
- Decompress the page
It also makes the required Kconfig and Makefile changes.
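A condensed sketch of these two paths follows (illustrative helpers only; the
real handler, compcache_make_request() in the patch below, additionally deals
with incompressible pages, locking, stats and failure cases):

	/* swap-out: compress, allocate from the TLSF pool, remember location */
	static void sketch_swap_out(struct compcache *cc, size_t page_no,
				    void *user_mem)
	{
		size_t clen = PAGE_SIZE;

		lzo1x_1_compress(user_mem, PAGE_SIZE, cc->compress_buffer,
				 &clen, cc->compress_workmem);
		cc->table[page_no].addr = tlsf_malloc(clen, cc->mem_pool);
		memcpy(cc->table[page_no].addr, cc->compress_buffer, clen);
		cc->table[page_no].len = clen;
	}

	/* swap-in: look up the stored location and decompress */
	static void sketch_swap_in(struct compcache *cc, size_t page_no,
				   void *user_mem)
	{
		size_t clen = PAGE_SIZE;

		lzo1x_decompress_safe(cc->table[page_no].addr,
				      cc->table[page_no].len,
				      user_mem, &clen);
	}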
Signed-off-by: Nitin Gupta <nitingupta910 at gmail dot com>
---
drivers/block/Kconfig | 10 +
drivers/block/Makefile | 1 +
drivers/block/compcache.c | 440 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 451 insertions(+), 0 deletions(-)
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 0d1d213..0cba42f 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -347,6 +347,16 @@ config BLK_DEV_RAM_SIZE
The default value is 4096 kilobytes. Only change this if you know
what you are doing.
+config BLK_DEV_COMPCACHE
+ tristate "Compressed RAM based swap device"
+ select TLSF
+ select LZO_COMPRESS
+ select LZO_DECOMPRESS
+ help
+ This creates a RAM-based block device which acts as a swap disk. Pages
+ swapped to this disk are compressed and stored in memory itself.
+ Project Home: http://code.google.com/p/compcache/
+
config BLK_DEV_XIP
bool "Support XIP filesystems on RAM block device"
depends on BLK_DEV_RAM
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 5e58430..b6d3dd2 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_PS3_DISK) += ps3disk.o
obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
obj-$(CONFIG_BLK_DEV_RAM) += brd.o
+obj-$(CONFIG_BLK_DEV_COMPCACHE) += compcache.o
obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
obj-$(CONFIG_BLK_DEV_XD) += xd.o
obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o
diff --git a/drivers/block/compcache.c b/drivers/block/compcache.c
new file mode 100644
index 0000000..4ffcd63
--- /dev/null
+++ b/drivers/block/compcache.c
@@ -0,0 +1,440 @@
+/*
+ * Compressed RAM based swap device
+ *
+ * (C) Nitin Gupta
+ *
+ * This RAM-based block device acts as a swap disk.
+ * Pages swapped to this device are compressed and
+ * stored in memory.
+ *
+ * Project home: http://code.google.com/p/compcache
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/device.h>
+#include <linux/genhd.h>
+#include <linux/highmem.h>
+#include <linux/lzo.h>
+#include <linux/mutex.h>
+#include <linux/proc_fs.h>
+#include <linux/swap.h>
+#include <linux/tlsf.h>
+#include <linux/vmalloc.h>
+#include <asm/pgtable.h>
+#include <asm/string.h>
+
+#include "compcache.h"
+
+static struct block_device_operations compcache_devops = {
+ .owner = THIS_MODULE,
+};
+
+static struct compcache compcache;
+static unsigned long compcache_size_kbytes;
+#if STATS
+static struct compcache_stats stats;
+#endif
+
+#ifdef CONFIG_COMPCACHE_PROC
+static struct proc_dir_entry *proc;
+
+static int proc_compcache_read(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ int len;
+#if STATS
+ size_t succ_writes;
+ unsigned int good_compress_perc = 0, no_compress_perc = 0;
+#endif
+
+ if (off > 0) {
+ *eof = 1;
+ return 0;
+ }
+
+ len = sprintf(page,
+ "DiskSize: %8u kB\n",
+ compcache.size >> (10 - SECTOR_SHIFT));
+#if STATS
+ succ_writes = stats.num_writes - stats.failed_writes;
+ if (succ_writes) {
+ good_compress_perc = stats.good_compress * 100 / succ_writes;
+ no_compress_perc = stats.pages_expand * 100 / succ_writes;
+ }
+
+ len += sprintf(page + len,
+ "NumReads: %8u\n"
+ "NumWrites: %8u\n"
+ "FailedReads: %8u\n"
+ "FailedWrites: %8u\n"
+ "InvalidIO: %8u\n"
+ "GoodCompress: %8u %%\n"
+ "NoCompress: %8u %%\n"
+ "CurrentPages: %8zu\n"
+ "CurrentMem: %8zu kB\n"
+ "PeakMem: %8zu kB\n",
+ stats.num_reads,
+ stats.num_writes,
+ stats.failed_reads,
+ stats.failed_writes,
+ stats.invalid_io,
+ good_compress_perc,
+ no_compress_perc,
+ stats.curr_pages,
+ K(stats.curr_mem),
+ K(stats.peak_mem));
+#endif
+ return len;
+}
+#endif /* CONFIG_COMPCACHE_PROC */
+
+/* Check if request is within bounds and page aligned */
+static inline int valid_swap_request(struct bio *bio)
+{
+ if (unlikely((bio->bi_sector >= compcache.size) ||
+ (bio->bi_sector & (SECTORS_PER_PAGE - 1)) ||
+ (bio->bi_vcnt != 1) ||
+ (bio->bi_size != PAGE_SIZE) ||
+ (bio->bi_io_vec[0].bv_offset != 0)))
+ return 0;
+ return 1;
+}
+
+static int compcache_make_request(struct request_queue *queue, struct bio *bio)
+{
+ int ret;
+ size_t clen, page_no;
+ void *user_mem;
+ struct page *page;
+
+ if (!valid_swap_request(bio)) {
+ stat_inc(&stats.invalid_io);
+ goto out_nomap;
+ }
+
+ CC_DEBUG2("bio sector: %lu (%s)\n", (unsigned long)bio->bi_sector,
+ bio_data_dir(bio) == READ ? "R" : "W");
+
+ page = bio->bi_io_vec[0].bv_page;
+ page_no = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
+ user_mem = kmap(page);
+
+ if (bio_data_dir(bio) == READ) {
+ stat_inc(&stats.num_reads);
+ /*
+ * This is an attempt to read before any previous write
+ * to this location. This happens due to readahead when
+ * the swap device is read from user space (e.g. during swapon).
+ */
+ if (unlikely(compcache.table[page_no].addr == NULL)) {
+ CC_DEBUG("Read before write on swap device: "
+ "sector=%lu, size=%zu, offset=%zu\n",
+ (unsigned long)(bio->bi_sector),
+ bio->bi_size,
+ bio->bi_io_vec[0].bv_offset);
+ memset(user_mem, 0, PAGE_SIZE);
+ kunmap(page);
+ set_bit(BIO_UPTODATE, &bio->bi_flags);
+ bio_endio(bio, 0);
+ return 0;
+ }
+
+ /* Page is stored uncompressed since it is incompressible */
+ if (unlikely(compcache.table[page_no].len == PAGE_SIZE)) {
+ memcpy(user_mem, compcache.table[page_no].addr,
+ PAGE_SIZE);
+ kunmap(page);
+ set_bit(BIO_UPTODATE, &bio->bi_flags);
+ bio_endio(bio, 0);
+ return 0;
+ }
+
+ clen = PAGE_SIZE;
+ ret = lzo1x_decompress_safe(
+ compcache.table[page_no].addr,
+ compcache.table[page_no].len,
+ user_mem,
+ &clen);
+
+ /* should NEVER happen */
+ if (unlikely(ret != LZO_E_OK)) {
+ pr_err(C "Decompression failed! "
+ "err=%d, page=%zu, len=%u\n", ret, page_no,
+ compcache.table[page_no].len);
+ stat_inc(&stats.failed_reads);
+ goto out;
+ }
+
+ CC_DEBUG2("Page decompressed: page_no=%zu\n", page_no);
+ kunmap(page);
+ set_bit(BIO_UPTODATE, &bio->bi_flags);
+ bio_endio(bio, 0);
+ return 0;
+ } else { /* Write */
+ unsigned char *src = compcache.compress_buffer;
+ stat_inc(&stats.num_writes);
+ /*
+ * The system swaps to the same sector again when the stored page
+ * is no longer referenced by any process, so it is now safe
+ * to free the memory that was allocated for this page.
+ */
+ if (compcache.table[page_no].addr) {
+ CC_DEBUG2("Freeing page: %zu\n", page_no);
+ tlsf_free(compcache.table[page_no].addr,
+ compcache.mem_pool);
+ stat_dec(&stats.curr_pages);
+ compcache.table[page_no].addr = NULL;
+ compcache.table[page_no].len = 0;
+ }
+
+ mutex_lock(&compcache.lock);
+ ret = lzo1x_1_compress(user_mem, PAGE_SIZE,
+ src, &clen, compcache.compress_workmem);
+ if (unlikely(ret != LZO_E_OK)) {
+ mutex_unlock(&compcache.lock);
+ pr_err(C "Compression failed! err=%d\n", ret);
+ compcache.table[page_no].addr = NULL;
+ compcache.table[page_no].len = 0;
+ stat_inc(&stats.failed_writes);
+ goto out;
+ }
+
+ /* Page is incompressible - store it as is */
+ if (clen >= PAGE_SIZE) {
+ CC_DEBUG("Page expand on compression: "
+ "page=%zu, size=%zu\n", page_no, clen);
+ clen = PAGE_SIZE;
+ src = user_mem;
+ } else {
+ CC_DEBUG2("Page compressed: page_no=%zu, len=%zu\n",
+ page_no, clen);
+ }
+ if ((compcache.table[page_no].addr = tlsf_malloc(clen,
+ compcache.mem_pool)) == NULL) {
+ mutex_unlock(&compcache.lock);
+ pr_err(C "Error allocating memory for compressed "
+ "page: %zu, size=%zu \n", page_no, clen);
+ compcache.table[page_no].len = 0;
+ stat_inc(&stats.failed_writes);
+ goto out;
+ }
+
+ memcpy(compcache.table[page_no].addr, src, clen);
+
+ /* Update stats */
+ stat_inc(&stats.curr_pages);
+ stat_set(&stats.curr_mem, stats.curr_mem + clen);
+ stat_setmax(&stats.peak_mem, stats.curr_mem);
+ stat_inc_if_less(&stats.pages_expand, PAGE_SIZE - 1, clen);
+ stat_inc_if_less(&stats.good_compress, clen,
+ PAGE_SIZE / 2 + 1);
+ mutex_unlock(&compcache.lock);
+
+ compcache.table[page_no].len = clen;
+
+ kunmap(page);
+ set_bit(BIO_UPTODATE, &bio->bi_flags);
+ bio_endio(bio, 0);
+ return 0;
+ }
+out:
+ kunmap(page);
+out_nomap:
+ bio_io_error(bio);
+ return 0;
+}
+
+static void setup_swap_header(union swap_header *s)
+{
+ s->info.version = 1;
+ s->info.last_page = compcache.size >> SECTORS_PER_PAGE_SHIFT;
+ s->info.nr_badpages = 0;
+ memcpy(s->magic.magic, "SWAPSPACE2", 10);
+}
+
+static void *get_mem(size_t size)
+{
+ return __vmalloc(size, GFP_NOIO, PAGE_KERNEL);
+}
+
+static void put_mem(void *ptr)
+{
+ vfree(ptr);
+}
+
+static int __init compcache_init(void)
+{
+ int ret;
+ size_t num_pages;
+ struct sysinfo i;
+
+ mutex_init(&compcache.lock);
+
+ if (compcache_size_kbytes == 0) {
+ pr_info(C "compcache size not provided."
+ " Using default: (%u%% of Total RAM).\n"
+ "Use compcache_size_kbytes module param to specify"
+ " custom size\n", DEFAULT_COMPCACHE_PERCENT);
+ si_meminfo(&i);
+ compcache_size_kbytes = ((DEFAULT_COMPCACHE_PERCENT *
+ i.totalram) / 100) << (PAGE_SHIFT - 10);
+ }
+
+ CC_DEBUG2("compcache_size_kbytes=%lu\n", compcache_size_kbytes);
+ compcache.size = compcache_size_kbytes << 10;
+ compcache.size = (compcache.size + PAGE_SIZE - 1) & PAGE_MASK;
+ pr_info(C "Compressed swap size set to: %zu KB\n", compcache.size >> 10);
+ compcache.size >>= SECTOR_SHIFT;
+
+ compcache.compress_workmem = kmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
+ if (compcache.compress_workmem == NULL) {
+ pr_err(C "Error allocating compressor working memory\n");
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ compcache.compress_buffer = kmalloc(2 * PAGE_SIZE, GFP_KERNEL);
+ if (compcache.compress_buffer == NULL) {
+ pr_err(C "Error allocating compressor buffer space\n");
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ num_pages = compcache.size >> SECTORS_PER_PAGE_SHIFT;
+ compcache.table = vmalloc(num_pages * sizeof(*compcache.table));
+ if (compcache.table == NULL) {
+ pr_err(C "Error allocating compcache address table\n");
+ ret = -ENOMEM;
+ goto fail;
+ }
+ memset(compcache.table, 0, num_pages * sizeof(*compcache.table));
+
+ compcache.table[0].addr = (void *)get_zeroed_page(GFP_KERNEL);
+ if (compcache.table[0].addr == NULL) {
+ pr_err(C "Error allocating swap header page\n");
+ ret = -ENOMEM;
+ goto fail;
+ }
+ compcache.table[0].len = PAGE_SIZE;
+ setup_swap_header((union swap_header *)(compcache.table[0].addr));
+
+ compcache.disk = alloc_disk(1);
+ if (compcache.disk == NULL) {
+ pr_err(C "Error allocating disk structure\n");
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ compcache.disk->first_minor = 0;
+ compcache.disk->fops = &compcache_devops;
+ /*
+ * It is named like this to prevent distro installers
+ * from offering compcache as an installation target. They
+ * seem to ignore all devices beginning with 'ram'.
+ */
+ sprintf(compcache.disk->disk_name, "%s", "ramzswap0");
+
+ compcache.disk->major = register_blkdev(0, compcache.disk->disk_name);
+ if (compcache.disk->major < 0) {
+ pr_err(C "Cannot register block device\n");
+ ret = -EFAULT;
+ goto fail;
+ }
+
+ compcache.disk->queue = blk_alloc_queue(GFP_KERNEL);
+ if (compcache.disk->queue == NULL) {
+ pr_err(C "Cannot register disk queue\n");
+ ret = -EFAULT;
+ goto fail;
+ }
+
+ set_capacity(compcache.disk, compcache.size);
+ blk_queue_make_request(compcache.disk->queue, compcache_make_request);
+ blk_queue_hardsect_size(compcache.disk->queue, PAGE_SIZE);
+ add_disk(compcache.disk);
+
+ compcache.mem_pool = tlsf_create_memory_pool("compcache",
+ get_mem, put_mem,
+ INIT_SIZE, 0, GROW_SIZE);
+ if (compcache.mem_pool == NULL) {
+ pr_err(C "Error creating memory pool\n");
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+#ifdef CONFIG_COMPCACHE_PROC
+ proc = create_proc_entry("compcache", S_IRUGO, NULL);
+ if (proc)
+ proc->read_proc = &proc_compcache_read;
+ else {
+ ret = -ENOMEM;
+ pr_warning(C "Error creating proc entry\n");
+ goto fail;
+ }
+#endif
+
+ CC_DEBUG(C "Initialization done!\n");
+ return 0;
+
+fail:
+ if (compcache.disk != NULL) {
+ if (compcache.disk->major > 0)
+ unregister_blkdev(compcache.disk->major,
+ compcache.disk->disk_name);
+ del_gendisk(compcache.disk);
+ }
+
+ if (compcache.table[0].addr)
+ free_page((unsigned long)compcache.table[0].addr);
+ if (compcache.compress_workmem)
+ kfree(compcache.compress_workmem);
+ if (compcache.compress_buffer)
+ kfree(compcache.compress_buffer);
+ if (compcache.table)
+ vfree(compcache.table);
+ if (compcache.mem_pool)
+ tlsf_destroy_memory_pool(compcache.mem_pool);
+#ifdef CONFIG_COMPCACHE_PROC
+ if (proc)
+ remove_proc_entry("compcache", &proc_root);
+#endif
+ pr_err(C "Initialization failed: err=%d\n", ret);
+ return ret;
+}
+
+static void __exit compcache_exit(void)
+{
+ size_t i, num_pages;
+ num_pages = compcache.size >> SECTORS_PER_PAGE_SHIFT;
+
+ unregister_blkdev(compcache.disk->major, compcache.disk->disk_name);
+ del_gendisk(compcache.disk);
+ free_page((unsigned long)compcache.table[0].addr);
+ kfree(compcache.compress_workmem);
+ kfree(compcache.compress_buffer);
+
+ /* Free all pages that are still in compcache */
+ for (i = 1; i < num_pages; i++)
+ if (compcache.table[i].addr)
+ tlsf_free(compcache.table[i].addr, compcache.mem_pool);
+ vfree(compcache.table);
+ tlsf_destroy_memory_pool(compcache.mem_pool);
+
+#ifdef CONFIG_COMPCACHE_PROC
+ remove_proc_entry("compcache", &proc_root);
+#endif
+ CC_DEBUG("cleanup done!\n");
+}
+
+module_param(compcache_size_kbytes, ulong, 0);
+MODULE_PARM_DESC(compcache_size_kbytes, "compcache device size (in KB)");
+
+module_init(compcache_init);
+module_exit(compcache_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Nitin Gupta <nitingupta910 at gmail dot com>");
+MODULE_DESCRIPTION("Compressed RAM Based Swap Device");
--
* [RFC][PATCH 2/6] compcache: block device - internal defs
2008-03-20 19:59 [RFC][PATCH 0/6] compcache: Compressed Caching Nitin Gupta
2008-03-20 20:04 ` [RFC][PATCH 1/6] compcache: compressed RAM block device Nitin Gupta
@ 2008-03-20 20:06 ` Nitin Gupta
2008-03-20 20:08 ` [RFC][PATCH 3/6] compcache: TLSF Allocator interface Nitin Gupta
` (4 subsequent siblings)
6 siblings, 0 replies; 10+ messages in thread
From: Nitin Gupta @ 2008-03-20 20:06 UTC (permalink / raw)
To: linux-mm
This adds the header used internally by the block device code.
It contains flags to enable/disable debugging and stats collection, and also
defines the default disk size (25% of total RAM).
Signed-off-by: Nitin Gupta <nitingupta910 at gmail dot com>
---
drivers/block/compcache.h | 147 +++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 147 insertions(+), 0 deletions(-)
diff --git a/drivers/block/compcache.h b/drivers/block/compcache.h
new file mode 100644
index 0000000..b84b5d3
--- /dev/null
+++ b/drivers/block/compcache.h
@@ -0,0 +1,147 @@
+/*
+ * Compressed RAM based swap device
+ *
+ * (C) Nitin Gupta
+ *
+ * This RAM-based block device acts as a swap disk.
+ * Pages swapped to this device are compressed and
+ * stored in memory.
+ *
+ * Project home: http://code.google.com/p/compcache
+ */
+
+#ifndef _COMPCACHE_H_
+#define _COMPCACHE_H_
+
+#define K(x) ((x) >> 10)
+#define KB(x) ((x) << 10)
+
+#define SECTOR_SHIFT 9
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
+#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
+
+/*-- Configurable parameters */
+/* Default compcache size: 25% of total RAM */
+#define DEFAULT_COMPCACHE_PERCENT 25
+#define INIT_SIZE KB(16)
+#define GROW_SIZE INIT_SIZE
+/*-- */
+
+/* Message prefix */
+#define C "compcache: "
+
+/* Debugging and Stats */
+#define NOP do { } while(0)
+
+#if (1 || defined(CONFIG_DEBUG_COMPCACHE))
+#define DEBUG 1
+#define STATS 1
+#else
+#define DEBUG 0
+#define STATS 0
+#endif
+
+/* Create /proc/compcache? */
+/* If STATS is disabled, this will give minimal compcache info */
+#define CONFIG_COMPCACHE_PROC
+
+#if DEBUG
+#define CC_DEBUG(fmt,arg...) \
+ printk(KERN_DEBUG C fmt,##arg)
+#else
+#define CC_DEBUG(fmt,arg...) NOP
+#endif
+
+/*
+ * Verbose debugging:
+ * Enable basic debugging + verbose messages spread all over code
+ */
+#define DEBUG2 0
+
+#if DEBUG2
+#define DEBUG 1
+#define STATS 1
+#define CONFIG_COMPCACHE_PROC 1
+#define CC_DEBUG2(fmt,arg...) \
+ printk(KERN_DEBUG C fmt,##arg)
+#else /* DEBUG2 */
+#define CC_DEBUG2(fmt,arg...) NOP
+#endif
+
+/* It is useless to collect stats if there is no way to export them */
+#if (STATS && !defined(CONFIG_COMPCACHE_PROC))
+#error "compcache stats is enabled but not /proc/compcache."
+#endif
+
+#if STATS
+static inline void stat_inc_if_less(size_t *stat, const size_t val1,
+ const size_t val2)
+{
+ *stat += ((val1 < val2) ? 1 : 0);
+}
+
+static inline void stat_inc(size_t *stat)
+{
+ ++*stat;
+}
+
+static inline void stat_dec(size_t *stat)
+{
+ BUG_ON(*stat == 0);
+ --*stat;
+}
+
+static inline void stat_set(size_t *stat, const size_t val)
+{
+ *stat = val;
+}
+
+static inline void stat_setmax(size_t *max, const size_t cur)
+{
+ *max = (cur > *max) ? cur : *max;
+}
+#else /* STATS */
+#define stat_inc(x) NOP
+#define stat_dec(x) NOP
+#define stat_set(x, v) NOP
+#define stat_setmax(x, v) NOP
+#define stat_inc_if_less(x, v1, v2) NOP
+#endif /* STATS */
+
+/*-- Data structures */
+/* Indexed by page no. */
+struct table {
+ void *addr;
+ unsigned short len;
+} __attribute__ ((packed));
+
+struct compcache {
+ void *mem_pool;
+ void *compress_workmem;
+ void *compress_buffer;
+ struct table *table;
+ struct mutex lock;
+ struct gendisk *disk;
+ size_t size; /* In sectors */
+};
+
+#if STATS
+struct compcache_stats {
+ u32 num_reads; /* failed + successful */
+ u32 num_writes; /* --do-- */
+ u32 failed_reads; /* can happen when memory is too low */
+ u32 failed_writes; /* should NEVER! happen */
+ u32 invalid_io; /* non-swap I/O requests */
+ u32 good_compress; /* no. of pages with compression
+ * ratio <= 50%. TODO: export full
+ * compressed page size histogram */
+ u32 pages_expand; /* no. of incompressible pages */
+ size_t curr_pages; /* current no. of compressed pages */
+ size_t curr_mem; /* current total size of compressed pages */
+ size_t peak_mem;
+};
+#endif
+/*-- */
+
+#endif
--
* [RFC][PATCH 3/6] compcache: TLSF Allocator interface
2008-03-20 19:59 [RFC][PATCH 0/6] compcache: Compressed Caching Nitin Gupta
2008-03-20 20:04 ` [RFC][PATCH 1/6] compcache: compressed RAM block device Nitin Gupta
2008-03-20 20:06 ` [RFC][PATCH 2/6] compcache: block device - internal defs Nitin Gupta
@ 2008-03-20 20:08 ` Nitin Gupta
2008-03-20 20:10 ` [RFC][PATCH 4/6] compcache: TLSF Allocator Nitin Gupta
` (3 subsequent siblings)
6 siblings, 0 replies; 10+ messages in thread
From: Nitin Gupta @ 2008-03-20 20:08 UTC (permalink / raw)
To: linux-mm
The Two Level Segregate Fit (TLSF) allocator is used to allocate memory for
variable-sized compressed pages. It is fast and gives low fragmentation.
The following links give details on this allocator:
- http://rtportal.upv.es/rtmalloc/files/tlsf_paper_spe_2007.pdf
- http://code.google.com/p/compcache/wiki/TLSFAllocator
This kernel port of TLSF (v2.3.2) introduces several changes, but the underlying
algorithm remains the same.
Changelog: TLSF v2.3.2 vs this kernel port
- Pool now dynamically expands/shrinks.
It is a collection of contiguous memory regions.
- Changes to the pool create interface as a result of the above change.
- Collect and export stats (/proc/tlsfinfo).
- Cleanups: kernel coding style, added comments, macros -> static inline, etc.
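For illustration, a minimal sketch of how a client drives this interface,
modeled on the compcache block device in patch 1/6 (the function names and
sizes below are made up for the example):

	#include <linux/tlsf.h>
	#include <linux/vmalloc.h>

	/* callbacks used by TLSF to grow/shrink the pool */
	static void *example_get_mem(size_t bytes)
	{
		return __vmalloc(bytes, GFP_NOIO, PAGE_KERNEL);
	}

	static void example_put_mem(void *ptr)
	{
		vfree(ptr);
	}

	static int example(void)
	{
		void *pool, *obj;

		/* 16 KB initial size, no maximum, grow in 16 KB steps */
		pool = tlsf_create_memory_pool("example", example_get_mem,
					       example_put_mem, 16 * 1024, 0,
					       16 * 1024);
		if (!pool)
			return -ENOMEM;

		obj = tlsf_malloc(100, pool);	/* variable-sized object */
		if (obj)
			tlsf_free(obj, pool);

		tlsf_destroy_memory_pool(pool);
		return 0;
	}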
Signed-off-by: Nitin Gupta <nitingupta910 at gmail dot com>
---
include/linux/tlsf.h | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 93 insertions(+), 0 deletions(-)
diff --git a/include/linux/tlsf.h b/include/linux/tlsf.h
new file mode 100644
index 0000000..ef8092c
--- /dev/null
+++ b/include/linux/tlsf.h
@@ -0,0 +1,93 @@
+/*
+ * Two Levels Segregate Fit memory allocator (TLSF)
+ * Version 2.3.2
+ *
+ * Written by Miguel Masmano Tello <mimastel@doctor.upv.es>
+ *
+ * Thanks to Ismael Ripoll for his suggestions and reviews
+ *
+ * Copyright (C) 2007, 2006, 2005, 2004
+ *
+ * This code is released using a dual license strategy: GPL/LGPL
+ * You can choose the licence that better fits your requirements.
+ *
+ * Released under the terms of the GNU General Public License Version 2.0
+ * Released under the terms of the GNU Lesser General Public License Version 2.1
+ *
+ * This is kernel port of TLSF allocator.
+ * Original code can be found at: http://rtportal.upv.es/rtmalloc/
+ * - Nitin Gupta (nitingupta910 at gmail dot com)
+ */
+
+#ifndef _TLSF_H_
+#define _TLSF_H_
+
+typedef void* (get_memory)(size_t bytes);
+typedef void (put_memory)(void *ptr);
+
+/**
+ * tlsf_create_memory_pool - create dynamic memory pool
+ * @name: name of the pool
+ * @get_mem: callback function used to expand pool
+ * @put_mem: callback function used to shrink pool
+ * @init_size: initial pool size (in bytes)
+ * @max_size: maximum pool size (in bytes) - set this as 0 for no limit
+ * @grow_size: amount of memory (in bytes) added to pool whenever required
+ *
+ * All size values are rounded up to next page boundary.
+ */
+extern void *tlsf_create_memory_pool(const char *name,
+ get_memory get_mem,
+ put_memory put_mem,
+ size_t init_size,
+ size_t max_size,
+ size_t grow_size);
+/**
+ * tlsf_destroy_memory_pool - clean up the given pool
+ * @mem_pool: Pool to be destroyed
+ *
+ * Data structures associated with the pool are freed.
+ * All memory allocated from the pool must be freed before
+ * destroying it.
+ */
+extern void tlsf_destroy_memory_pool(void *mem_pool);
+
+/**
+ * tlsf_malloc - allocate memory from given pool
+ * @size: no. of bytes
+ * @mem_pool: pool to allocate from
+ */
+extern void *tlsf_malloc(size_t size, void *mem_pool);
+
+/**
+ * tlsf_calloc - allocate and zero-out memory from given pool
+ * @size: no. of bytes
+ * @mem_pool: pool to allocate from
+ */
+extern void *tlsf_calloc(size_t nelem, size_t elem_size, void *mem_pool);
+
+/**
+ * tlsf_free - free memory from given pool
+ * @ptr: address of memory to be freed
+ * @mem_pool: pool to free from
+ */
+extern void tlsf_free(void *ptr, void *mem_pool);
+
+/**
+ * tlsf_get_used_size - get memory currently used by given pool
+ *
+ * Used memory includes stored data + metadata + internal fragmentation
+ */
+extern size_t tlsf_get_used_size(void *mem_pool);
+
+/**
+ * tlsf_get_total_size - get total memory currently allocated for given pool
+ *
+ * This is the total memory currently allocated for this pool which includes
+ * used size + free size.
+ *
+ * (Total - Used) is a good indicator of the memory efficiency of the allocator.
+ */
+extern size_t tlsf_get_total_size(void *mem_pool);
+
+#endif
--
* [RFC][PATCH 4/6] compcache: TLSF Allocator
2008-03-20 19:59 [RFC][PATCH 0/6] compcache: Compressed Caching Nitin Gupta
` (2 preceding siblings ...)
2008-03-20 20:08 ` [RFC][PATCH 3/6] compcache: TLSF Allocator interface Nitin Gupta
@ 2008-03-20 20:10 ` Nitin Gupta
2008-03-20 20:11 ` [RFC][PATCH 5/6] compcache: TLSF Allocator - internal defs Nitin Gupta
` (2 subsequent siblings)
6 siblings, 0 replies; 10+ messages in thread
From: Nitin Gupta @ 2008-03-20 20:10 UTC (permalink / raw)
To: linux-mm
This is the main TLSF allocator code.
It is designed specifically for embedded devices.
See:
http://rtportal.upv.es/rtmalloc/
for more information.
Signed-off-by: Nitin Gupta <nitingupta910 at gmail dot com>
---
init/Kconfig | 10 +
mm/Makefile | 1 +
mm/tlsf.c | 668 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 679 insertions(+), 0 deletions(-)
diff --git a/init/Kconfig b/init/Kconfig
index a97924b..5f48000 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -734,6 +734,16 @@ config SLOB
endchoice
+config TLSF
+ depends on EMBEDDED
+ tristate "TLSF Allocator"
+ help
+ Two Level Segregate Fit allocator. It is fast and gives low
+ fragmentation. See:
+ http://rtportal.upv.es/rtmalloc/
+ http://code.google.com/p/compcache/wiki/TLSFAllocator
+ for more information.
+
config PROFILING
bool "Profiling support (EXPERIMENTAL)"
help
diff --git a/mm/Makefile b/mm/Makefile
index a5b0dd9..4e2398f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
obj-$(CONFIG_SLOB) += slob.o
obj-$(CONFIG_SLAB) += slab.o
obj-$(CONFIG_SLUB) += slub.o
+obj-$(CONFIG_TLSF) += tlsf.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
diff --git a/mm/tlsf.c b/mm/tlsf.c
new file mode 100644
index 0000000..6679886
--- /dev/null
+++ b/mm/tlsf.c
@@ -0,0 +1,668 @@
+/*
+ * Two Levels Segregate Fit memory allocator (TLSF)
+ * Version 2.3.2
+ *
+ * Written by Miguel Masmano Tello <mimastel@doctor.upv.es>
+ *
+ * Thanks to Ismael Ripoll for his suggestions and reviews
+ *
+ * Copyright (C) 2007, 2006, 2005, 2004
+ *
+ * This code is released using a dual license strategy: GPL/LGPL
+ * You can choose the licence that better fits your requirements.
+ *
+ * Released under the terms of the GNU General Public License Version 2.0
+ * Released under the terms of the GNU Lesser General Public License Version 2.1
+ *
+ * This is kernel port of TLSF allocator.
+ * Original code can be found at: http://rtportal.upv.es/rtmalloc/
+ * - Nitin Gupta (nitingupta910 at gmail dot com)
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/tlsf.h>
+
+#include "tlsf_int.h"
+
+static struct mutex pool_list_lock;
+static struct list_head pool_list_head;
+
+#ifdef CONFIG_TLSFINFO
+static struct proc_dir_entry *proc;
+
+/* Print in format similar to /proc/slabinfo -- easy to awk */
+static void print_tlsfinfo_header(struct seq_file *tlsf)
+{
+ seq_puts(tlsf, "# Size in kB\n");
+ seq_puts(tlsf, "# name <init_size> <max_size> <grow_size>"
+ " <used_size> <total_size> <extra>");
+#if STATS
+ seq_puts(tlsf, " <peak_used> <peak_total> <peak_extra>"
+ " <count_alloc> <count_free> <count_region_alloc>"
+ " <count_region_free> <failed_alloc>");
+#endif
+#if DEBUG
+ seq_puts(tlsf, " <valid>");
+#endif
+
+ seq_putc(tlsf, '\n');
+}
+
+/* Get pool no. (*pos) from pool list */
+static void *tlsf_start(struct seq_file *tlsf, loff_t *pos)
+{
+ struct list_head *lp;
+ loff_t p = *pos;
+
+ mutex_lock(&pool_list_lock);
+ if (!p)
+ return SEQ_START_TOKEN;
+
+ list_for_each(lp, &pool_list_head) {
+ if (!--p)
+ return lp;
+ }
+
+ return NULL;
+}
+
+/* Get pool next to the one given by tlsf_start() */
+static void *tlsf_next(struct seq_file *tlsf, void *v, loff_t *pos)
+{
+ struct list_head *lp;
+
+ if (v == SEQ_START_TOKEN)
+ lp = &pool_list_head;
+ else
+ lp = v;
+
+ lp = lp->next;
+ if (lp == &pool_list_head)
+ return NULL;
+
+ ++*pos;
+ return list_entry(lp, struct pool, list);
+}
+
+static void tlsf_stop(struct seq_file *tlsf, void *v)
+{
+ mutex_unlock(&pool_list_lock);
+}
+
+/* Display stats for pool given by tlsf_next() */
+static int tlsf_show(struct seq_file *tlsf, void *v)
+{
+ struct pool *pool;
+ size_t used, total;
+ if (v == SEQ_START_TOKEN) {
+ print_tlsfinfo_header(tlsf);
+ return 0;
+ }
+
+ pool = v;
+ used = tlsf_get_used_size(pool);
+ total = tlsf_get_total_size(pool);
+ seq_printf(tlsf, "%-16s %6zu %6zu %6zu %6zu %6zu %6zu",
+ pool->name,
+ K(pool->init_size),
+ K(pool->max_size),
+ K(pool->grow_size),
+ K(used),
+ K(total),
+ K(total - used));
+
+#if STATS
+ seq_printf(tlsf, " %6zu %6zu %6zu %6zu %6zu %6zu %6zu %6zu",
+ K(pool->peak_used),
+ K(pool->peak_total),
+ K(pool->peak_extra),
+ pool->count_alloc,
+ pool->count_free,
+ pool->count_region_alloc,
+ pool->count_region_free,
+ pool->count_failed_alloc);
+#endif
+
+#if DEBUG
+ seq_printf(tlsf, " %u", pool->valid);
+#endif
+
+ seq_putc(tlsf, '\n');
+ return 0;
+}
+
+static struct seq_operations tlsfinfo_op = {
+ .start = tlsf_start,
+ .next = tlsf_next,
+ .stop = tlsf_stop,
+ .show = tlsf_show,
+};
+
+static int tlsfinfo_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &tlsfinfo_op);
+}
+
+static const struct file_operations proc_tlsfinfo_operations = {
+ .open = tlsfinfo_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+#endif /* CONFIG_TLSFINFO */
+
+/*
+ * Helping functions
+ */
+
+/**
+ * Returns indexes (fl, sl) of the list used to serve request of size r
+ */
+static inline void MAPPING_SEARCH(size_t *r, int *fl, int *sl)
+{
+ int t;
+
+ if (*r < SMALL_BLOCK) {
+ *fl = 0;
+ *sl = *r / (SMALL_BLOCK / MAX_SLI);
+ } else {
+ t = (1 << (fls(*r) - 1 - MAX_LOG2_SLI)) - 1;
+ *r = *r + t;
+ *fl = fls(*r) - 1;
+ *sl = (*r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI;
+ *fl -= FLI_OFFSET;
+ /*if ((*fl -= FLI_OFFSET) < 0) // FL will be always >0!
+ *fl = *sl = 0;
+ */
+ *r &= ~t;
+ }
+}
+
+/**
+ * Returns indexes (fl, sl) which are used as the starting point to search
+ * for a block of size r. It also rounds up the requested size (r) to the
+ * next list.
+ */
+static inline void MAPPING_INSERT(size_t r, int *fl, int *sl)
+{
+ if (r < SMALL_BLOCK) {
+ *fl = 0;
+ *sl = r / (SMALL_BLOCK / MAX_SLI);
+ } else {
+ *fl = fls(r) - 1;
+ *sl = (r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI;
+ *fl -= FLI_OFFSET;
+ }
+}
+
+/**
+ * Returns the first block from a list that holds blocks larger than or
+ * equal to the one pointed to by the indexes (fl, sl)
+ */
+static inline struct bhdr *FIND_SUITABLE_BLOCK(struct pool *p, int *fl,
+ int *sl)
+{
+ u32 tmp = p->sl_bitmap[*fl] & (~0 << *sl);
+ struct bhdr *b = NULL;
+
+ if (tmp) {
+ *sl = ffs(tmp) - 1;
+ b = p->matrix[*fl][*sl];
+ } else {
+ *fl = ffs(p->fl_bitmap & (~0 << (*fl + 1))) - 1;
+ if (*fl > 0) { /* likely */
+ *sl = ffs(p->sl_bitmap[*fl]) - 1;
+ b = p->matrix[*fl][*sl];
+ }
+ }
+ return b;
+}
+
+/**
+ * Remove first free block(b) from free list with indexes (fl, sl).
+ */
+static inline void EXTRACT_BLOCK_HDR(struct bhdr *b, struct pool *p, int fl,
+ int sl)
+{
+ p->matrix[fl][sl] = b->ptr.free_ptr.next;
+ if (p->matrix[fl][sl])
+ p->matrix[fl][sl]->ptr.free_ptr.prev = NULL;
+ else {
+ clear_bit(sl, (void *)&p->sl_bitmap[fl]);
+ if (!p->sl_bitmap[fl])
+ clear_bit(fl, (void *)&p->fl_bitmap);
+ }
+ b->ptr.free_ptr = (struct free_ptr) {NULL, NULL};
+}
+
+/**
+ * Removes block(b) from free list with indexes (fl, sl)
+ */
+static inline void EXTRACT_BLOCK(struct bhdr *b, struct pool *p, int fl,
+ int sl)
+{
+ if (b->ptr.free_ptr.next)
+ b->ptr.free_ptr.next->ptr.free_ptr.prev =
+ b->ptr.free_ptr.prev;
+ if (b->ptr.free_ptr.prev)
+ b->ptr.free_ptr.prev->ptr.free_ptr.next =
+ b->ptr.free_ptr.next;
+ if (p->matrix[fl][sl] == b) {
+ p->matrix[fl][sl] = b->ptr.free_ptr.next;
+ if (!p->matrix[fl][sl]) {
+ clear_bit(sl, (void *)&p->sl_bitmap[fl]);
+ if (!p->sl_bitmap[fl])
+ clear_bit(fl, (void *)&p->fl_bitmap);
+ }
+ }
+ b->ptr.free_ptr = (struct free_ptr) {NULL, NULL};
+}
+
+/**
+ * Insert block(b) in free list with indexes (fl, sl)
+ */
+static inline void INSERT_BLOCK(struct bhdr *b, struct pool *p, int fl, int sl)
+{
+ b->ptr.free_ptr = (struct free_ptr) {NULL, p->matrix[fl][sl]};
+ if (p->matrix[fl][sl])
+ p->matrix[fl][sl]->ptr.free_ptr.prev = b;
+ p->matrix[fl][sl] = b;
+ set_bit(sl, (void *)&p->sl_bitmap[fl]);
+ set_bit(fl, (void *)&p->fl_bitmap);
+}
+
+/**
+ * A region is a virtually contiguous memory area and a pool is a
+ * collection of such regions
+ */
+static inline void ADD_REGION(void *region, size_t region_size,
+ struct pool *pool)
+{
+ int fl, sl;
+ struct bhdr *b, *lb;
+
+ b = (struct bhdr *)(region);
+ b->prev_hdr = NULL;
+ b->size = ROUNDDOWN_SIZE(region_size - 2 * BHDR_OVERHEAD)
+ | FREE_BLOCK | PREV_USED;
+ MAPPING_INSERT(b->size & BLOCK_SIZE_MASK, &fl, &sl);
+ INSERT_BLOCK(b, pool, fl, sl);
+ /* The sentinel block: allows us to know when we're in the last block */
+ lb = GET_NEXT_BLOCK(b->ptr.buffer, b->size & BLOCK_SIZE_MASK);
+ lb->prev_hdr = b;
+ lb->size = 0 | USED_BLOCK | PREV_FREE;
+ pool->used_size += BHDR_OVERHEAD; /* only sentinel block is "used" */
+ pool->num_regions++;
+ stat_inc(&pool->count_region_alloc);
+}
+
+/*
+ * Allocator code start
+ */
+
+/**
+ * tlsf_create_memory_pool - create dynamic memory pool
+ * @name: name of the pool
+ * @get_mem: callback function used to expand pool
+ * @put_mem: callback function used to shrink pool
+ * @init_size: initial pool size (in bytes)
+ * @max_size: maximum pool size (in bytes) - set this as 0 for no limit
+ * @grow_size: amount of memory (in bytes) added to pool whenever required
+ *
+ * All size values are rounded up to next page boundary.
+ */
+void *tlsf_create_memory_pool(const char *name,
+ get_memory get_mem,
+ put_memory put_mem,
+ size_t init_size,
+ size_t max_size,
+ size_t grow_size)
+{
+ struct pool *pool;
+ void *region;
+#if STATS
+ size_t used, total;
+#endif
+
+ if (max_size)
+ BUG_ON(max_size < init_size);
+
+ pool = get_mem(ROUNDUP_SIZE(sizeof(*pool)));
+ if (pool == NULL)
+ goto out;
+ memset(pool, 0, ROUNDUP_SIZE(sizeof(*pool)));
+
+ /* Round to next page boundary */
+ init_size = ROUNDUP_PAGE(init_size);
+ max_size = ROUNDUP_PAGE(max_size);
+ grow_size = ROUNDUP_PAGE(grow_size);
+ pr_info(T "pool: %p, init_size=%zu, max_size=%zu, grow_size=%zu\n",
+ pool, init_size, max_size, grow_size);
+
+ /* pool global overhead not included in used size */
+ pool->used_size = 0;
+
+ pool->init_size = init_size;
+ pool->max_size = max_size;
+ pool->grow_size = grow_size;
+ pool->get_mem = get_mem;
+ pool->put_mem = put_mem;
+ strncpy(pool->name, name, MAX_POOL_NAME_LEN);
+ pool->name[MAX_POOL_NAME_LEN - 1] = '\0';
+#if DEBUG
+ pool->valid = 1;
+#endif
+ region = get_mem(init_size);
+ if (region == NULL)
+ goto out_region;
+ ADD_REGION(region, init_size, pool);
+ pool->init_region = region;
+
+ mutex_init(&pool->lock);
+
+ list_add_tail(&pool->list, &pool_list_head);
+
+ /* Pool created: update stats */
+ stat_set(&used, tlsf_get_used_size(pool));
+ stat_set(&total, tlsf_get_total_size(pool));
+ stat_inc(&pool->count_alloc);
+ stat_setmax(&pool->peak_extra, total - used);
+ stat_setmax(&pool->peak_used, used);
+ stat_setmax(&pool->peak_total, total);
+
+ return pool;
+
+out_region:
+ put_mem(pool);
+
+out:
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(tlsf_create_memory_pool);
+
+/**
+ * tlsf_get_used_size - get memory currently used by given pool
+ *
+ * Used memory includes stored data + metadata + internal fragmentation
+ */
+size_t tlsf_get_used_size(void *mem_pool)
+{
+ struct pool *pool = (struct pool *)mem_pool;
+ return pool->used_size;
+}
+EXPORT_SYMBOL_GPL(tlsf_get_used_size);
+
+/**
+ * tlsf_get_total_size - get total memory currently allocated for given pool
+ *
+ * This is the total memory currently allocated for this pool which includes
+ * used size + free size.
+ *
+ * (Total - Used) is a good indicator of the memory efficiency of the allocator.
+ */
+size_t tlsf_get_total_size(void *mem_pool)
+{
+ size_t total;
+ struct pool *pool = (struct pool *)mem_pool;
+ total = ROUNDUP_SIZE(sizeof(*pool))
+ + pool->init_size
+ + (pool->num_regions - 1) * pool->grow_size;
+ return total;
+}
+EXPORT_SYMBOL_GPL(tlsf_get_total_size);
+
+/**
+ * tlsf_destroy_memory_pool - clean up the given pool
+ * @mem_pool: Pool to be destroyed
+ *
+ * Data structures associated with the pool are freed.
+ * All memory allocated from the pool must be freed before
+ * destroying it.
+ */
+void tlsf_destroy_memory_pool(void *mem_pool)
+{
+ struct pool *pool = (struct pool *)mem_pool;
+
+ /* User is destroying the pool without ever allocating from it */
+ if (tlsf_get_used_size(pool) == BHDR_OVERHEAD) {
+ pool->put_mem(pool->init_region);
+ pool->used_size -= BHDR_OVERHEAD;
+ stat_inc(&pool->count_region_free);
+ }
+
+ /* Check for memory leaks in this pool */
+ if (tlsf_get_used_size(pool)) {
+ pr_warning(T "memory leak in pool: %s (%p). "
+ "%zu bytes still in use.\n",
+ pool->name, pool, tlsf_get_used_size(pool));
+
+#if DEBUG
+ pool->valid = 0;
+ /* Invalid pools stay in list for debugging purpose */
+ return;
+#endif
+ }
+ mutex_lock(&pool_list_lock);
+ list_del_init(&pool->list);
+ mutex_unlock(&pool_list_lock);
+ pool->put_mem(pool);
+}
+EXPORT_SYMBOL_GPL(tlsf_destroy_memory_pool);
+
+/**
+ * tlsf_malloc - allocate memory from given pool
+ * @size: no. of bytes
+ * @mem_pool: pool to allocate from
+ */
+void *tlsf_malloc(size_t size, void *mem_pool)
+{
+ struct pool *pool = (struct pool *)mem_pool;
+ struct bhdr *b, *b2, *next_b, *region;
+ int fl, sl;
+ size_t tmp_size;
+#if STATS
+ size_t used, total;
+#endif
+
+#if DEBUG
+ unsigned int retries = 0;
+#endif
+
+ size = (size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : ROUNDUP_SIZE(size);
+ /* Rounding up the requested size and calculating fl and sl */
+
+ mutex_lock(&pool->lock);
+retry_find:
+ MAPPING_SEARCH(&size, &fl, &sl);
+
+ /* Searching a free block */
+ if (!(b = FIND_SUITABLE_BLOCK(pool, &fl, &sl))) {
+#if DEBUG
+ /*
+ * This can happen if there are too many users
+ * allocating from this pool simultaneously.
+ */
+ if (unlikely(retries == MAX_RETRY_EXPAND))
+ goto out_locked;
+ retries++;
+#endif
+ /* Not found */
+ if (size > (pool->grow_size - 2 * BHDR_OVERHEAD))
+ goto out_locked;
+ if (pool->max_size && (pool->init_size +
+ pool->num_regions * pool->grow_size
+ > pool->max_size))
+ goto out_locked;
+ mutex_unlock(&pool->lock);
+ if ((region = pool->get_mem(pool->grow_size)) == NULL)
+ goto out;
+ mutex_lock(&pool->lock);
+ ADD_REGION(region, pool->grow_size, pool);
+ goto retry_find;
+ }
+ EXTRACT_BLOCK_HDR(b, pool, fl, sl);
+
+ /*-- found: */
+ next_b = GET_NEXT_BLOCK(b->ptr.buffer, b->size & BLOCK_SIZE_MASK);
+ /* Should the block be split? */
+ tmp_size = (b->size & BLOCK_SIZE_MASK) - size;
+ if (tmp_size >= sizeof(struct bhdr)) {
+ tmp_size -= BHDR_OVERHEAD;
+ b2 = GET_NEXT_BLOCK(b->ptr.buffer, size);
+
+ b2->size = tmp_size | FREE_BLOCK | PREV_USED;
+ b2->prev_hdr = b;
+
+ next_b->prev_hdr = b2;
+
+ MAPPING_INSERT(tmp_size, &fl, &sl);
+ INSERT_BLOCK(b2, pool, fl, sl);
+
+ b->size = size | (b->size & PREV_STATE);
+ } else {
+ next_b->size &= (~PREV_FREE);
+ b->size &= (~FREE_BLOCK); /* Now it's used */
+ }
+
+ pool->used_size += (b->size & BLOCK_SIZE_MASK) + BHDR_OVERHEAD;
+
+ /* Successful alloc: update stats. */
+ stat_set(&used, tlsf_get_used_size(pool));
+ stat_set(&total, tlsf_get_total_size(pool));
+ stat_inc(&pool->count_alloc);
+ stat_setmax(&pool->peak_extra, total - used);
+ stat_setmax(&pool->peak_used, used);
+ stat_setmax(&pool->peak_total, total);
+
+ mutex_unlock(&pool->lock);
+ return (void *)b->ptr.buffer;
+
+ /* Failed alloc */
+out_locked:
+ mutex_unlock(&pool->lock);
+
+out:
+ stat_inc(&pool->count_failed_alloc);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(tlsf_malloc);
+
+/**
+ * tlsf_free - free memory from given pool
+ * @ptr: address of memory to be freed
+ * @mem_pool: pool to free from
+ */
+void tlsf_free(void *ptr, void *mem_pool)
+{
+ struct pool *pool = (struct pool *)mem_pool;
+ struct bhdr *b, *tmp_b;
+ int fl = 0, sl = 0;
+#if STATS
+ size_t used, total;
+#endif
+ if (unlikely(ptr == NULL))
+ return;
+
+ b = (struct bhdr *) ((char *) ptr - BHDR_OVERHEAD);
+
+ mutex_lock(&pool->lock);
+ b->size |= FREE_BLOCK;
+ pool->used_size -= (b->size & BLOCK_SIZE_MASK) + BHDR_OVERHEAD;
+ b->ptr.free_ptr = (struct free_ptr) { NULL, NULL};
+ tmp_b = GET_NEXT_BLOCK(b->ptr.buffer, b->size & BLOCK_SIZE_MASK);
+ if (tmp_b->size & FREE_BLOCK) {
+ MAPPING_INSERT(tmp_b->size & BLOCK_SIZE_MASK, &fl, &sl);
+ EXTRACT_BLOCK(tmp_b, pool, fl, sl);
+ b->size += (tmp_b->size & BLOCK_SIZE_MASK) + BHDR_OVERHEAD;
+ }
+ if (b->size & PREV_FREE) {
+ tmp_b = b->prev_hdr;
+ MAPPING_INSERT(tmp_b->size & BLOCK_SIZE_MASK, &fl, &sl);
+ EXTRACT_BLOCK(tmp_b, pool, fl, sl);
+ tmp_b->size += (b->size & BLOCK_SIZE_MASK) + BHDR_OVERHEAD;
+ b = tmp_b;
+ }
+ tmp_b = GET_NEXT_BLOCK(b->ptr.buffer, b->size & BLOCK_SIZE_MASK);
+ tmp_b->prev_hdr = b;
+
+ MAPPING_INSERT(b->size & BLOCK_SIZE_MASK, &fl, &sl);
+
+ if ((b->prev_hdr == NULL) && ((tmp_b->size & BLOCK_SIZE_MASK) == 0)) {
+ pool->put_mem(b);
+ pool->num_regions--;
+ pool->used_size -= BHDR_OVERHEAD; /* sentinel block header */
+ stat_inc(&pool->count_region_free);
+ goto out;
+ }
+
+ INSERT_BLOCK(b, pool, fl, sl);
+
+ tmp_b->size |= PREV_FREE;
+ tmp_b->prev_hdr = b;
+out:
+ /* Update stats */
+ stat_set(&used, tlsf_get_used_size(pool));
+ stat_set(&total, tlsf_get_total_size(pool));
+ stat_inc(&pool->count_free);
+ stat_setmax(&pool->peak_extra, total - used);
+ stat_setmax(&pool->peak_used, used);
+ stat_setmax(&pool->peak_total, total);
+
+ mutex_unlock(&pool->lock);
+}
+EXPORT_SYMBOL_GPL(tlsf_free);
+
+/**
+ * tlsf_calloc - allocate and zero-out memory from given pool
+ * @size: no. of bytes
+ * @mem_pool: pool to allocate from
+ */
+void *tlsf_calloc(size_t nelem, size_t elem_size, void *mem_pool)
+{
+ void *ptr;
+
+ if (nelem == 0 || elem_size == 0)
+ return NULL;
+
+ if ((ptr = tlsf_malloc(nelem * elem_size, mem_pool)) == NULL)
+ return NULL;
+ memset(ptr, 0, nelem * elem_size);
+
+ return ptr;
+}
+EXPORT_SYMBOL_GPL(tlsf_calloc);
+
+static int __init tlsf_init(void)
+{
+ INIT_LIST_HEAD(&pool_list_head);
+ mutex_init(&pool_list_lock);
+#ifdef CONFIG_TLSFINFO
+ proc = create_proc_entry("tlsfinfo", S_IRUGO, NULL);
+ if (proc)
+ proc->proc_fops = &proc_tlsfinfo_operations;
+ else
+ pr_warning(T "error creating proc entry\n");
+#endif
+ return 0;
+}
+
+static void __exit tlsf_exit(void)
+{
+#ifdef CONFIG_TLSFINFO
+ if (proc)
+ remove_proc_entry("tlsfinfo", &proc_root);
+#endif
+ return;
+}
+
+module_init(tlsf_init);
+module_exit(tlsf_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Nitin Gupta <nitingupta910 at gmail dot com>");
+MODULE_DESCRIPTION("TLSF Memory Allocator");
--
* [RFC][PATCH 5/6] compcache: TLSF Allocator - internal defs
2008-03-20 19:59 [RFC][PATCH 0/6] compcache: Compressed Caching Nitin Gupta
` (3 preceding siblings ...)
2008-03-20 20:10 ` [RFC][PATCH 4/6] compcache: TLSF Allocator Nitin Gupta
@ 2008-03-20 20:11 ` Nitin Gupta
2008-03-20 20:12 ` [RFC][PATCH 6/6] compcache: Documentation Nitin Gupta
2008-04-09 2:47 ` [RFC][PATCH 0/6] compcache: Compressed Caching Andrew Morton
6 siblings, 0 replies; 10+ messages in thread
From: Nitin Gupta @ 2008-03-20 20:11 UTC (permalink / raw)
To: linux-mm
This adds the header used internally by TLSF.
It contains flags to enable/disable debugging and stats collection, and
other important parameters like the alignment requirement.
Signed-off-by: Nitin Gupta <nitingupta910 at gmail dot com>
---
mm/tlsf_int.h | 179 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 179 insertions(+), 0 deletions(-)
diff --git a/mm/tlsf_int.h b/mm/tlsf_int.h
new file mode 100644
index 0000000..626cb3e
--- /dev/null
+++ b/mm/tlsf_int.h
@@ -0,0 +1,179 @@
+/*
+ * To be used internally by TLSF allocator.
+ */
+
+#ifndef _TLSF_INT_H_
+#define _TLSF_INT_H_
+
+#include <linux/tlsf.h>
+#include <asm/types.h>
+
+/* Debugging and Stats */
+#if (1 || defined(CONFIG_DEBUG_TLSF))
+#define DEBUG 1
+#define STATS 1
+#else
+#define DEBUG 0
+#define STATS 0
+#endif
+
+/* Create /proc/tlsfinfo? */
+/* If STATS is disabled, this will give minimal memory usage info. */
+#define CONFIG_TLSFINFO
+
+/* It is useless to collect stats if there is no way to export them */
+#if (STATS && !defined(CONFIG_TLSFINFO))
+#error "TLSF stats is enabled but not /proc/tlsfinfo."
+#endif
+
+#define NOP do { } while(0)
+#if STATS
+static inline void stat_inc(size_t *stat)
+{
+ ++*stat;
+}
+
+static inline void stat_dec(size_t *stat)
+{
+ BUG_ON(*stat == 0);
+ --*stat;
+}
+
+static inline void stat_set(size_t *stat, size_t val)
+{
+ *stat = val;
+}
+
+static inline void stat_setmax(size_t *max, size_t cur)
+{
+ *max = (cur > *max) ? cur : *max;
+}
+#else /* STATS */
+
+#define stat_inc(x) NOP
+#define stat_dec(x) NOP
+#define stat_set(x, v) NOP
+#define stat_setmax(x, v) NOP
+#endif /* STATS */
+
+/* Message prefix */
+#define T "TLSF: "
+
+#define K(x) ((x) >> 10)
+#define MAX_POOL_NAME_LEN 16
+
+/*-- TLSF structures */
+
+/* Some IMPORTANT TLSF parameters */
+#define MEM_ALIGN (sizeof(void *) * 2)
+#define MEM_ALIGN_MASK (~(MEM_ALIGN - 1))
+
+#define MAX_FLI (30)
+#define MAX_LOG2_SLI (5)
+#define MAX_SLI (1 << MAX_LOG2_SLI)
+
+#define FLI_OFFSET (6)
+/* The tlsf structure will only manage blocks bigger than 128 bytes */
+#define SMALL_BLOCK (128)
+#define REAL_FLI (MAX_FLI - FLI_OFFSET)
+#define MIN_BLOCK_SIZE (sizeof(struct free_ptr))
+#define BHDR_OVERHEAD (sizeof(struct bhdr) - MIN_BLOCK_SIZE)
+
+#define PTR_MASK (sizeof(void *) - 1)
+#define BLOCK_SIZE_MASK (0xFFFFFFFF - PTR_MASK)
+
+#define GET_NEXT_BLOCK(addr, r) ((struct bhdr *) \
+ ((char *) (addr) + (r)))
+#define ROUNDUP_SIZE(r) (((r) + MEM_ALIGN - 1) & MEM_ALIGN_MASK)
+#define ROUNDDOWN_SIZE(r) ((r) & MEM_ALIGN_MASK)
+#define ROUNDUP_PAGE(r) (((r) + PAGE_SIZE - 1) & PAGE_MASK)
+
+#define BLOCK_STATE (0x1)
+#define PREV_STATE (0x2)
+
+/* bit 0 of the block size */
+#define FREE_BLOCK (0x1)
+#define USED_BLOCK (0x0)
+
+/* bit 1 of the block size */
+#define PREV_FREE (0x2)
+#define PREV_USED (0x0)
+
+#if DEBUG
+#define MAX_RETRY_EXPAND 10
+#endif
+
+struct free_ptr {
+ struct bhdr *prev;
+ struct bhdr *next;
+};
+
+struct bhdr {
+ /* All blocks in a region are linked in order of physical address */
+ struct bhdr *prev_hdr;
+ /*
+ * The size is stored in bytes
+ * bit 0: block is free, if set
+ * bit 1: previous block is free, if set
+ */
+ u32 size;
+ /* Free blocks in individual freelists are linked */
+ union {
+ struct free_ptr free_ptr;
+ u8 buffer[sizeof(struct free_ptr)];
+ } ptr;
+};
+
+struct pool {
+ /* First level bitmap (REAL_FLI bits) */
+ u32 fl_bitmap;
+
+ /* Second level bitmap */
+ u32 sl_bitmap[REAL_FLI];
+
+ /* Free lists */
+ struct bhdr *matrix[REAL_FLI][MAX_SLI];
+
+ struct mutex lock;
+
+ size_t init_size;
+ size_t max_size;
+ size_t grow_size;
+
+ /* Basic stats */
+ size_t used_size;
+ size_t num_regions;
+
+ /* User provided functions for expanding/shrinking pool */
+ get_memory *get_mem;
+ put_memory *put_mem;
+
+ struct list_head list;
+
+#if STATS
+ /* Extra stats */
+ size_t peak_used;
+ size_t peak_total;
+ size_t peak_extra; /* MAX(Total - Used) */
+ size_t count_alloc;
+ size_t count_free;
+ size_t count_region_alloc;
+ size_t count_region_free;
+ size_t count_failed_alloc;
+#endif
+
+#if DEBUG
+ /*
+ * Pool used size must be 0 when it is destroyed.
+ * When a non-empty pool is destroyed, it suggests
+ * a memory leak. Such pools are marked invalid
+ * and kept in the pool list for later debugging.
+ */
+ unsigned int valid;
+#endif
+ void *init_region;
+ char name[MAX_POOL_NAME_LEN];
+};
+/*-- TLSF structures end */
+
+#endif
--
* [RFC][PATCH 6/6] compcache: Documentation
2008-03-20 19:59 [RFC][PATCH 0/6] compcache: Compressed Caching Nitin Gupta
` (4 preceding siblings ...)
2008-03-20 20:11 ` [RFC][PATCH 5/6] compcache: TLSF Allocator - internal defs Nitin Gupta
@ 2008-03-20 20:12 ` Nitin Gupta
2008-04-09 2:47 ` [RFC][PATCH 0/6] compcache: Compressed Caching Andrew Morton
6 siblings, 0 replies; 10+ messages in thread
From: Nitin Gupta @ 2008-03-20 20:12 UTC (permalink / raw)
To: linux-mm
compcache usage documentation.
Signed-off-by: Nitin Gupta <nitingupta910 at gmail dot com>
---
Documentation/compcache.txt | 24 ++++++++++++++++++++++++
1 files changed, 24 insertions(+), 0 deletions(-)
diff --git a/Documentation/compcache.txt b/Documentation/compcache.txt
new file mode 100644
index 0000000..9ece295
--- /dev/null
+++ b/Documentation/compcache.txt
@@ -0,0 +1,24 @@
+
+compcache: Compressed RAM based swap device
+-------------------------------------------
+
+Project home: http://code.google.com/p/compcache
+
+* Introduction
+This is a RAM-based block device which acts as a swap disk.
+Pages swapped to this device are compressed and stored in
+memory itself.
+
+* Usage
+ - modprobe compcache compcache_size_kbytes=<size>
+ (If no size is specified, a default of 25% of RAM is used.)
+ - swapon /dev/ramzswap0
+
+* Notes
+ - Allocator and compcache statistics are exported via /proc interface:
+ - /proc/tlsfinfo
+ - /proc/compcache
+
+
+Nitin Gupta
+(nitingupta910 at gmail dot com)
--
* Re: [RFC][PATCH 0/6] compcache: Compressed Caching
2008-03-20 19:59 [RFC][PATCH 0/6] compcache: Compressed Caching Nitin Gupta
` (5 preceding siblings ...)
2008-03-20 20:12 ` [RFC][PATCH 6/6] compcache: Documentation Nitin Gupta
@ 2008-04-09 2:47 ` Andrew Morton
2008-04-09 5:02 ` Nitin Gupta
6 siblings, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2008-04-09 2:47 UTC (permalink / raw)
To: nitingupta910; +Cc: linux-mm
On Fri, 21 Mar 2008 01:29:58 +0530 Nitin Gupta <nitingupta910@gmail.com> wrote:
> Subject: [RFC][PATCH 0/6] compcache: Compressed Caching
Didn't get many C's, did it?
Be sure to cc linux-kernel on the next version.
> Hi All,
>
> This implements a RAM based block device which acts as swap disk.
> Pages swapped to this disk are compressed and stored in memory itself.
> This allows more applications to fit in given amount of memory. This is
> especially useful for embedded devices, OLPC and small desktops
> (aka virtual machines).
>
> Project home: http://code.google.com/p/compcache/
>
> It consists of following components:
> - compcache.ko: Creates RAM based block device
> - tlsf.ko: Two Level Segregate Fit (TLSF) allocator
> - LZO de/compressor: (Already in mainline)
>
> Project home contains some performance numbers for TLSF and LZO.
> For general desktop use, this is giving *significant* performance gain
> under memory pressure. For now, it has been tested only on x86.
The values of "*significant*" should be exhaustively documented in the
patch changelogs. That is 100%-the-entire-whole-point of the patchset!
Omitting that information tends to reduce the number of C's.
Please feed all diffs through scripts/checkpatch.pl, contemplate the
result.
kmap_atomic() is (much) preferred over kmap().
flush_dcache_page() is needed after the CPU modifies pagecache or anon page
by hand (generally linked to kmap[_atomic]()).
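For reference, that pattern looks roughly like the following with the
2.6.25-era API, where kmap_atomic() still takes a KM_* slot (an illustrative
sketch, not code from the patch):

	void *vaddr = kmap_atomic(page, KM_USER0);
	memcpy(vaddr, buf, PAGE_SIZE);	/* modify the page by hand */
	kunmap_atomic(vaddr, KM_USER0);
	flush_dcache_page(page);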
The changelogs should include *complete* justification for the introduction
of a new allocator. What problem is it solving, what are the possible
solutions to that problem, why this one was chosen, etc. It's a fairly big
deal.
--
* Re: [RFC][PATCH 0/6] compcache: Compressed Caching
2008-04-09 2:47 ` [RFC][PATCH 0/6] compcache: Compressed Caching Andrew Morton
@ 2008-04-09 5:02 ` Nitin Gupta
2008-04-09 5:08 ` Andrew Morton
0 siblings, 1 reply; 10+ messages in thread
From: Nitin Gupta @ 2008-04-09 5:02 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-mm
On Wed, Apr 9, 2008 at 8:17 AM, Andrew Morton <akpm@linux-foundation.org> wrote:
> On Fri, 21 Mar 2008 01:29:58 +0530 Nitin Gupta <nitingupta910@gmail.com> wrote:
>
> > Subject: [RFC][PATCH 0/6] compcache: Compressed Caching
>
> Didn't get many C's, did it?
>
> Be sure to cc linux-kernel on the next version.
>
>
I have already posted it again on linux-kernel with link to
performance figures for allocator (TLSF vs SLUB):
see: http://lkml.org/lkml/2008/4/8/69
> > Project home contains some performance numbers for TLSF and LZO.
> > For general desktop use, this is giving *significant* performance gain
> > under memory pressure. For now, it has been tested only on x86.
>
> The values of "*significant*" should be exhaustively documented in the
> patch changelogs. That is 100%-the-entire-whole-point of the patchset!
> Omitting that information tends to reduce the number of C's.
>
I will also post performance numbers for compcache.
Desktop seems so much more responsive with this. I need to see how I
can quantify this.
> Please feed all diffs through scripts/checkpatch.pl, contemplate the
> result.
>
> kmap_atomic() is (much) preferred over kmap().
>
We are doing compression and alloc (can sleep) between kmap/kunmap.
So, I used kmap() instead of kmap_atomic().
> flush_dcache_page() is needed after the CPU modifies pagecache or anon page
> by hand (generally linked to kmap[_atomic]()).
>
ok. I will add this.
> The changelogs should include *complete* justification for the introduction
> of a new allocator. What problem is it solving, what are the possible
> solutions to that problem, why this one was chosen, etc. It's a fairly big
> deal.
>
TLSF comparison with SLUB can be found here:
http://code.google.com/p/compcache/wiki/AllocatorsComparison
Thanks for review.
Regards,
Nitin
--
* Re: [RFC][PATCH 0/6] compcache: Compressed Caching
2008-04-09 5:02 ` Nitin Gupta
@ 2008-04-09 5:08 ` Andrew Morton
0 siblings, 0 replies; 10+ messages in thread
From: Andrew Morton @ 2008-04-09 5:08 UTC (permalink / raw)
To: Nitin Gupta; +Cc: linux-mm
On Wed, 9 Apr 2008 10:32:06 +0530 "Nitin Gupta" <nitingupta910@gmail.com> wrote:
> On Wed, Apr 9, 2008 at 8:17 AM, Andrew Morton <akpm@linux-foundation.org> wrote:
> > On Fri, 21 Mar 2008 01:29:58 +0530 Nitin Gupta <nitingupta910@gmail.com> wrote:
> >
> > > Subject: [RFC][PATCH 0/6] compcache: Compressed Caching
> >
> > Didn't get many C's, did it?
> >
> > Be sure to cc linux-kernel on the next version.
> >
> >
>
> I have already posted it again on linux-kernel with link to
> performance figures for allocator (TLSF vs SLUB):
>
> see: http://lkml.org/lkml/2008/4/8/69
>
Information like this should be maintained within the changelog, please.
> TLSF comparison with SLUB can be found here:
>
> http://code.google.com/p/compcache/wiki/AllocatorsComparison
Ditto.
--