From: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
To: linux-mm <linux-mm@kvack.org>
Subject: [RFC:PATCH 005/012] Base file tail function
Date: Thu, 24 May 2007 08:11:57 -0400 [thread overview]
Message-ID: <20070524121157.13533.32213.sendpatchset@kleikamp.austin.ibm.com> (raw)
In-Reply-To: <20070524121130.13533.32563.sendpatchset@kleikamp.austin.ibm.com>
Base file tail function
This is the code to allocate, free, and unpack the tail into a normal page.
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
---
include/linux/file_tail.h | 67 ++++++++++
mm/Makefile | 1
mm/file_tail.c | 293 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 361 insertions(+)
diff -Nurp linux004/include/linux/file_tail.h linux005/include/linux/file_tail.h
--- linux004/include/linux/file_tail.h 1969-12-31 18:00:00.000000000 -0600
+++ linux005/include/linux/file_tail.h 2007-05-23 22:53:11.000000000 -0500
@@ -0,0 +1,67 @@
+#ifndef FILE_TAIL_H
+#define FILE_TAIL_H
+
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+
+/*
+ * VM File Tails are used to compactly store the data at the end of the
+ * file in a small SLAB-allocated buffer when the base page size is large.
+ */
+
+#ifdef CONFIG_VM_FILE_TAILS
+
+extern struct page *page_cache_alloc_tail(struct address_space *);
+extern void page_cache_free_tail(struct page *);
+extern void __page_cache_free_tail_buffer(struct page *);
+
+/*
+ * Hand @page to __page_cache_free_tail_buffer(), but only when the page is
+ * flagged as a file tail page.  Any other page is left untouched.
+ */
+static inline void page_cache_free_tail_buffer(struct page *page)
+{
+	if (!PageFileTail(page))
+		return;
+
+	__page_cache_free_tail_buffer(page);
+}
+
+/*
+ * Caller must hold write_lock_irq(&mapping->tree_lock)
+ */
+extern int __unpack_file_tail(struct address_space *);
+
+/*
+ * Locked wrapper around __unpack_file_tail(): takes mapping->tree_lock for
+ * writing with interrupts disabled, runs the unpack and drops the lock
+ * again.  Returns whatever __unpack_file_tail() returned.
+ */
+static inline int unpack_file_tail(struct address_space *mapping)
+{
+	int ret;
+
+	write_lock_irq(&mapping->tree_lock);
+	ret = __unpack_file_tail(mapping);
+	write_unlock_irq(&mapping->tree_lock);
+
+	return ret;
+}
+
+/*
+ * Try to set up a tail page for @mapping ahead of time.
+ *
+ * Nothing is done when the mapping already has a tail, when the inode is
+ * not tail capable, when @end_index is not the file's tail index, or when
+ * the tail buffer would be larger than half of PAGE_CACHE_SIZE.  Otherwise
+ * a tail page is allocated and the reference returned by
+ * page_cache_alloc_tail() is dropped right away.
+ */
+static inline void preallocate_page_cache_tail(struct address_space *mapping,
+					       unsigned long end_index)
+{
+	struct page *tail_page;
+
+	if (mapping->tail || !IS_FILE_TAIL_CAPABLE(mapping->host))
+		return;
+
+	if (file_tail_index(mapping) != end_index ||
+	    file_tail_buf_size(mapping) > PAGE_CACHE_SIZE / 2)
+		return;
+
+	tail_page = page_cache_alloc_tail(mapping);
+	if (tail_page)
+		page_cache_release(tail_page);
+}
+
+#else /* !CONFIG_VM_FILE_TAILS */
+
+/*
+ * Stand-ins used when CONFIG_VM_FILE_TAILS is not set: unpack_file_tail()
+ * evaluates to 0 and the remaining helpers expand to empty statements.
+ */
+#define unpack_file_tail(mapping) 0
+#define page_cache_free_tail(page) do {} while (0)
+#define page_cache_free_tail_buffer(page) do {} while (0)
+#define preallocate_page_cache_tail(page, end_index) do {} while (0)
+
+#endif /* CONFIG_VM_FILE_TAILS */
+
+#endif /* FILE_TAIL_H */
diff -Nurp linux004/mm/Makefile linux005/mm/Makefile
--- linux004/mm/Makefile 2007-05-21 15:15:48.000000000 -0500
+++ linux005/mm/Makefile 2007-05-23 22:53:11.000000000 -0500
@@ -31,4 +31,5 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_SMP) += allocpercpu.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
+obj-$(CONFIG_VM_FILE_TAILS) += file_tail.o
diff -Nurp linux004/mm/file_tail.c linux005/mm/file_tail.c
--- linux004/mm/file_tail.c 1969-12-31 18:00:00.000000000 -0600
+++ linux005/mm/file_tail.c 2007-05-23 22:53:11.000000000 -0500
@@ -0,0 +1,293 @@
+/*
+ * linux/mm/file_tail.c
+ *
+ * Copyright (C) International Business Machines Corp., 2006-2007
+ * Author: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
+ */
+
+/*
+ * VM File Tails are used to compactly store the data at the end of the
+ * file in a small SLAB-allocated buffer when the base page size is large.
+ */
+
+#include <linux/file_tail.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/buffer_head.h>
+#include <linux/swap.h>
+#include <linux/mm_inline.h>
+#include "internal.h"
+
+static struct kmem_cache *tail_page_cachep;
+
+/*
+ * Take @page off its zone's LRU list if it is currently flagged PageLRU.
+ *
+ * Kept local to this file for now; it may be worth making generic later.
+ */
+static void lru_cache_delete(struct page *page)
+{
+	struct zone *zone;
+	unsigned long irq_flags;
+
+	if (!PageLRU(page))
+		return;
+
+	zone = page_zone(page);
+
+	spin_lock_irqsave(&zone->lru_lock, irq_flags);
+	BUG_ON(!PageLRU(page));
+	__ClearPageLRU(page);
+	del_page_from_lru(zone, page);
+	spin_unlock_irqrestore(&zone->lru_lock, irq_flags);
+}
+
+/*
+ * Unpack short_page into full_page.
+ *
+ * On entry short_page is locked, referenced by the caller and has no buffers
+ * bound to it; full_page is newly allocated and referenced by the caller.
+ *
+ * full_page takes the place of short_page in the radix tree at @index under
+ * a single hold of tree_lock.  If the tail page was up to date, its data is
+ * copied into full_page (the remainder is zero filled) and the dirty state
+ * is carried across.
+ *
+ * tree_lock must NOT be held by the caller; it is never held on return.
+ *
+ * Returns 0 on success, in which case the caller's references to both pages
+ * have been consumed.  Returns a negative errno on failure, in which case
+ * short_page has been unlocked and the caller's reference to it dropped,
+ * while full_page still belongs to the caller.
+ */
+static int unpack_tail(struct address_space *mapping, pgoff_t index,
+		       struct page *short_page, struct page *full_page)
+{
+	int error;
+	char *kaddr;
+	char *tail;
+	char *tail_buf;
+	int tail_length;
+
+	/* This is the equivalent of remove_from_page_cache and
+	 * add_to_page_cache_lru, without dropping tree_lock
+	 */
+	error = radix_tree_preload(mapping_gfp_mask(mapping));
+	if (unlikely(error)) {
+		/*
+		 * Nothing has been modified yet, so the tail page stays in
+		 * the page cache.  Give up the lock and the reference the
+		 * caller handed us so that every error return leaves
+		 * short_page in the same state.
+		 * NOTE(review): the caller removed short_page from the LRU
+		 * before calling us and it is not put back here - confirm
+		 * whether it needs to be re-added.
+		 */
+		unlock_page(short_page);
+		page_cache_release(short_page);
+		return error;
+	}
+
+	write_lock_irq(&mapping->tree_lock);
+	radix_tree_delete(&mapping->page_tree, index);
+	short_page->mapping = NULL;
+	tail = mapping->tail;
+	tail_buf = mapping->tail_buf;
+	mapping->tail = mapping->tail_buf = NULL;
+
+	error = radix_tree_insert(&mapping->page_tree, index, full_page);
+	if (unlikely(error)) {
+		/*
+		 * Drop tree_lock and end the preload before bailing out;
+		 * returning with either still held would deadlock the caller
+		 * as soon as it tries to re-take tree_lock.
+		 */
+		write_unlock_irq(&mapping->tree_lock);
+		radix_tree_preload_end();
+		printk(KERN_ERR "unpack_tail: radix_tree_insert failed!\n");
+		kfree(tail_buf);
+		unlock_page(short_page);
+		/*
+		 * short_page has already been removed from the radix tree,
+		 * so drop both references, exactly as the success path does.
+		 */
+		page_cache_release(short_page);	/* page cache ref */
+		page_cache_release(short_page);	/* caller's ref */
+		return error;
+	}
+	page_cache_get(full_page);
+	SetPageLocked(full_page);
+	full_page->mapping = mapping;
+	full_page->index = index;
+
+	write_unlock_irq(&mapping->tree_lock);
+	radix_tree_preload_end();
+	page_cache_release(short_page);	/* page cache ref */
+
+	/* Copy data from tail to full page */
+	if (PageUptodate(short_page)) {
+		kaddr = kmap_atomic(full_page, KM_USER0);
+		tail_length = file_tail_buf_size(mapping);
+		memcpy(kaddr, tail, tail_length);
+		memset(kaddr+tail_length, 0, PAGE_CACHE_SIZE - tail_length);
+		kunmap_atomic(kaddr, KM_USER0);
+		SetPageUptodate(full_page);
+	}
+	/* tail points into tail_buf, so it must not be used past this point */
+	kfree(tail_buf);
+
+	/* finalize full_page */
+	if (PageUptodate(short_page) && PageDirty(short_page)) {
+		SetPageDirty(full_page);
+		write_lock_irq(&mapping->tree_lock);
+		radix_tree_tag_set(&mapping->page_tree, index,
+				   PAGECACHE_TAG_DIRTY);
+		write_unlock_irq(&mapping->tree_lock);
+	}
+	lru_cache_add(full_page);
+	unlock_page(full_page);
+	page_cache_release(full_page);
+
+	/* release short_page */
+	unlock_page(short_page);
+	page_cache_release(short_page);
+
+	return 0;
+}
+
+/*
+ * Replace the mapping's tail page, if there is one, with a full page cache
+ * page.
+ *
+ * Caller must hold write lock on mapping->tree_lock.  The lock is dropped
+ * while each pass of the loop does its work and is always re-taken before
+ * returning.
+ *
+ * Returns 0 once mapping->tail is NULL, -ENOMEM when no full page could be
+ * allocated, -EIO when the tail page's buffers could not be released, or
+ * the error returned by unpack_tail().
+ */
+int __unpack_file_tail(struct address_space *mapping)
+{
+	pgoff_t index;
+	struct page *full_page = NULL;
+	int rc = 0;
+	struct page *short_page;
+
+	while (mapping->tail) {
+		write_unlock_irq(&mapping->tree_lock);
+		index = file_tail_index(mapping);
+
+		/* Allocate full page (kept across retries of the loop) */
+		if (!full_page)
+			full_page = page_cache_alloc(mapping);
+		if (!full_page) {
+			rc = -ENOMEM;
+			write_lock_irq(&mapping->tree_lock);
+			break;
+		}
+
+		/* Get & lock short page */
+		short_page = find_lock_page(mapping, index);
+		if (!short_page || !PageFileTail(short_page)) {
+			/*
+			 * No page, or not a tail page, at the tail index:
+			 * drop whatever we got and re-evaluate mapping->tail
+			 * under the lock.
+			 */
+			if (short_page) {
+				unlock_page(short_page);
+				page_cache_release(short_page);
+			}
+			write_lock_irq(&mapping->tree_lock);
+			continue;
+		}
+		wait_on_page_writeback(short_page);
+		lru_cache_delete(short_page);
+		/* We have the tail page locked, so this shouldn't go away */
+		BUG_ON(!mapping->tail);
+
+		if (page_has_buffers(short_page) &&
+		    !try_to_release_page(short_page,
+					 mapping_gfp_mask(mapping))) {
+			/* How hard do we need to try? */
+			sync_blockdev(mapping->host->i_sb->s_bdev);
+			if (page_has_buffers(short_page) &&
+			    !try_to_release_page(short_page,
+						 mapping_gfp_mask(mapping))) {
+				printk(KERN_ERR "__unpack_file_tail: "
+				       "can't release page\n");
+				/*
+				 * find_lock_page() returned the page locked;
+				 * unlock it before dropping our reference or
+				 * it stays locked forever.
+				 * NOTE(review): short_page was taken off the
+				 * LRU above and is not put back on this
+				 * path - confirm whether it should be.
+				 */
+				unlock_page(short_page);
+				page_cache_release(short_page);
+				rc = -EIO; /* What's a good return code? */
+				write_lock_irq(&mapping->tree_lock);
+				break;
+			}
+		}
+
+		rc = unpack_tail(mapping, index, short_page, full_page);
+		if (rc) {
+			write_lock_irq(&mapping->tree_lock);
+			break;
+		}
+		/* unpack_tail() consumed our reference to full_page */
+		full_page = NULL;
+
+		/*
+		 * We need to return with tree_lock held.  It is unlikely,
+		 * but a new tail may have been added while the lock was
+		 * dropped; the loop condition re-checks mapping->tail.
+		 */
+		write_lock_irq(&mapping->tree_lock);
+
+	}
+	if (full_page)
+		page_cache_release(full_page);
+	return rc;
+}
+
+/*
+ * Constructor passed to kmem_cache_create() for tail_page_cache objects:
+ * zero the struct page, reset its mapcount, initialise its lru list head
+ * and flag it as a file tail page.
+ */
+static void init_once(void *ptr, struct kmem_cache *cachep, unsigned long flags)
+{
+	struct page *tail_page = ptr;
+
+	memset(tail_page, 0, sizeof(*tail_page));
+	reset_page_mapcount(tail_page);
+	INIT_LIST_HEAD(&tail_page->lru);
+	SetPageFileTail(tail_page);
+}
+
+/*
+ * Initcall: create the slab cache that tail struct pages are allocated
+ * from.  Returns 0 on success or -ENOMEM if the cache cannot be created.
+ */
+static __init int file_tail_init(void)
+{
+	tail_page_cachep = kmem_cache_create("tail_page_cache",
+					     sizeof(struct page), 0, 0,
+					     init_once, NULL);
+	if (tail_page_cachep)
+		return 0;
+
+	printk (KERN_ERR "Failed to create tail_page_cache\n");
+	return -ENOMEM;
+}
+__initcall(file_tail_init);
+
+/*
+ * Allocate a tail page for @mapping and add it to the page cache at the
+ * file's tail index, together with a kmalloc'ed buffer aligned to the
+ * inode's block size.  On success the buffer is published through
+ * mapping->tail (aligned data pointer) and mapping->tail_buf (pointer to
+ * pass to kfree()).
+ *
+ * If a page is already present at the tail index, that page is returned
+ * instead.
+ *
+ * Returns the page with a reference held for the caller, or NULL if an
+ * allocation failed, if the page could not be added to the page cache, or
+ * if the mapping gained a tail or the tail index/size changed while the
+ * page was being set up.
+ */
+struct page *page_cache_alloc_tail(struct address_space *mapping)
+{
+	int block_size = 1 << mapping->host->i_blkbits;
+	int error;
+	pgoff_t index;
+	struct page *page;
+	int size;
+	void *tail;
+	void *tail_buf;
+
+	size = file_tail_buf_size(mapping);
+	index = file_tail_index(mapping);
+
+	page = find_get_page(mapping, index);
+	if (page)
+		return page;
+
+	page = kmem_cache_alloc(tail_page_cachep, GFP_KERNEL);
+	if (!page)
+		return NULL;
+
+	/*
+	 * For allocations of at least 1/8 of a page, kmalloc returns
+	 * well-aligned buffers. For smaller allocations, we need to align
+	 * it ourselves, so over-allocate by block_size - 1 bytes.
+	 * A failed kmalloc leaves tail NULL in both branches (ALIGN of a
+	 * NULL pointer is still NULL).
+	 */
+	if (size < PAGE_SIZE >> 3) {
+		tail_buf = kmalloc(size + block_size - 1, GFP_KERNEL);
+		tail = (void *)ALIGN((size_t)tail_buf, block_size);
+	} else
+		tail_buf = tail = kmalloc(size, GFP_KERNEL);
+
+	if (!tail) {
+		kmem_cache_free(tail_page_cachep, page);
+		return NULL;
+	}
+	/* Just to make sure */
+	BUG_ON((size_t)tail & (block_size - 1));
+
+	set_page_count(page, 1);
+	page->flags = 0;
+	SetPageFileTail(page);
+
+	error = add_to_page_cache_lru(page, mapping, index,
+				      mapping_gfp_mask(mapping));
+	if (error) {
+		kfree(tail_buf);
+		kmem_cache_free(tail_page_cachep, page);
+		return NULL;
+	}
+	write_lock_irq(&mapping->tree_lock);
+	/*
+	 * Make sure the file size didn't change
+	 */
+	if (mapping->tail || (index != file_tail_index(mapping)) ||
+	    (size != file_tail_buf_size(mapping))) {
+		/*
+		 * The page was inserted into the page cache above and is
+		 * still locked.  Take it back out and unlock it before the
+		 * references are dropped, otherwise the radix tree is left
+		 * pointing at a page that is about to be freed.
+		 * NOTE(review): assumes __remove_from_page_cache() does not
+		 * itself touch mapping->tail / mapping->tail_buf - confirm
+		 * against the rest of the series.
+		 */
+		__remove_from_page_cache(page);
+		write_unlock_irq(&mapping->tree_lock);
+		unlock_page(page);
+		__put_page(page);
+		page_cache_release(page);
+		kfree(tail_buf);
+		return NULL;
+	}
+	mapping->tail = tail;
+	mapping->tail_buf = tail_buf;
+	write_unlock_irq(&mapping->tree_lock);
+	/* presumably locked by add_to_page_cache_lru() - TODO confirm */
+	unlock_page(page);
+
+	return page;
+}
+
+/*
+ * Return a tail struct page to tail_page_cachep, the slab cache that
+ * page_cache_alloc_tail() allocates from.
+ */
+void page_cache_free_tail(struct page *page)
+{
+ kmem_cache_free(tail_page_cachep, page);
+}
+
+/*
+ * Free the tail buffer of the mapping that @page belongs to and clear both
+ * mapping->tail and mapping->tail_buf.  page->mapping is dereferenced
+ * unconditionally, so it must still be set when this is called.
+ */
+void __page_cache_free_tail_buffer(struct page *page)
+{
+	struct address_space *owner = page->mapping;
+
+	kfree(owner->tail_buf);
+	owner->tail = NULL;
+	owner->tail_buf = NULL;
+}
--
David Kleikamp
IBM Linux Technology Center
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2007-05-24 12:11 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-05-24 12:11 [RFC:PATCH 000/012] VM Page Tails Dave Kleikamp
2007-05-24 12:11 ` [RFC:PATCH 001/012] Make iommu_map_sg deal with less-than-page-aligned data Dave Kleikamp
2007-05-24 12:11 ` [RFC:PATCH 002/012] Allow file systems to specify whether to store file tails Dave Kleikamp
2007-05-24 12:11 ` [RFC:PATCH 003/012] Add tail to address space and define PG_pagetail page flag Dave Kleikamp
2007-05-24 12:11 ` [RFC:PATCH 004/012] Replace PAGE_CACHE_SIZE with page_data_size() Dave Kleikamp
2007-05-24 12:11 ` Dave Kleikamp [this message]
2007-05-24 12:12 ` [RFC:PATCH 006/012] Modify lowmem_page_address() & page_to_phys() to special case tail page Dave Kleikamp
2007-05-24 12:12 ` [RFC:PATCH 007/012] Avoid page_to_pfn() on " Dave Kleikamp
2007-05-24 12:12 ` [RFC:PATCH 008/012] bh_offset needs to take page_address into consideration Dave Kleikamp
2007-05-24 12:12 ` [RFC:PATCH 009/012] Wrap i_size_write Dave Kleikamp
2007-05-24 12:12 ` [RFC:PATCH 010/012] unpack tail page to avoid memory mapping Dave Kleikamp
2007-05-24 12:12 ` [RFC:PATCH 011/012] Make sure tail page is freed correctly Dave Kleikamp
2007-05-24 12:12 ` [RFC:PATCH 012/012] Add tail hooks into file_map.c Dave Kleikamp
2007-05-24 12:45 ` [RFC:PATCH 000/012] VM File Tails Dave Kleikamp
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070524121157.13533.32213.sendpatchset@kleikamp.austin.ibm.com \
--to=shaggy@linux.vnet.ibm.com \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox