From: Dave Kleikamp <shaggy@austin.ibm.com>
To: linux-mm <linux-mm@kvack.org>
Cc: Dave Kleikamp <shaggy@austin.ibm.com>,
Dave McCracken <dmccr@us.ibm.com>,
Badari Pulavarty <pbadari@us.ibm.com>
Subject: [RFC:PATCH 002/008] Base file tail function
Date: Mon, 17 Jul 2006 22:08:21 -0600 [thread overview]
Message-ID: <20060718040820.11926.12387.sendpatchset@kleikamp.austin.ibm.com> (raw)
In-Reply-To: <20060718040804.11926.76333.sendpatchset@kleikamp.austin.ibm.com>
Base file tail function
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
diff -Nurp linux001/include/linux/file_tail.h linux002/include/linux/file_tail.h
--- linux001/include/linux/file_tail.h 1969-12-31 18:00:00.000000000 -0600
+++ linux002/include/linux/file_tail.h 2006-07-17 23:04:37.000000000 -0500
@@ -0,0 +1,48 @@
+#ifndef FILE_TAIL_H
+#define FILE_TAIL_H
+
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+
+/*
+ * This file deals with storing tails of files in buffers smaller than a page
+ */
+
+#ifdef CONFIG_FILE_TAILS
+
+#define FILE_TAIL_INDEX(mapping) \
+ (i_size_read((mapping)->host) >> PAGE_CACHE_SHIFT) /* page index i_size falls in */
+#define FILE_TAIL_LENGTH(mapping) \
+ (i_size_read((mapping)->host) & (PAGE_CACHE_SIZE - 1)) /* i_size's offset within that page */
+
+static inline int page_data_size(struct page *page)
+{
+	/* a packed tail holds only the bytes past the last full page */
+	if (!PageTail(page))
+		return PAGE_CACHE_SIZE;
+	return FILE_TAIL_LENGTH(page->mapping);
+}
+
+extern struct page *page_cache_alloc_tail(struct address_space *); /* NOTE(review): not defined in mm/file_tail.c as posted - confirm */
+void page_cache_free_tail(struct page *); /* gives the page back to the tail slab cache */
+void pack_file_tail(struct page *); /* tries to swap a full last page for a packed tail */
+/*
+ * Called holding write_lock_irq(&mapping->tree_lock); may drop and retake it
+ */
+extern void __unpack_file_tail(struct address_space *);
+
+static inline void unpack_file_tail(struct address_space *mapping) /* locked wrapper */
+{
+ write_lock_irq(&mapping->tree_lock);
+ __unpack_file_tail(mapping); /* does nothing when mapping->tail is NULL */
+ write_unlock_irq(&mapping->tree_lock);
+}
+
+#else /* !CONFIG_FILE_TAILS */
+
+#define page_data_size(page) PAGE_CACHE_SIZE /* no tail packing: always a full page */
+#define unpack_file_tail(mapping) do {} while (0) /* no tail packing: nothing to unpack */
+
+#endif /* CONFIG_FILE_TAILS */
+
+#endif /* FILE_TAIL_H */
diff -Nurp linux001/mm/Makefile linux002/mm/Makefile
--- linux001/mm/Makefile 2006-06-17 20:49:35.000000000 -0500
+++ linux002/mm/Makefile 2006-07-17 23:04:37.000000000 -0500
@@ -23,4 +23,5 @@ obj-$(CONFIG_SLAB) += slab.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
+obj-$(CONFIG_FILE_TAILS) += file_tail.o
diff -Nurp linux001/mm/file_tail.c linux002/mm/file_tail.c
--- linux001/mm/file_tail.c 1969-12-31 18:00:00.000000000 -0600
+++ linux002/mm/file_tail.c 2006-07-17 23:04:37.000000000 -0500
@@ -0,0 +1,305 @@
+/*
+ * linux/mm/file_tail.c
+ *
+ * Copyright (C) International Business Machines Corp., 2006
+ */
+
+/*
+ * This file deals with storing tails of files in buffers smaller than a page
+ */
+
+#include <linux/file_tail.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/buffer_head.h>
+#include <linux/swap.h>
+#include <linux/mm_inline.h>
+#include "internal.h"
+
+static struct kmem_cache *tail_page_cachep; /* slab of struct page objects used as tail pages */
+
+/* Take @page off its zone's LRU list, if it is currently on one. */
+static void lru_cache_delete(struct page *page)
+{
+	if (PageLRU(page)) {
+		unsigned long flags;
+		struct zone *zone = page_zone(page);
+
+		spin_lock_irqsave(&zone->lru_lock, flags);
+		/* the test above was unlocked, so look again before unlinking */
+		if (PageLRU(page)) {
+			__ClearPageLRU(page);
+			del_page_from_lru(zone, page);
+		}
+		spin_unlock_irqrestore(&zone->lru_lock, flags);
+	}
+}
+
+/*
+ * Unpack mapping->tail into a full page; tree_lock (write) held on entry/exit
+ */
+void __unpack_file_tail(struct address_space *mapping)
+{
+ int error;
+ unsigned long index;
+ char *kaddr;
+ struct page *full_page = NULL, *short_page; /* full_page is kept across retries */
+ char *tail; /* data buffer detached from mapping->tail */
+ int tail_length;
+
+ while(mapping->tail) {
+ index = FILE_TAIL_INDEX(mapping);
+ write_unlock_irq(&mapping->tree_lock); /* dropped for the work below */
+
+ /* Allocate full page */
+ if (!full_page)
+ full_page = page_cache_alloc(mapping);
+ BUG_ON(!full_page); /* NOTE(review): allocation failure is fatal */
+
+ /* Get & lock short page */
+ short_page = find_lock_page(mapping, index);
+ if (!short_page || !PageTail(short_page)) { /* no tail page there: retry */
+ if (short_page) {
+ unlock_page(short_page);
+ page_cache_release(short_page);
+ }
+ /* anything can happen since we released the lock */
+ write_lock_irq(&mapping->tree_lock);
+ continue;
+ }
+ /* We have the tail page locked, so this shouldn't go away */
+ BUG_ON(!mapping->tail);
+
+ BUG_ON(page_has_buffers(short_page) || PageDirty(short_page));
+
+ /* This is the equivalent of remove_from_page_cache and
+ * add_to_page_cache_lru, without dropping tree_lock
+ */
+ error = radix_tree_preload(mapping_gfp_mask(mapping));
+ BUG_ON(error); /* NOTE(review): preload failure is fatal */
+ write_lock_irq(&mapping->tree_lock);
+ lru_cache_delete(short_page);
+ radix_tree_delete(&mapping->page_tree, index);
+ short_page->mapping = NULL;
+ tail = mapping->tail;
+ mapping->tail = NULL;
+
+ error = radix_tree_insert(&mapping->page_tree, index,
+ full_page);
+ BUG_ON(error);
+ page_cache_get(full_page); /* page cache ref */
+ SetPageLocked(full_page); /* held until the data is copied in */
+ full_page->mapping = mapping;
+ full_page->index = index;
+
+ write_unlock_irq(&mapping->tree_lock);
+ radix_tree_preload_end();
+ page_cache_release(short_page); /* page cache ref */
+
+ /*
+ * Now that the short page has been replaced by the full
+ * page in the radix tree, we need to wait until all of
+ * the references on the short page are gone.
+ */
+ unlock_page(short_page);
+
+ /*
+ * This still needs work: we occasionally get caught in this loop.
+ * NOTE(review): maybe a ref held by an lru_cache_add() pagevec - confirm.
+ */
+ while (page_count(short_page) > 1)
+ schedule();
+ /*
+ * ToDo: Figure out where this is getting added back to lru.
+ * NOTE(review): maybe a deferred lru_cache_add() pagevec drain - confirm.
+ */
+ lru_cache_delete(short_page);
+
+ /* Copy data from tail to full page */
+ if (PageUptodate(short_page)) { /* else full_page is not marked Uptodate */
+ kaddr = kmap_atomic(full_page, KM_USER0);
+ tail_length = FILE_TAIL_LENGTH(mapping); /* NOTE(review): tree_lock not held here */
+ memcpy(kaddr, tail, tail_length);
+ memset(kaddr+tail_length, 0,
+ PAGE_CACHE_SIZE - tail_length); /* zero the rest of the page */
+ kunmap_atomic(kaddr, KM_USER0);
+ SetPageUptodate(full_page);
+ }
+ kfree(tail);
+
+ /* finalize full_page */
+ lru_cache_add(full_page);
+ unlock_page(full_page);
+ page_cache_release(full_page); /* drop the allocation reference */
+ full_page = NULL; /* a further pass allocates a fresh page */
+
+ /* free short_page */
+ WARN_ON(PageLRU(short_page));
+ kmem_cache_free(tail_page_cachep, short_page);
+
+ /*
+ * unlikely, but check to see if there was no tail added
+ * back. We need to return with tree_lock held.
+ */
+ write_lock_irq(&mapping->tree_lock);
+
+ }
+ if (full_page) /* allocated for a retry that found no tail */
+ page_cache_release(full_page);
+ return;
+}
+EXPORT_SYMBOL(__unpack_file_tail);
+
+void i_size_write(struct inode *inode, loff_t i_size)
+{
+	struct address_space *as = inode->i_mapping;
+
+	write_lock_irq(&as->tree_lock);
+	if (as->tail && i_size_read(inode) < i_size)	/* file grows past a packed tail */
+		__unpack_file_tail(as);
+	_i_size_write(inode, i_size);
+	write_unlock_irq(&as->tree_lock);
+}
+EXPORT_SYMBOL(i_size_write);
+
+static void init_once(void *ptr, kmem_cache_t *cachep, unsigned long flags)
+{
+	struct page *tail_page = ptr;
+
+	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) !=
+	    SLAB_CTOR_CONSTRUCTOR)
+		return;	/* act only when CONSTRUCTOR is set and VERIFY is clear */
+	memset(tail_page, 0, sizeof(*tail_page));
+	reset_page_mapcount(tail_page);
+	INIT_LIST_HEAD(&tail_page->lru);
+	SetPageTail(tail_page);
+}
+
+static __init int file_tail_init(void)
+{
+	tail_page_cachep = kmem_cache_create("tail_page_cache",
+					     sizeof(struct page), 0, 0,
+					     init_once, NULL);
+
+	/* fail the initcall if the slab cache could not be created */
+	return tail_page_cachep ? 0 : -ENOMEM;
+}
+__initcall(file_tail_init);
+
+/*
+ * If the page is clean, in use by no one else, and the data is sufficiently
+ * small, allocate a tail page, copy its data, and replace the page with
+ * the tail page in the page cache.
+ *
+ * Caller must hold reference on the page. Returns quietly if packing fails.
+ */
+void pack_file_tail(struct page *full_page)
+{
+ int error;
+ pgoff_t index;
+ void *kaddr;
+ struct address_space *mapping = full_page->mapping; /* NOTE(review): read before the page is locked */
+ struct page *short_page;
+ int size;
+ void *tail;
+
+ if (!mapping)
+ return;
+
+ if (TestSetPageLocked(full_page)) /* trylock; give up if already locked */
+ return;
+
+ size = FILE_TAIL_LENGTH(mapping);
+ index = FILE_TAIL_INDEX(mapping);
+
+ if ((size > PAGE_CACHE_SIZE / 2) || PageDirty(full_page) ||
+ !PageUptodate(full_page) || mapping_mapped(mapping) ||
+ (page_count(full_page) > 2) || page_has_buffers(full_page) ||
+ PageWriteback(full_page)) { /* too big, or page dirty / busy */
+ unlock_page(full_page);
+ return;
+ }
+
+ short_page = kmem_cache_alloc(tail_page_cachep, SLAB_KERNEL);
+ if (!short_page) {
+ unlock_page(full_page);
+ return;
+ }
+
+ tail = kmalloc(size, SLAB_KERNEL); /* NOTE(review): size is 0 when i_size is page aligned */
+
+ if (!tail) {
+ kmem_cache_free(tail_page_cachep, short_page);
+ unlock_page(full_page);
+ return;
+ }
+ set_page_count(short_page, 1); /* our reference, dropped at the end */
+ short_page->flags = 0; /* presumably clears state from an earlier use */
+ SetPageTail(short_page); /* flags = 0 also cleared the tail bit */
+
+ /* Copy the data into the tail */
+ kaddr = kmap_atomic(full_page, KM_USER0);
+ memcpy(tail, kaddr, size);
+ kunmap_atomic(kaddr,KM_USER0);
+ SetPageUptodate(short_page);
+
+ error = radix_tree_preload(mapping_gfp_mask(mapping));
+ if (error) {
+ kfree(tail);
+ kmem_cache_free(tail_page_cachep, short_page);
+ unlock_page(full_page);
+ return;
+ }
+ /*
+ * Take tree lock. Recheck that nobody else is using full_page,
+ * remove it from the page cache and add short page, all while holding
+ * the lock
+ */
+ write_lock_irq(&mapping->tree_lock);
+ /*
+ * Make sure the file size didn't change and the page is still unused
+ */
+ if (mapping->tail || (full_page->index != FILE_TAIL_INDEX(mapping)) ||
+ (size != FILE_TAIL_LENGTH(mapping)) || mapping_mapped(mapping) ||
+ page_count(full_page) > 2) { /* presumably caller's ref + page cache's */
+ write_unlock_irq(&mapping->tree_lock);
+ radix_tree_preload_end();
+ kfree(tail);
+ kmem_cache_free(tail_page_cachep, short_page);
+ unlock_page(full_page);
+ return;
+ }
+ /* out with the old */
+ lru_cache_delete(full_page);
+ radix_tree_delete(&mapping->page_tree, index);
+ full_page->mapping = NULL;
+
+ /* in with the new */
+ mapping->tail = tail;
+ error = radix_tree_insert(&mapping->page_tree, index, short_page);
+ BUG_ON(error);
+ page_cache_get(short_page); /* page cache ref */
+ SetPageLocked(short_page); /* unlocked below after lru_cache_add() */
+ short_page->mapping = mapping;
+ short_page->index = index;
+
+ write_unlock_irq(&mapping->tree_lock);
+ radix_tree_preload_end();
+
+ unlock_page(full_page);
+ page_cache_release(full_page); /* page cache reference */
+
+ /* We're done with this now */
+ lru_cache_add(short_page);
+ unlock_page(short_page);
+ page_cache_release(short_page); /* drop our set_page_count() reference */
+
+ return;
+}
+
+void page_cache_free_tail(struct page *page)
+{
+ kmem_cache_free(tail_page_cachep, page); /* hand the struct page back to the tail slab cache */
+}
--
David Kleikamp
IBM Linux Technology Center
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2006-07-18 4:08 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-07-18 4:08 [RFC:PATCH 000/008] Tail Packing in the the Page Cache Dave Kleikamp
2006-07-18 4:08 ` [RFC:PATCH 001/008] Changes to common header files Dave Kleikamp
2006-07-18 4:08 ` Dave Kleikamp [this message]
2006-07-18 4:08 ` [RFC:PATCH 003/008] Handle tail pages in kmap & kmap_atomic Dave Kleikamp
2006-07-18 4:08 ` [RFC:PATCH 004/008] Wrap i_size_write Dave Kleikamp
2006-07-18 4:08 ` [RFC:PATCH 005/008] unpack tail page to avoid memory mapping Dave Kleikamp
2006-07-18 4:08 ` [RFC:PATCH 006/008] Don't need to zero past end-of-file in file tail Dave Kleikamp
2006-07-18 4:09 ` [RFC:PATCH 007/008] Make sure tail page is freed correctly Dave Kleikamp
2006-07-18 4:09 ` [RFC:PATCH 008/008] Handle file tails in mm/filemap.c Dave Kleikamp
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20060718040820.11926.12387.sendpatchset@kleikamp.austin.ibm.com \
--to=shaggy@austin.ibm.com \
--cc=dmccr@us.ibm.com \
--cc=linux-mm@kvack.org \
--cc=pbadari@us.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox