From: Dave Hansen <haveblue@us.ibm.com>
To: anand kumar <a_santha@rediffmail.com>
Cc: linux-mm@kvack.org, kernelnewbies@nl.linux.org
Subject: Re: Memory allocation problem
Date: Wed, 30 Apr 2003 15:36:20 -0700 [thread overview]
Message-ID: <3EB04FE4.6000404@us.ibm.com> (raw)
In-Reply-To: <20030430221438.16759.qmail@webmail35.rediffmail.com>
[-- Attachment #1: Type: text/plain, Size: 1079 bytes --]
anand kumar wrote:
> We are developing a PCI driver for a specialized hardware which
> needs blocks of physically contiguous memory regions of
> 32 KB. We need to allocate 514 such blocks for a total of 16 MB
> We were using an ioctl implementation in the driver which uses
> kmalloc() to allocate the required memory blocks.
> kmalloc()(GFP_KERNEL)
> fails after allocating some 250 blocks of memory (probably due to
> fragmentation).
> We then tried using __get_free_pages() and the result was the
> same.
Well, kmalloc() falls back to __get_free_pages() eventually (after going
through the slab cache), so it isn't much of a surprise that both failed.
> Even though the free pages in zone NORMAL and DMA were 10000 and
> 1500 respectively.
Can you try this on a kernel that has /proc/buddyinfo? It exports the
buddy allocators internal structures so that you can see the
fragmentation. buddyinfo has been in 2.5 since ~2.5.35. There may even
be a 2.4 version floating around...
If you can post some code too, it might be helpful.
--
Dave Hansen
haveblue@us.ibm.com
[-- Attachment #2: buddyinfo-2.5.34-0.patch --]
[-- Type: text/plain, Size: 3439 bytes --]
# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
# ChangeSet 1.508.1.116 -> 1.513
# mm/page_alloc.c 1.89.1.8 -> 1.92
# fs/proc/proc_misc.c 1.34.1.2 -> 1.36
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/09/09 haveblue@elm3b96.(none) 1.513
# Merge elm3b96.(none):/work/dave/bk/linux-2.5
# into elm3b96.(none):/work/dave/bk/linux-2.5-buddyinfo
# --------------------------------------------
#
diff -Nru a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
--- a/fs/proc/proc_misc.c Wed Sep 11 10:45:01 2002
+++ b/fs/proc/proc_misc.c Wed Sep 11 10:45:01 2002
@@ -208,6 +208,20 @@
#undef K
}
+extern struct seq_operations fragmentation_op;
+static int fragmentation_open(struct inode *inode, struct file *file)
+{
+ (void)inode;
+ return seq_open(file, &fragmentation_op);
+}
+
+static struct file_operations fragmentation_file_operations = {
+ open: fragmentation_open,
+ read: seq_read,
+ llseek: seq_lseek,
+ release: seq_release,
+};
+
static int version_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
@@ -624,6 +638,7 @@
create_seq_entry("partitions", 0, &proc_partitions_operations);
create_seq_entry("interrupts", 0, &proc_interrupts_operations);
create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations);
+ create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
#ifdef CONFIG_MODULES
create_seq_entry("modules", 0, &proc_modules_operations);
create_seq_entry("ksyms", 0, &proc_ksyms_operations);
diff -Nru a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c Wed Sep 11 10:45:01 2002
+++ b/mm/page_alloc.c Wed Sep 11 10:45:01 2002
@@ -949,3 +949,69 @@
}
__setup("memfrac=", setup_mem_frac);
+
+#ifdef CONFIG_PROC_FS
+
+#include <linux/seq_file.h>
+
+static void *frag_start(struct seq_file *m, loff_t *pos)
+{
+ pg_data_t *pgdat;
+ loff_t node = *pos;
+
+ for (pgdat = pgdat_list; pgdat && node; pgdat = pgdat->pgdat_next)
+ --node;
+
+ return pgdat;
+}
+
+static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
+{
+ pg_data_t *pgdat = (pg_data_t *)arg;
+
+ (*pos)++;
+ return pgdat->pgdat_next;
+}
+
+static void frag_stop(struct seq_file *m, void *arg)
+{
+}
+
+/*
+ * This walks the freelist for each zone. Whilst this is slow, I'd rather
+ * be slow here than slow down the fast path by keeping stats - mjbligh
+ */
+static int frag_show(struct seq_file *m, void *arg)
+{
+ pg_data_t *pgdat = (pg_data_t *)arg;
+ zone_t *zone, *node_zones = pgdat->node_zones;
+ unsigned long flags;
+ int order;
+
+ for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
+ if (!zone->size)
+ continue;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
+ for (order = 0; order < MAX_ORDER; ++order) {
+ unsigned long nr_bufs = 0;
+ list_t *elem;
+ list_for_each(elem, &zone->free_area[order].free_list)
+ ++nr_bufs;
+ seq_printf(m, "%6lu ", nr_bufs);
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+ seq_putc(m, '\n');
+ }
+ return 0;
+}
+
+struct seq_operations fragmentation_op = {
+ start: frag_start,
+ next: frag_next,
+ stop: frag_stop,
+ show: frag_show,
+};
+
+#endif /* CONFIG_PROC_FS */
next prev parent reply other threads:[~2003-04-30 22:36 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-04-30 22:14 anand kumar
2003-04-30 22:28 ` Greg KH
2003-04-30 22:29 ` Christoph Hellwig
2003-04-30 22:36 ` Dave Hansen [this message]
2003-05-01 13:20 Mark_H_Johnson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3EB04FE4.6000404@us.ibm.com \
--to=haveblue@us.ibm.com \
--cc=a_santha@rediffmail.com \
--cc=kernelnewbies@nl.linux.org \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox