linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@elte.hu>
To: Linus Torvalds <torvalds@transmeta.com>
Cc: Rik van Riel <riel@conectiva.com.br>,
	Andrea Arcangeli <andrea@suse.de>,
	MM mailing list <linux-mm@kvack.org>,
	linux-kernel@vger.rutgers.edu
Subject: Re: [patch] balanced highmem subsystem under pre7-9
Date: Fri, 12 May 2000 20:53:49 +0200 (CEST)	[thread overview]
Message-ID: <Pine.LNX.4.10.10005122044130.6188-200000@elte.hu> (raw)
In-Reply-To: <Pine.LNX.4.10.10005121111340.4959-100000@penguin.transmeta.com>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1186 bytes --]


On Fri, 12 May 2000, Linus Torvalds wrote:

> With such a setup, your patch makes lots of sense - trying to decouple
> the highmem zone as much as possible. But the more recent kernels
> should be better at not touching zones that don't need touching (it
> will still change the LRU information, though).

i initially tested pre7-9 and it showed bad behavior: high kswapd activity
trying to balance highmem, while the pagecache is primarily filled from
the highmem. I don't think this can be fixed without 'silencing'
ZONE_HIGHMEM's balancing activities: the pagecache allocates from highmem
so it puts direct pressure on the highmem zone.

This had two effects: wasted CPU time, but it also limited the
page-cache's maximum size to the size of highmem. I'll try the final
pre7-2.3.99 kernel as well in a minute to make sure. (i think the bad
behavior is still there, judging from the differences between pre9 and
the final patch.)

(i've attached a patch against final-pre7, which is not complete and which
i'm not yet happy about (the kernel shows bad behavior if lots of dirty
data is generated by many processes), but it shows e.g. the highmem.c
cleanup that is possible.)

	Ingo

[-- Attachment #2: Type: TEXT/PLAIN, Size: 5134 bytes --]

--- linux/mm/page_alloc.c.orig	Fri May 12 08:45:17 2000
+++ linux/mm/page_alloc.c	Fri May 12 09:14:58 2000
@@ -29,9 +29,9 @@
 pg_data_t *pgdat_list = (pg_data_t *)0;
 
 static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
-static int zone_balance_ratio[MAX_NR_ZONES] = { 128, 128, 128, };
-static int zone_balance_min[MAX_NR_ZONES] = { 10 , 10, 10, };
-static int zone_balance_max[MAX_NR_ZONES] = { 255 , 255, 255, };
+static int zone_balance_ratio[MAX_NR_ZONES] = { 128, 128, 1, };
+static int zone_balance_min[MAX_NR_ZONES] = { 10 , 10, 0, };
+static int zone_balance_max[MAX_NR_ZONES] = { 255 , 255, 0, };
 
 /*
  * Free_page() adds the page to the free lists. This is optimized for
@@ -271,7 +271,10 @@
 	if (!(current->flags & PF_MEMALLOC)) {
 		int gfp_mask = zonelist->gfp_mask;
 		if (!try_to_free_pages(gfp_mask)) {
-			if (!(gfp_mask & __GFP_HIGH))
+			/*
+			 * Non-highprio allocations fail here:
+			 */
+			if (!(gfp_mask & __GFP_PRIO))
 				goto fail;
 		}
 	}
@@ -440,6 +443,9 @@
 				zone = pgdat->node_zones + ZONE_NORMAL;
 				if (zone->size)
 					zonelist->zones[j++] = zone;
+				if ((i && __GFP_WAIT) || !(i && __GFP_PRIO) ||
+						(i && __GFP_IO))
+					break;
 			case ZONE_DMA:
 				zone = pgdat->node_zones + ZONE_DMA;
 				if (zone->size)
--- linux/mm/highmem.c.orig	Fri May 12 09:16:25 2000
+++ linux/mm/highmem.c	Fri May 12 09:27:14 2000
@@ -66,6 +66,13 @@
 	return new_page;
 }
 
+/*
+ * Special zonelist so we can just query the highmem pool and
+ * return immediately if there is no highmem page free.
+ */
+static zonelist_t high_zonelist =
+	{ { NODE_DATA(0)->node_zones + ZONE_HIGHMEM, NULL, }, __GFP_HIGHMEM };
+
 struct page * replace_with_highmem(struct page * page)
 {
 	struct page *highpage;
@@ -74,13 +81,11 @@
 	if (PageHighMem(page) || !nr_free_highpages())
 		return page;
 
-	highpage = alloc_page(GFP_ATOMIC|__GFP_HIGHMEM);
+	highpage = __alloc_pages(&high_zonelist, 0);
 	if (!highpage)
 		return page;
-	if (!PageHighMem(highpage)) {
-		__free_page(highpage);
-		return page;
-	}
+	if (!PageHighMem(highpage))
+		BUG();
 
 	vaddr = kmap(highpage);
 	copy_page((void *)vaddr, (void *)page_address(page));
--- linux/include/linux/mm.h.orig	Fri May 12 08:46:55 2000
+++ linux/include/linux/mm.h	Fri May 12 09:27:56 2000
@@ -471,33 +471,49 @@
  * GFP bitmasks..
  */
 #define __GFP_WAIT	0x01
-#define __GFP_HIGH	0x02
+#define __GFP_PRIO	0x02
 #define __GFP_IO	0x04
+/*
+ * indicates that the buffer will be suitable for DMA.  Ignored on some
+ * platforms, used as appropriate on others
+ */
 #define __GFP_DMA	0x08
+
+/*
+ * indicates that the buffer can be taken from high memory,
+ * which is not permanently mapped by the kernel
+ */
 #ifdef CONFIG_HIGHMEM
 #define __GFP_HIGHMEM	0x10
 #else
 #define __GFP_HIGHMEM	0x0 /* noop */
 #endif
 
-
-#define GFP_BUFFER	(__GFP_HIGH | __GFP_WAIT)
-#define GFP_ATOMIC	(__GFP_HIGH)
-#define GFP_USER	(__GFP_WAIT | __GFP_IO)
-#define GFP_HIGHUSER	(GFP_USER | __GFP_HIGHMEM)
-#define GFP_KERNEL	(__GFP_HIGH | __GFP_WAIT | __GFP_IO)
-#define GFP_NFS		(__GFP_HIGH | __GFP_WAIT | __GFP_IO)
-#define GFP_KSWAPD	(__GFP_IO)
-
-/* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
-   platforms, used as appropriate on others */
-
-#define GFP_DMA		__GFP_DMA
-
-/* Flag - indicates that the buffer can be taken from high memory which is not
-   permanently mapped by the kernel */
-
-#define GFP_HIGHMEM	__GFP_HIGHMEM
+/*
+ * The 5 GFP bits:
+ *	( __GFP_WAIT | __GFP_PRIO | __GFP_IO | __GFP_DMA | __GFP_HIGHMEM )
+ *
+ * The most typical combinations:
+ */
+
+#define GFP_BUFFER   \
+	( __GFP_WAIT | __GFP_PRIO | 0        | 0         | 0             )
+#define GFP_ATOMIC   \
+	( 0          | __GFP_PRIO | 0        | 0         | 0             )
+#define GFP_USER     \
+	( __GFP_WAIT | 0          | __GFP_IO | 0         | 0             )
+#define GFP_HIGHUSER \
+	( __GFP_WAIT | 0          | __GFP_IO | 0         | __GFP_HIGHMEM )
+#define GFP_KERNEL   \
+	( __GFP_WAIT | __GFP_PRIO | __GFP_IO | 0         | 0             )
+#define GFP_NFS      \
+	( __GFP_WAIT | __GFP_PRIO | __GFP_IO | 0         | 0             )
+#define GFP_KSWAPD   \
+	( 0          | 0          | __GFP_IO | 0         | 0             )
+#define GFP_DMA      \
+	( 0          | 0          | 0        | __GFP_DMA | 0             )
+#define GFP_HIGHMEM  \
+	( 0          | 0          | 0        | 0         | __GFP_HIGHMEM )
 
 /* vma is the first one with  address < vma->vm_end,
  * and even  address < vma->vm_start. Have to extend vma. */
--- linux/include/linux/slab.h.orig	Fri May 12 09:05:15 2000
+++ linux/include/linux/slab.h	Fri May 12 09:27:56 2000
@@ -22,7 +22,7 @@
 #define	SLAB_NFS		GFP_NFS
 #define	SLAB_DMA		GFP_DMA
 
-#define SLAB_LEVEL_MASK		(__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_HIGHMEM)
+#define SLAB_LEVEL_MASK		(__GFP_WAIT|__GFP_PRIO|__GFP_IO|__GFP_HIGHMEM)
 #define	SLAB_NO_GROW		0x00001000UL	/* don't grow a cache */
 
 /* flags to pass to kmem_cache_create().

  reply	other threads:[~2000-05-12 18:53 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2000-05-08 17:21 [PATCH] Recent VM fiasco - fixed Zlatko Calusic
2000-05-08 17:43 ` Rik van Riel
2000-05-08 18:16   ` Zlatko Calusic
2000-05-08 18:20     ` Linus Torvalds
2000-05-08 18:46     ` Rik van Riel
2000-05-08 18:53       ` Zlatko Calusic
2000-05-08 19:04         ` Rik van Riel
2000-05-09  7:56   ` Daniel Stone
2000-05-09  8:25     ` Christoph Rohland
2000-05-09 15:44       ` Linus Torvalds
2000-05-09 16:12         ` Simon Kirby
2000-05-09 17:42         ` Christoph Rohland
2000-05-09 19:50           ` Linus Torvalds
2000-05-10 11:25             ` Christoph Rohland
2000-05-10 11:50               ` Zlatko Calusic
2000-05-11 23:40                 ` Mark Hahn
2000-05-10  4:05         ` James H. Cloos Jr.
2000-05-10  7:29           ` James H. Cloos Jr.
2000-05-11  0:16             ` Linus Torvalds
2000-05-11  0:32               ` Linus Torvalds
2000-05-11 16:36                 ` [PATCH] Recent VM fiasco - fixed (pre7-9) Rajagopal Ananthanarayanan
2000-05-11  1:04               ` [PATCH] Recent VM fiasco - fixed Juan J. Quintela
2000-05-11  1:53                 ` Simon Kirby
2000-05-11  7:23                   ` Linus Torvalds
2000-05-11 14:17                     ` Simon Kirby
2000-05-11 23:38                       ` Simon Kirby
2000-05-12  0:09                         ` Linus Torvalds
2000-05-12  2:51                           ` [RFC][PATCH] shrink_mmap avoid list_del (Was: Re: [PATCH] Recent VM fiasco - fixed) Roger Larsson
2000-05-11 11:15                   ` [PATCH] Recent VM fiasco - fixed Rik van Riel
2000-05-11  5:10                 ` Linus Torvalds
2000-05-11 10:09                   ` James H. Cloos Jr.
2000-05-11 17:25                   ` Juan J. Quintela
2000-05-11 23:25                   ` [patch] balanced highmem subsystem under pre7-9 Ingo Molnar
2000-05-11 23:46                     ` Linus Torvalds
2000-05-12  0:08                       ` Ingo Molnar
2000-05-12  0:15                         ` Ingo Molnar
2000-05-12  9:02                     ` Christoph Rohland
2000-05-12  9:56                       ` Ingo Molnar
2000-05-12 11:49                         ` Christoph Rohland
2000-05-12 16:12                       ` Linus Torvalds
2000-05-12 10:57                     ` Andrea Arcangeli
2000-05-12 12:11                       ` Ingo Molnar
2000-05-12 12:57                         ` Andrea Arcangeli
2000-05-12 13:20                           ` Rik van Riel
2000-05-12 16:40                             ` Ingo Molnar
2000-05-12 17:15                               ` Rik van Riel
2000-05-12 18:15                               ` Linus Torvalds
2000-05-12 18:53                                 ` Ingo Molnar [this message]
2000-05-12 19:06                                   ` Linus Torvalds
2000-05-12 19:36                                     ` Ingo Molnar
2000-05-12 19:40                                     ` Ingo Molnar
2000-05-12 19:54                                     ` Ingo Molnar
2000-05-12 22:48                                       ` Rik van Riel
2000-05-13 11:57                                         ` Stephen C. Tweedie
2000-05-13 12:03                                           ` Rik van Riel
2000-05-13 12:14                                             ` Ingo Molnar
2000-05-13 14:23                                               ` Ingo Molnar
2000-05-19  1:58                               ` Andrea Arcangeli
2000-05-19 15:03                                 ` Rik van Riel
2000-05-19 16:08                                   ` Andrea Arcangeli
2000-05-19 17:05                                     ` Rik van Riel
2000-05-19 22:28                                     ` Linus Torvalds
2000-05-11 11:12               ` [PATCH] Recent VM fiasco - fixed Christoph Rohland
2000-05-11 17:38               ` Steve Dodd
2000-05-09 10:21     ` Rik van Riel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Pine.LNX.4.10.10005122044130.6188-200000@elte.hu \
    --to=mingo@elte.hu \
    --cc=andrea@suse.de \
    --cc=linux-kernel@vger.rutgers.edu \
    --cc=linux-mm@kvack.org \
    --cc=riel@conectiva.com.br \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox