linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Lennert Buytenhek <buytenh@wantstofly.org>
To: Russell King - ARM Linux <linux@arm.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>,
	Marek Szyprowski <m.szyprowski@samsung.com>,
	linux-arm-kernel@lists.infradead.org,
	linaro-mm-sig@lists.linaro.org, linux-mm@kvack.org,
	linux-arch@vger.kernel.org,
	'Kyungmin Park' <kyungmin.park@samsung.com>,
	'Joerg Roedel' <joro@8bytes.org>
Subject: Re: [PATCH 3/8] ARM: dma-mapping: use asm-generic/dma-mapping-common.h
Date: Fri, 15 Jul 2011 02:10:21 +0200	[thread overview]
Message-ID: <20110715001021.GM951@wantstofly.org> (raw)
In-Reply-To: <20110707123825.GO8286@n2100.arm.linux.org.uk>

On Thu, Jul 07, 2011 at 01:38:25PM +0100, Russell King - ARM Linux wrote:

> > > > > I suppose for the majority of the cases, the overhead of the indirect
> > > > > function call is near-zero, compared to the overhead of the cache
> > > > > management operation, so it would only make a difference for coherent
> > > > > systems without an IOMMU. Do we care about micro-optimizing those?
> > 
> > FWIW, when I was hacking on ARM access point routing performance some
> > time ago, turning the L1/L2 cache maintenance operations into inline
> > functions (inlined into the ethernet driver) gave me a significant and
> > measurable performance boost.
> 
> On what architecture?  Can you show what you did to gain that?

Patch is attached below.  It's an ugly product-specific hack, not
suitable for upstreaming in this form, etc etc, but IIRC it gave me
a ~5% improvement on packet routing.



From 4e9ab8b1e5fd3a5d7abb3253b653a2990b377f97 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Thu, 9 Apr 2009 02:28:54 +0200
Subject: [PATCH] Inline dma_cache_maint()

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
---
 arch/arm/include/asm/cacheflush.h  |  174 ++++++++++++++++++++++++++++++++++++
 arch/arm/include/asm/dma-mapping.h |   24 +++++-
 arch/arm/mm/Kconfig                |    1 -
 arch/arm/mm/cache-feroceon-l2.c    |   10 ++-
 arch/arm/mm/dma-mapping.c          |   35 -------
 5 files changed, 205 insertions(+), 39 deletions(-)

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 6cbd8fd..7cc28eb 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -228,9 +228,105 @@ extern struct cpu_cache_fns cpu_cache;
  * is visible to DMA, or data written by DMA to system memory is
  * visible to the CPU.
  */
+#if defined(CONFIG_ARCH_KIRKWOOD) || defined(CONFIG_ARCH_MV78XX0)
+#define CACHE_LINE_SIZE		32
+
+static inline void l1d_flush_mva(unsigned long addr)
+{
+	__asm__("mcr p15, 0, %0, c7, c14, 1" : : "r" (addr));
+}
+
+static inline void l1d_inv_mva_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags;
+
+	raw_local_irq_save(flags);
+	__asm__("mcr p15, 5, %0, c15, c14, 0\n\t"
+		"mcr p15, 5, %1, c15, c14, 1"
+		: : "r" (start), "r" (end));
+	raw_local_irq_restore(flags);
+}
+
+static inline void l1d_clean_mva_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags;
+
+	raw_local_irq_save(flags);
+	__asm__("mcr p15, 5, %0, c15, c13, 0\n\t"
+		"mcr p15, 5, %1, c15, c13, 1"
+		: : "r" (start), "r" (end));
+	raw_local_irq_restore(flags);
+}
+
+static inline void l1d_flush_mva_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags;
+
+	raw_local_irq_save(flags);
+	__asm__("mcr p15, 5, %0, c15, c15, 0\n\t"
+		"mcr p15, 5, %1, c15, c15, 1"
+		: : "r" (start), "r" (end));
+	raw_local_irq_restore(flags);
+}
+
+static inline void dmac_inv_range(const void *_start, const void *_end)
+{
+	unsigned long start = (unsigned long)_start;
+	unsigned long end = (unsigned long)_end;
+
+	/*
+	 * Clean and invalidate partial first cache line.
+	 */
+	if (start & (CACHE_LINE_SIZE - 1)) {
+		l1d_flush_mva(start & ~(CACHE_LINE_SIZE - 1));
+		start = (start | (CACHE_LINE_SIZE - 1)) + 1;
+	}
+
+	/*
+	 * Clean and invalidate partial last cache line.
+	 */
+	if (start < end && end & (CACHE_LINE_SIZE - 1)) {
+		l1d_flush_mva(end & ~(CACHE_LINE_SIZE - 1));
+		end &= ~(CACHE_LINE_SIZE - 1);
+	}
+
+	/*
+	 * Invalidate all full cache lines between 'start' and 'end'.
+	 */
+	if (start < end)
+		l1d_inv_mva_range(start, end - CACHE_LINE_SIZE);
+
+	dsb();
+}
+
+static inline void dmac_clean_range(const void *_start, const void *_end)
+{
+	unsigned long start = (unsigned long)_start;
+	unsigned long end = (unsigned long)_end;
+
+	start &= ~(CACHE_LINE_SIZE - 1);
+	end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
+	l1d_clean_mva_range(start, end - CACHE_LINE_SIZE);
+
+	dsb();
+}
+
+static inline void dmac_flush_range(const void *_start, const void *_end)
+{
+	unsigned long start = (unsigned long)_start;
+	unsigned long end = (unsigned long)_end;
+
+	start &= ~(CACHE_LINE_SIZE - 1);
+	end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
+	l1d_flush_mva_range(start, end - CACHE_LINE_SIZE);
+
+	dsb();
+}
+#else
 #define dmac_inv_range			cpu_cache.dma_inv_range
 #define dmac_clean_range		cpu_cache.dma_clean_range
 #define dmac_flush_range		cpu_cache.dma_flush_range
+#endif
 
 #else
 
@@ -286,12 +382,90 @@ static inline void outer_flush_range(unsigned long start, unsigned long end)
 
 #else
 
+#if defined(CONFIG_ARCH_KIRKWOOD) || defined(CONFIG_ARCH_MV78XX0)
+static inline void l2_clean_pa_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags;
+
+	raw_local_irq_save(flags);
+	__asm__("mcr p15, 1, %0, c15, c9, 4\n\t"
+		"mcr p15, 1, %1, c15, c9, 5"
+		: : "r" (__phys_to_virt(start)), "r" (__phys_to_virt(end)));
+	raw_local_irq_restore(flags);
+}
+
+static inline void l2_clean_inv_pa(unsigned long addr)
+{
+	__asm__("mcr p15, 1, %0, c15, c10, 3" : : "r" (addr));
+}
+
+static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags;
+
+	raw_local_irq_save(flags);
+	__asm__("mcr p15, 1, %0, c15, c11, 4\n\t"
+		"mcr p15, 1, %1, c15, c11, 5"
+		: : "r" (__phys_to_virt(start)), "r" (__phys_to_virt(end)));
+	raw_local_irq_restore(flags);
+}
+
+static inline void outer_inv_range(unsigned long start, unsigned long end)
+{
+	/*
+	 * Clean and invalidate partial first cache line.
+	 */
+	if (start & (CACHE_LINE_SIZE - 1)) {
+		l2_clean_inv_pa(start & ~(CACHE_LINE_SIZE - 1));
+		start = (start | (CACHE_LINE_SIZE - 1)) + 1;
+	}
+
+	/*
+	 * Clean and invalidate partial last cache line.
+	 */
+	if (start < end && end & (CACHE_LINE_SIZE - 1)) {
+		l2_clean_inv_pa(end & ~(CACHE_LINE_SIZE - 1));
+		end &= ~(CACHE_LINE_SIZE - 1);
+	}
+
+	/*
+	 * Invalidate all full cache lines between 'start' and 'end'.
+	 */
+	if (start < end)
+		l2_inv_pa_range(start, end - CACHE_LINE_SIZE);
+
+	dsb();
+}
+
+static inline void outer_clean_range(unsigned long start, unsigned long end)
+{
+	start &= ~(CACHE_LINE_SIZE - 1);
+	end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
+	if (start != end)
+		l2_clean_pa_range(start, end - CACHE_LINE_SIZE);
+
+	dsb();
+}
+
+static inline void outer_flush_range(unsigned long start, unsigned long end)
+{
+	start &= ~(CACHE_LINE_SIZE - 1);
+	end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
+	if (start != end) {
+		l2_clean_pa_range(start, end - CACHE_LINE_SIZE);
+		l2_inv_pa_range(start, end - CACHE_LINE_SIZE);
+	}
+
+	dsb();
+}
+#else
 static inline void outer_inv_range(unsigned long start, unsigned long end)
 { }
 static inline void outer_clean_range(unsigned long start, unsigned long end)
 { }
 static inline void outer_flush_range(unsigned long start, unsigned long end)
 { }
+#endif
 
 #endif
 
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 22cb14e..10b517c 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -6,6 +6,7 @@
 #include <linux/mm_types.h>
 #include <linux/scatterlist.h>
 
+#include <asm/cacheflush.h>
 #include <asm-generic/dma-coherent.h>
 #include <asm/memory.h>
 
@@ -56,7 +57,28 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
  * platforms with CONFIG_DMABOUNCE.
  * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
  */
-extern void dma_cache_maint(const void *kaddr, size_t size, int rw);
+static inline void
+dma_cache_maint(const void *start, size_t size, int direction)
+{
+//	BUG_ON(!virt_addr_valid(start) || !virt_addr_valid(end - 1));
+
+	switch (direction) {
+	case DMA_FROM_DEVICE:		/* invalidate only */
+		dmac_inv_range(start, start + size);
+		outer_inv_range(__pa(start), __pa(start) + size);
+		break;
+	case DMA_TO_DEVICE:		/* writeback only */
+		dmac_clean_range(start, start + size);
+		outer_clean_range(__pa(start), __pa(start) + size);
+		break;
+	case DMA_BIDIRECTIONAL:		/* writeback and invalidate */
+		dmac_flush_range(start, start + size);
+		outer_flush_range(__pa(start), __pa(start) + size);
+		break;
+//	default:
+//		BUG();
+	}
+}
 
 /*
  * Return whether the given device DMA address mask can be supported
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index d490f37..3e4c526 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -690,7 +690,6 @@ config CACHE_FEROCEON_L2
 	bool "Enable the Feroceon L2 cache controller"
 	depends on ARCH_KIRKWOOD || ARCH_MV78XX0
 	default y
-	select OUTER_CACHE
 	help
 	  This option enables the Feroceon L2 cache controller.
 
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
index 355c2a1..f84e34f 100644
--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -17,6 +17,9 @@
 #include <plat/cache-feroceon-l2.h>
 
 
+static int l2_wt_override;
+
+#if 0
 /*
  * Low-level cache maintenance operations.
  *
@@ -94,12 +97,14 @@ static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
 {
 	l2_inv_mva_range(__phys_to_virt(start), __phys_to_virt(end));
 }
+#endif
 
 static inline void l2_inv_all(void)
 {
 	__asm__("mcr p15, 1, %0, c15, c11, 0" : : "r" (0));
 }
 
+#if 0
 /*
  * Linux primitives.
  *
@@ -110,8 +115,6 @@ static inline void l2_inv_all(void)
 #define CACHE_LINE_SIZE		32
 #define MAX_RANGE_SIZE		1024
 
-static int l2_wt_override;
-
 static unsigned long calc_range_end(unsigned long start, unsigned long end)
 {
 	unsigned long range_end;
@@ -204,6 +207,7 @@ static void feroceon_l2_flush_range(unsigned long start, unsigned long end)
 
 	dsb();
 }
+#endif
 
 
 /*
@@ -318,9 +322,11 @@ void __init feroceon_l2_init(int __l2_wt_override)
 
 	disable_l2_prefetch();
 
+#if 0
 	outer_cache.inv_range = feroceon_l2_inv_range;
 	outer_cache.clean_range = feroceon_l2_clean_range;
 	outer_cache.flush_range = feroceon_l2_flush_range;
+#endif
 
 	enable_l2();
 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index f1ef561..d866150 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -482,41 +482,6 @@ static int __init consistent_init(void)
 
 core_initcall(consistent_init);
 
-/*
- * Make an area consistent for devices.
- * Note: Drivers should NOT use this function directly, as it will break
- * platforms with CONFIG_DMABOUNCE.
- * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
- */
-void dma_cache_maint(const void *start, size_t size, int direction)
-{
-	void (*inner_op)(const void *, const void *);
-	void (*outer_op)(unsigned long, unsigned long);
-
-	BUG_ON(!virt_addr_valid(start) || !virt_addr_valid(start + size - 1));
-
-	switch (direction) {
-	case DMA_FROM_DEVICE:		/* invalidate only */
-		inner_op = dmac_inv_range;
-		outer_op = outer_inv_range;
-		break;
-	case DMA_TO_DEVICE:		/* writeback only */
-		inner_op = dmac_clean_range;
-		outer_op = outer_clean_range;
-		break;
-	case DMA_BIDIRECTIONAL:		/* writeback and invalidate */
-		inner_op = dmac_flush_range;
-		outer_op = outer_flush_range;
-		break;
-	default:
-		BUG();
-	}
-
-	inner_op(start, start + size);
-	outer_op(__pa(start), __pa(start) + size);
-}
-EXPORT_SYMBOL(dma_cache_maint);
-
 /**
  * dma_map_sg - map a set of SG buffers for streaming mode DMA
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
-- 
1.7.3.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2011-07-15  0:10 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-06-20  7:50 [PATCH/RFC 0/8] ARM: DMA-mapping framework redesign Marek Szyprowski
2011-06-20  7:50 ` [PATCH 1/8] ARM: dma-mapping: remove offset parameter to prepare for generic dma_ops Marek Szyprowski
2011-06-20  8:35   ` Michal Nazarewicz
2011-06-20 10:46     ` Marek Szyprowski
2011-07-03 15:28   ` Russell King - ARM Linux
2011-07-26 12:56     ` Marek Szyprowski
2011-06-20  7:50 ` [PATCH 2/8] ARM: dma-mapping: implement dma_map_single on top of dma_map_page Marek Szyprowski
2011-06-20 14:39   ` Russell King - ARM Linux
2011-06-20 15:15     ` Marek Szyprowski
2011-06-24 15:24       ` Arnd Bergmann
2011-06-27 14:29         ` Marek Szyprowski
2011-06-27 14:53           ` Arnd Bergmann
2011-06-27 15:06             ` Marek Szyprowski
2011-06-20  7:50 ` [PATCH 3/8] ARM: dma-mapping: use asm-generic/dma-mapping-common.h Marek Szyprowski
2011-06-20 14:33   ` [Linaro-mm-sig] " KyongHo Cho
2011-06-21 11:47     ` Marek Szyprowski
2011-06-24  8:39       ` 'Joerg Roedel'
2011-06-24 15:36   ` Arnd Bergmann
2011-06-27 12:18     ` Marek Szyprowski
2011-06-27 13:19       ` Arnd Bergmann
2011-07-07 12:09         ` Lennert Buytenhek
2011-07-07 12:38           ` Russell King - ARM Linux
2011-07-15  0:10             ` Lennert Buytenhek [this message]
2011-07-15  9:27               ` Russell King - ARM Linux
2011-07-15 21:53                 ` Lennert Buytenhek
2011-06-20  7:50 ` [PATCH 4/8] ARM: dma-mapping: implement dma sg methods on top of generic dma ops Marek Szyprowski
2011-06-20 14:37   ` KyongHo Cho
2011-06-20 14:40   ` Russell King - ARM Linux
2011-06-20 15:23     ` Marek Szyprowski
2011-06-20  7:50 ` [PATCH 5/8] ARM: dma-mapping: move all dma bounce code to separate dma ops structure Marek Szyprowski
2011-06-20 14:42   ` Russell King - ARM Linux
2011-06-20 15:31     ` Marek Szyprowski
2011-06-24 15:47       ` Arnd Bergmann
2011-06-27 14:20         ` Marek Szyprowski
2011-06-20  7:50 ` [PATCH 6/8] ARM: dma-mapping: remove redundant code and cleanup Marek Szyprowski
2011-06-20  7:50 ` [PATCH 7/8] common: dma-mapping: change alloc/free_coherent method to more generic alloc/free_attrs Marek Szyprowski
2011-06-20 14:45   ` KyongHo Cho
2011-06-20 15:06     ` Russell King - ARM Linux
2011-06-20 15:14       ` [Linaro-mm-sig] " KyongHo Cho
2011-06-21 11:23     ` Marek Szyprowski
2011-06-22  0:00       ` [Linaro-mm-sig] " KyongHo Cho
2011-06-24  7:20         ` Marek Szyprowski
2011-06-24 15:51   ` Arnd Bergmann
2011-06-24 16:15     ` James Bottomley
2011-06-24 16:23       ` Arnd Bergmann
2011-06-27 12:23     ` Marek Szyprowski
2011-06-27 13:22       ` Arnd Bergmann
2011-06-27 13:30         ` Marek Szyprowski
2011-06-24 15:53   ` Arnd Bergmann
2011-06-27 14:41     ` Marek Szyprowski
2011-06-20  7:50 ` [PATCH 8/8] ARM: dma-mapping: use alloc, mmap, free from dma_ops Marek Szyprowski
2011-06-22  6:53   ` [Linaro-mm-sig] " KyongHo Cho
2011-06-22  4:53 ` [Linaro-mm-sig] [PATCH/RFC 0/8] ARM: DMA-mapping framework redesign Subash Patel
2011-06-22  6:59   ` Marek Szyprowski
2011-06-22  8:53     ` Subash Patel
2011-06-22  9:27       ` Marek Szyprowski
2011-06-22 16:00         ` Jordan Crouse
2011-06-23 13:09           ` Subash Patel
2011-06-23 16:24             ` Michael K. Edwards
2011-06-23 22:09               ` Michael K. Edwards
2011-06-25  5:23                 ` Jonathan Morton
2011-06-25  9:55                   ` Michael K. Edwards
2011-06-26  0:06                     ` Jonathan Morton
2011-06-24 15:20           ` Arnd Bergmann
2011-06-24  9:18 ` Joerg Roedel
2011-06-24 14:26   ` Marek Szyprowski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110715001021.GM951@wantstofly.org \
    --to=buytenh@wantstofly.org \
    --cc=arnd@arndb.de \
    --cc=joro@8bytes.org \
    --cc=kyungmin.park@samsung.com \
    --cc=linaro-mm-sig@lists.linaro.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-mm@kvack.org \
    --cc=linux@arm.linux.org.uk \
    --cc=m.szyprowski@samsung.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox