From 4369ae16eec16e6a922d4333da88a58fbc201369 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 6 Nov 2008 13:23:06 +0000
Subject: [PATCH 1/9] Add "thumbee" to the hwcap_str array

This part was missed in the initial patch adding ThumbEE support.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/kernel/setup.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 1f1eecca7f55..66e1a643ed14 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -772,6 +772,7 @@ static const char *hwcap_str[] = {
 	"java",
 	"iwmmxt",
 	"crunch",
+	"thumbee",
 	NULL
 };
 

From 2bedbdf4148ebbe48c7a89449ab52e475a788f42 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 6 Nov 2008 13:23:07 +0000
Subject: [PATCH 2/9] Add HWCAP_NEON to the ARM hwcap.h file

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/include/asm/hwcap.h | 1 +
 arch/arm/kernel/setup.c      | 1 +
 arch/arm/vfp/vfpmodule.c     | 9 +++++++++
 3 files changed, 11 insertions(+)

diff --git a/arch/arm/include/asm/hwcap.h b/arch/arm/include/asm/hwcap.h
index 81f4c899a555..bda489f9f017 100644
--- a/arch/arm/include/asm/hwcap.h
+++ b/arch/arm/include/asm/hwcap.h
@@ -16,6 +16,7 @@
 #define HWCAP_IWMMXT	512
 #define HWCAP_CRUNCH	1024
 #define HWCAP_THUMBEE	2048
+#define HWCAP_NEON	4096
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 /*
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 66e1a643ed14..d4dae3e9b294 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -773,6 +773,7 @@ static const char *hwcap_str[] = {
 	"iwmmxt",
 	"crunch",
 	"thumbee",
+	"neon",
 	NULL
 };
 
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index c0d2c9bb952b..67ca340a7c85 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -371,6 +371,15 @@ static int __init vfp_init(void)
 		 * in place; report VFP support to userspace.
 		 */
 		elf_hwcap |= HWCAP_VFP;
+#ifdef CONFIG_NEON
+		/*
+		 * Check for the presence of the Advanced SIMD
+		 * load/store instructions, integer and single
+		 * precision floating point operations.
+		 */
+		if ((fmrx(MVFR1) & 0x000fff00) == 0x00011100)
+			elf_hwcap |= HWCAP_NEON;
+#endif
 	}
 	return 0;
 }

From c30c2f99e10b6a810dae9a25b35c6d48796d8ffb Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 6 Nov 2008 13:23:07 +0000
Subject: [PATCH 3/9] ARMv7: Add extra barriers for flush_cache_all
 compressed/head.S

The flush_cache_all function on ARMv7 is implemented as a series of
cache operations by set/way. These are not guaranteed to be ordered with
previous memory accesses, requiring a DMB. This patch also adds barriers
for the TLB operations in compressed/head.S

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/boot/compressed/head.S | 14 ++++++++++----
 arch/arm/mm/cache-v7.S          |  2 ++
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 84a1e0496a3c..7b1f31295a0a 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -717,6 +717,9 @@ __armv7_mmu_cache_off:
 		bl	__armv7_mmu_cache_flush
 		mov	r0, #0
 		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
+		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
+		mcr	p15, 0, r0, c7, c10, 4	@ DSB
+		mcr	p15, 0, r0, c7, c5, 4	@ ISB
 		mov	pc, r12
 
 __arm6_mmu_cache_off:
@@ -778,12 +781,13 @@ __armv6_mmu_cache_flush:
 __armv7_mmu_cache_flush:
 		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
 		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
-		beq	hierarchical
 		mov	r10, #0
+		beq	hierarchical
 		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
 		b	iflush
 hierarchical:
-		stmfd	sp!, {r0-r5, r7, r9-r11}
+		mcr	p15, 0, r10, c7, c10, 5	@ DMB
+		stmfd	sp!, {r0-r5, r7, r9, r11}
 		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
 		ands	r3, r0, #0x7000000	@ extract loc from clidr
 		mov	r3, r3, lsr #23		@ left align loc bit field
@@ -820,12 +824,14 @@ skip:
 		cmp	r3, r10
 		bgt	loop1
 finished:
+		ldmfd	sp!, {r0-r5, r7, r9, r11}
 		mov	r10, #0			@ swith back to cache level 0
 		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
-		ldmfd	sp!, {r0-r5, r7, r9-r11}
 iflush:
+		mcr	p15, 0, r10, c7, c10, 4	@ DSB
 		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
-		mcr	p15, 0, r10, c7, c10, 4	@ drain WB
+		mcr	p15, 0, r10, c7, c10, 4	@ DSB
+		mcr	p15, 0, r10, c7, c5, 4	@ ISB
 		mov	pc, lr
 
 __armv5tej_mmu_cache_flush:
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index d19c2bec2b1f..be93ff02a98d 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -26,6 +26,7 @@
  *	- mm    - mm_struct describing address space
  */
 ENTRY(v7_flush_dcache_all)
+	dmb					@ ensure ordering with previous memory accesses
 	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
 	ands	r3, r0, #0x7000000		@ extract loc from clidr
 	mov	r3, r3, lsr #23			@ left align loc bit field
@@ -64,6 +65,7 @@ skip:
 finished:
 	mov	r10, #0				@ swith back to cache level 0
 	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
+	dsb
 	isb
 	mov	pc, lr
 ENDPROC(v7_flush_dcache_all)

From 24b647a042b988b017e6cdf60b47a0bfecd1dc41 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 6 Nov 2008 13:23:08 +0000
Subject: [PATCH 4/9] ARMv7: Branch over conditional undefined instructions in
 vfphw.S

On ARMv7, conditional undefined instructions may generate exceptions
even if the condition is not met. The vfphw.S contains the FPINST and
FPINST2 access instructions which may not be present on processors with
synchronous VFP exceptions.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/vfp/vfphw.S | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index a62dcf7098ba..3c73aafe3e01 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -101,9 +101,12 @@ ENTRY(vfp_support_entry)
 	VFPFSTMIA r4, r5		@ save the working registers
 	VFPFMRX	r5, FPSCR		@ current status
 	tst	r1, #FPEXC_EX		@ is there additional state to save?
-	VFPFMRX	r6, FPINST, NE		@ FPINST (only if FPEXC.EX is set)
-	tstne	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
-	VFPFMRX	r8, FPINST2, NE		@ FPINST2 if needed (and present)
+	beq	1f
+	VFPFMRX	r6, FPINST		@ FPINST (only if FPEXC.EX is set)
+	tst	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
+	beq	1f
+	VFPFMRX	r8, FPINST2		@ FPINST2 if needed (and present)
+1:
 	stmia	r4, {r1, r5, r6, r8}	@ save FPEXC, FPSCR, FPINST, FPINST2
 					@ and point r4 at the word at the
 					@ start of the register dump
@@ -117,9 +120,12 @@ no_old_VFP_process:
 					@ FPEXC is in a safe state
 	ldmia	r10, {r1, r5, r6, r8}	@ load FPEXC, FPSCR, FPINST, FPINST2
 	tst	r1, #FPEXC_EX		@ is there additional state to restore?
-	VFPFMXR	FPINST, r6, NE		@ restore FPINST (only if FPEXC.EX is set)
-	tstne	r1, #FPEXC_FP2V		@ is there an FPINST2 to write?
-	VFPFMXR	FPINST2, r8, NE		@ FPINST2 if needed (and present)
+	beq	1f
+	VFPFMXR	FPINST, r6		@ restore FPINST (only if FPEXC.EX is set)
+	tst	r1, #FPEXC_FP2V		@ is there an FPINST2 to write?
+	beq	1f
+	VFPFMXR	FPINST2, r8		@ FPINST2 if needed (and present)
+1:
 	VFPFMXR	FPSCR, r5		@ restore status
 
 check_for_exception:
@@ -175,9 +181,12 @@ ENTRY(vfp_save_state)
 	VFPFSTMIA r0, r2		@ save the working registers
 	VFPFMRX	r2, FPSCR		@ current status
 	tst	r1, #FPEXC_EX		@ is there additional state to save?
-	VFPFMRX	r3, FPINST, NE		@ FPINST (only if FPEXC.EX is set)
-	tstne	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
-	VFPFMRX	r12, FPINST2, NE	@ FPINST2 if needed (and present)
+	beq	1f
+	VFPFMRX	r3, FPINST		@ FPINST (only if FPEXC.EX is set)
+	tst	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
+	beq	1f
+	VFPFMRX	r12, FPINST2		@ FPINST2 if needed (and present)
+1:
 	stmia	r0, {r1, r2, r3, r12}	@ save FPEXC, FPSCR, FPINST, FPINST2
 	mov	pc, lr
 ENDPROC(vfp_save_state)

From 376e14218d3d791127e9b9bfbe2f99c44c2a19c2 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 6 Nov 2008 13:23:08 +0000
Subject: [PATCH 5/9] Do not flush the cache in flush_cache_v(un)map for VIPT
 caches

In case of non-aliasing VIPT caches, there is no need to flush the whole
cache when new mapping is created. The patch introduces this condition
check. In the non-aliasing VIPT case flush_cache_vmap() needs a DSB
since the set_pte_at() function called from vmap_pte_range() does not
have such barrier (done usually via TLB flushing functions).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/include/asm/cacheflush.h | 36 ++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index de6c59f814a1..85a2514cbffc 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -15,6 +15,7 @@
 
 #include <asm/glue.h>
 #include <asm/shmparam.h>
+#include <asm/cachetype.h>
 
 #define CACHE_COLOUR(vaddr)	((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT)
 
@@ -295,16 +296,6 @@ static inline void outer_flush_range(unsigned long start, unsigned long end)
 
 #endif
 
-/*
- * flush_cache_vmap() is used when creating mappings (eg, via vmap,
- * vmalloc, ioremap etc) in kernel space for pages.  Since the
- * direct-mappings of these pages may contain cached data, we need
- * to do a full cache flush to ensure that writebacks don't corrupt
- * data placed into these pages via the new mappings.
- */
-#define flush_cache_vmap(start, end)		flush_cache_all()
-#define flush_cache_vunmap(start, end)		flush_cache_all()
-
 /*
  * Copy user data from/to a page which is mapped into a different
  * processes address space.  Really, we want to allow our "user
@@ -444,4 +435,29 @@ static inline void flush_ioremap_region(unsigned long phys, void __iomem *virt,
 	dmac_inv_range(start, start + size);
 }
 
+/*
+ * flush_cache_vmap() is used when creating mappings (eg, via vmap,
+ * vmalloc, ioremap etc) in kernel space for pages.  On non-VIPT
+ * caches, since the direct-mappings of these pages may contain cached
+ * data, we need to do a full cache flush to ensure that writebacks
+ * don't corrupt data placed into these pages via the new mappings.
+ */
+static inline void flush_cache_vmap(unsigned long start, unsigned long end)
+{
+	if (!cache_is_vipt_nonaliasing())
+		flush_cache_all();
+	else
+		/*
+		 * set_pte_at() called from vmap_pte_range() does not
+		 * have a DSB after cleaning the cache line.
+		 */
+		dsb();
+}
+
+static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
+{
+	if (!cache_is_vipt_nonaliasing())
+		flush_cache_all();
+}
+
 #endif

From 6b07d7fea0496374ff7754dc3d1dca03b2911828 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 6 Nov 2008 13:23:08 +0000
Subject: [PATCH 6/9] ARMv7: Do not set TTBR0 in __v7_setup

This register is set in __enable_mmu in the head.S file.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/mm/proc-v7.S | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 07f82db70945..41772960fd10 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -175,7 +175,6 @@ __v7_setup:
 	mcr	p15, 0, r10, c8, c7, 0		@ invalidate I + D TLBs
 	mcr	p15, 0, r10, c2, c0, 2		@ TTB control register
 	orr	r4, r4, #TTB_RGN_OC_WB		@ mark PTWs outer cacheable, WB
-	mcr	p15, 0, r4, c2, c0, 0		@ load TTB0
 	mcr	p15, 0, r4, c2, c0, 1		@ load TTB1
 	mov	r10, #0x1f			@ domains 0, 1 = manager
 	mcr	p15, 0, r10, c3, c0, 0		@ load domain access register

From 73b63efaac7352c9e2bf1570fac98fd44a99f8f9 Mon Sep 17 00:00:00 2001
From: Jon Callan <Jon.Callan@arm.com>
Date: Thu, 6 Nov 2008 13:23:09 +0000
Subject: [PATCH 7/9] ARMv7: Add SMP initialisation to proc-v7.S

This patch adds the SMP/nAMP mode setting to __v7_setup and also sets
TTBR to shared page table walks if SMP is enabled. The PTWs are also
marked inner cacheable for both SMP and UP modes (setting this is fine
even if the CPU doesn't support the feature).

Signed-off-by: Jon Callan <Jon.Callan@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/mm/proc-v7.S | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 41772960fd10..721b7d53bfd8 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -20,9 +20,17 @@
 
 #define TTB_C		(1 << 0)
 #define TTB_S		(1 << 1)
+#define TTB_RGN_NC	(0 << 3)
+#define TTB_RGN_OC_WBWA	(1 << 3)
 #define TTB_RGN_OC_WT	(2 << 3)
 #define TTB_RGN_OC_WB	(3 << 3)
 
+#ifndef CONFIG_SMP
+#define TTB_FLAGS	TTB_C|TTB_RGN_OC_WB		@ mark PTWs cacheable, outer WB
+#else
+#define TTB_FLAGS	TTB_C|TTB_S|TTB_RGN_OC_WBWA	@ mark PTWs cacheable and shared, outer WBWA
+#endif
+
 ENTRY(cpu_v7_proc_init)
 	mov	pc, lr
 ENDPROC(cpu_v7_proc_init)
@@ -85,7 +93,7 @@ ENTRY(cpu_v7_switch_mm)
 #ifdef CONFIG_MMU
 	mov	r2, #0
 	ldr	r1, [r1, #MM_CONTEXT_ID]	@ get mm->context.id
-	orr	r0, r0, #TTB_RGN_OC_WB		@ mark PTWs outer cacheable, WB
+	orr	r0, r0, #TTB_FLAGS
 	mcr	p15, 0, r2, c13, c0, 1		@ set reserved context ID
 	isb
 1:	mcr	p15, 0, r0, c2, c0, 0		@ set TTB 0
@@ -162,6 +170,11 @@ cpu_v7_name:
  *	- cache type register is implemented
  */
 __v7_setup:
+#ifdef CONFIG_SMP
+	mrc	p15, 0, r0, c1, c0, 1		@ Enable SMP/nAMP mode
+	orr	r0, r0, #(0x1 << 6)
+	mcr	p15, 0, r0, c1, c0, 1
+#endif
 	adr	r12, __v7_setup_stack		@ the local stack
 	stmia	r12, {r0-r5, r7, r9, r11, lr}
 	bl	v7_flush_dcache_all
@@ -174,7 +187,7 @@ __v7_setup:
 #ifdef CONFIG_MMU
 	mcr	p15, 0, r10, c8, c7, 0		@ invalidate I + D TLBs
 	mcr	p15, 0, r10, c2, c0, 2		@ TTB control register
-	orr	r4, r4, #TTB_RGN_OC_WB		@ mark PTWs outer cacheable, WB
+	orr	r4, r4, #TTB_FLAGS
 	mcr	p15, 0, r4, c2, c0, 1		@ load TTB1
 	mov	r10, #0x1f			@ domains 0, 1 = manager
 	mcr	p15, 0, r10, c3, c0, 0		@ load domain access register

From 8553cb67d2318db327071018fc81084cbabccc46 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 10 Nov 2008 14:14:11 +0000
Subject: [PATCH 8/9] Modern processors may need to drain the WB before WFI

Since WFI may cause the processor to enter a low-power mode, data may
still be in the write buffer. This patch adds a DSB (or DWB) to the
cpu_(v6|v7)_do_idle functions before the WFI.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/mm/proc-v6.S | 2 ++
 arch/arm/mm/proc-v7.S | 1 +
 2 files changed, 3 insertions(+)

diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 294943b85973..f0cc599facb7 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -71,6 +71,8 @@ ENTRY(cpu_v6_reset)
  *	IRQs are already disabled.
  */
 ENTRY(cpu_v6_do_idle)
+	mov	r1, #0
+	mcr	p15, 0, r1, c7, c10, 4		@ DWB - WFI may enter a low-power mode
 	mcr	p15, 0, r1, c7, c0, 4		@ wait for interrupt
 	mov	pc, lr
 
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 721b7d53bfd8..0e11d9716a7d 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -63,6 +63,7 @@ ENDPROC(cpu_v7_reset)
  *	IRQs are already disabled.
  */
 ENTRY(cpu_v7_do_idle)
+	dsb					@ WFI may enter a low-power mode
 	wfi
 	mov	pc, lr
 ENDPROC(cpu_v7_do_idle)

From 7f1fd31db158c95418d9cc5690ab60ecc6fb632d Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 10 Nov 2008 14:14:11 +0000
Subject: [PATCH 9/9] Fix the teehbr_read function prototype

A "void" was missing inside brackets.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/kernel/thumbee.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kernel/thumbee.c b/arch/arm/kernel/thumbee.c
index df3f6b7ebcea..9cb7aaca159f 100644
--- a/arch/arm/kernel/thumbee.c
+++ b/arch/arm/kernel/thumbee.c
@@ -25,7 +25,7 @@
 /*
  * Access to the ThumbEE Handler Base register
  */
-static inline unsigned long teehbr_read()
+static inline unsigned long teehbr_read(void)
 {
 	unsigned long v;
 	asm("mrc	p14, 6, %0, c1, c0, 0\n" : "=r" (v));