From 0c549ba1cfe1d2d61c07eccab7f274740b706ed6 Mon Sep 17 00:00:00 2001
From: Michael Gernoth <michael@gernoth.net>
Date: Mon, 23 May 2011 23:35:06 +0200
Subject: [PATCH 1/1] update kexec to 2.6.39 version, still doesn't work...

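Sync the ARMv7 low-level code (proc-v7.S, cache-v7.S, tlb-v7.S,
abort-ev7.S, proc-macros.S, entry-header.S) with the 2.6.39 versions and
carry local copies of assembler.h, hwcap.h, pgtable.h and tlbflush.h so
the module builds independently of the host tree's headers.

Notable changes relative to the previous snapshot:
 - compile-time CONFIG_SMP conditionals are replaced by the
   runtime-patched ALT_SMP()/ALT_UP() alternatives,
 - machine_crash_shutdown() now performs the crash IPI handshake with
   the non-panicking cores,
 - the machine_shutdown() call in kernel_kexec() is commented out and
   the outer-cache maintenance in machine_kexec() is stubbed with #if 0
   while debugging,
 - entry-common.o is dropped from the module link and copypage-v6.c is
   reduced to an empty v6_user_fns.

The resulting module still does not successfully start the new kernel;
the #if 0 hunks mark the current debugging state.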
---
 Makefile          |   2 +-
 abort-ev7.S       |  21 ++
 assembler.h       | 292 +++++++++++++++++++++++
 cache-v7.S        |  72 ++++--
 copypage-v6.c     |  48 ----
 entry-header.S    |  35 ++-
 hwcap.h           |  33 +++
 kexec.c           |   2 +-
 machine_kexec.c   |  52 +++-
 pgtable.h         | 484 ++++++++++++++++++++++++++++++++++++++
 proc-macros.S     |  61 +++--
 proc-v7.S         | 287 +++++++++++++++++------
 relocate_kernel.S |   8 +
 tlb-v7.S          |  28 +--
 tlbflush.h        | 587 ++++++++++++++++++++++++++++++++++++++++++++++
 15 files changed, 1820 insertions(+), 192 deletions(-)
 create mode 100644 assembler.h
 create mode 100644 hwcap.h
 create mode 100644 pgtable.h
 create mode 100644 tlbflush.h
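
Reviewer note (this sits below the "---" marker, so git am will drop
it): most of the SMP/UP divergence in the imported assembly now goes
through the ALT_SMP()/ALT_UP() pair from the local assembler.h.
ALT_SMP() emits the SMP-tuned instruction inline under a local label;
ALT_UP() records that address together with the UP replacement in the
".alt.smp.init" section, which the kernel walks when it finds itself on
a uniprocessor system, patching the UP instruction over the SMP one.
A minimal sketch of the idiom (illustrative only, not part of this
patch; example_tlb_inv is a made-up helper):

	ENTRY(example_tlb_inv)
	ALT_SMP(mcr	p15, 0, r0, c8, c3, 1)	@ SMP: TLB invalidate by MVA, inner shareable
	ALT_UP(mcr	p15, 0, r0, c8, c7, 1)	@ UP: TLB invalidate by MVA, local
	mov	pc, lr
	ENDPROC(example_tlb_inv)

Each ALT_UP() body must assemble to exactly 4 bytes (assembler.h
enforces this with .error), because it is patched over the 4-byte SMP
encoding in place.  A host kernel without SMP_ON_UP fixup support for
module sections will simply leave the SMP variants untouched.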

diff --git a/Makefile b/Makefile
index 61a5644..bc86aac 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ EXTRA_CFLAGS += -DCONFIG_KEXEC -Wall
 
 obj-m += kexec_load.o
 kexec_load-objs := kexec.o machine_kexec.o idmap.o sys.o core.o relocate_kernel.o \
-	proc-v7.o tlb-v7.o cache-v7.o abort-ev7.o pabort-v7.o copypage-v6.o entry-common.o driver_sys.o
+	proc-v7.o tlb-v7.o cache-v7.o abort-ev7.o pabort-v7.o copypage-v6.o driver_sys.o
 
 all:
 	PATH=$(CROSS_PATH):$(PATH) CROSS_COMPILE=$(CROSS_COMPILE) ARCH=$(ARCH) make -C $(KDIR) M=$(PWD) modules
diff --git a/abort-ev7.S b/abort-ev7.S
index 2e6dc04..ec88b15 100644
--- a/abort-ev7.S
+++ b/abort-ev7.S
@@ -29,5 +29,26 @@ ENTRY(v7_early_abort)
 	 * V6 code adjusts the returned DFSR.
 	 * New designs should not need to patch up faults.
 	 */
+
+#if defined(CONFIG_VERIFY_PERMISSION_FAULT)
+	/*
+	 * Detect erroneous permission failures and fix
+	 */
+	ldr	r3, =0x40d			@ On permission fault
+	and	r3, r1, r3
+	cmp	r3, #0x0d
+	movne	pc, lr
+
+	mcr	p15, 0, r0, c7, c8, 0   	@ Retranslate FAR
+	isb
+	mrc	p15, 0, r2, c7, c4, 0   	@ Read the PAR
+	and	r3, r2, #0x7b   		@ On translation fault
+	cmp	r3, #0x0b
+	movne	pc, lr
+	bic	r1, r1, #0xf			@ Fix up FSR FS[5:0]
+	and	r2, r2, #0x7e
+	orr	r1, r1, r2, LSR #1
+#endif
+
 	mov	pc, lr
 ENDPROC(v7_early_abort)
diff --git a/assembler.h b/assembler.h
new file mode 100644
index 0000000..bc2d2d7
--- /dev/null
+++ b/assembler.h
@@ -0,0 +1,292 @@
+/*
+ *  arch/arm/include/asm/assembler.h
+ *
+ *  Copyright (C) 1996-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This file contains arm architecture specific defines
+ *  for the different processors.
+ *
+ *  Do not include any C declarations in this file - it is included by
+ *  assembler source.
+ */
+#ifndef __ASSEMBLY__
+#error "Only include this from assembly code"
+#endif
+
+#include <asm/ptrace.h>
+#include <asm/domain.h>
+
+/*
+ * Endian independent macros for shifting bytes within registers.
+ */
+#ifndef __ARMEB__
+#define pull            lsr
+#define push            lsl
+#define get_byte_0      lsl #0
+#define get_byte_1	lsr #8
+#define get_byte_2	lsr #16
+#define get_byte_3	lsr #24
+#define put_byte_0      lsl #0
+#define put_byte_1	lsl #8
+#define put_byte_2	lsl #16
+#define put_byte_3	lsl #24
+#else
+#define pull            lsl
+#define push            lsr
+#define get_byte_0	lsr #24
+#define get_byte_1	lsr #16
+#define get_byte_2	lsr #8
+#define get_byte_3      lsl #0
+#define put_byte_0	lsl #24
+#define put_byte_1	lsl #16
+#define put_byte_2	lsl #8
+#define put_byte_3      lsl #0
+#endif
+
+/*
+ * Data preload for architectures that support it
+ */
+#if __LINUX_ARM_ARCH__ >= 5
+#define PLD(code...)	code
+#else
+#define PLD(code...)
+#endif
+
+/*
+ * This can be used to enable code to cacheline align the destination
+ * pointer when bulk writing to memory.  Experiments on StrongARM and
+ * XScale didn't show this to be worthwhile when the cache is not
+ * set to write-allocate (this would need further testing on XScale when WA
+ * is used).
+ *
+ * On Feroceon there is much to gain however, regardless of cache mode.
+ */
+#ifdef CONFIG_CPU_FEROCEON
+#define CALGN(code...) code
+#else
+#define CALGN(code...)
+#endif
+
+/*
+ * Enable and disable interrupts
+ */
+#if __LINUX_ARM_ARCH__ >= 6
+	.macro	disable_irq_notrace
+	cpsid	i
+	.endm
+
+	.macro	enable_irq_notrace
+	cpsie	i
+	.endm
+#else
+	.macro	disable_irq_notrace
+	msr	cpsr_c, #PSR_I_BIT | SVC_MODE
+	.endm
+
+	.macro	enable_irq_notrace
+	msr	cpsr_c, #SVC_MODE
+	.endm
+#endif
+
+	.macro asm_trace_hardirqs_off
+#if defined(CONFIG_TRACE_IRQFLAGS)
+	stmdb   sp!, {r0-r3, ip, lr}
+	bl	trace_hardirqs_off
+	ldmia	sp!, {r0-r3, ip, lr}
+#endif
+	.endm
+
+	.macro asm_trace_hardirqs_on_cond, cond
+#if defined(CONFIG_TRACE_IRQFLAGS)
+	/*
+	 * actually the registers should be pushed and pop'd conditionally, but
+	 * after bl the flags are certainly clobbered
+	 */
+	stmdb   sp!, {r0-r3, ip, lr}
+	bl\cond	trace_hardirqs_on
+	ldmia	sp!, {r0-r3, ip, lr}
+#endif
+	.endm
+
+	.macro asm_trace_hardirqs_on
+	asm_trace_hardirqs_on_cond al
+	.endm
+
+	.macro disable_irq
+	disable_irq_notrace
+	asm_trace_hardirqs_off
+	.endm
+
+	.macro enable_irq
+	asm_trace_hardirqs_on
+	enable_irq_notrace
+	.endm
+/*
+ * Save the current IRQ state and disable IRQs.  Note that this macro
+ * assumes FIQs are enabled, and that the processor is in SVC mode.
+ */
+	.macro	save_and_disable_irqs, oldcpsr
+	mrs	\oldcpsr, cpsr
+	disable_irq
+	.endm
+
+/*
+ * Restore interrupt state previously stored in a register.  We don't
+ * guarantee that this will preserve the flags.
+ */
+	.macro	restore_irqs_notrace, oldcpsr
+	msr	cpsr_c, \oldcpsr
+	.endm
+
+	.macro restore_irqs, oldcpsr
+	tst	\oldcpsr, #PSR_I_BIT
+	asm_trace_hardirqs_on_cond eq
+	restore_irqs_notrace \oldcpsr
+	.endm
+
+#define USER(x...)				\
+9999:	x;					\
+	.pushsection __ex_table,"a";		\
+	.align	3;				\
+	.long	9999b,9001f;			\
+	.popsection
+
+#ifdef CONFIG_SMP
+#define ALT_SMP(instr...)					\
+9998:	instr
+/*
+ * Note: if you get assembler errors from ALT_UP() when building with
+ * CONFIG_THUMB2_KERNEL, you almost certainly need to use
+ * ALT_SMP( W(instr) ... )
+ */
+#define ALT_UP(instr...)					\
+	.pushsection ".alt.smp.init", "a"			;\
+	.long	9998b						;\
+9997:	instr							;\
+	.if . - 9997b != 4					;\
+		.error "ALT_UP() content must assemble to exactly 4 bytes";\
+	.endif							;\
+	.popsection
+#define ALT_UP_B(label)					\
+	.equ	up_b_offset, label - 9998b			;\
+	.pushsection ".alt.smp.init", "a"			;\
+	.long	9998b						;\
+	W(b)	. + up_b_offset					;\
+	.popsection
+#else
+#define ALT_SMP(instr...)
+#define ALT_UP(instr...) instr
+#define ALT_UP_B(label) b label
+#endif
+
+/*
+ * SMP data memory barrier
+ */
+	.macro	smp_dmb mode
+#ifdef CONFIG_SMP
+#if __LINUX_ARM_ARCH__ >= 7
+	.ifeqs "\mode","arm"
+	ALT_SMP(dmb)
+	.else
+	ALT_SMP(W(dmb))
+	.endif
+#elif __LINUX_ARM_ARCH__ == 6
+	ALT_SMP(mcr	p15, 0, r0, c7, c10, 5)	@ dmb
+#else
+#error Incompatible SMP platform
+#endif
+	.ifeqs "\mode","arm"
+	ALT_UP(nop)
+	.else
+	ALT_UP(W(nop))
+	.endif
+#endif
+	.endm
+
+#ifdef CONFIG_THUMB2_KERNEL
+	.macro	setmode, mode, reg
+	mov	\reg, #\mode
+	msr	cpsr_c, \reg
+	.endm
+#else
+	.macro	setmode, mode, reg
+	msr	cpsr_c, #\mode
+	.endm
+#endif
+
+/*
+ * STRT/LDRT access macros with ARM and Thumb-2 variants
+ */
+#ifdef CONFIG_THUMB2_KERNEL
+
+	.macro	usraccoff, instr, reg, ptr, inc, off, cond, abort, t=T()
+9999:
+	.if	\inc == 1
+	\instr\cond\()b\()\t\().w \reg, [\ptr, #\off]
+	.elseif	\inc == 4
+	\instr\cond\()\t\().w \reg, [\ptr, #\off]
+	.else
+	.error	"Unsupported inc macro argument"
+	.endif
+
+	.pushsection __ex_table,"a"
+	.align	3
+	.long	9999b, \abort
+	.popsection
+	.endm
+
+	.macro	usracc, instr, reg, ptr, inc, cond, rept, abort
+	@ explicit IT instruction needed because of the label
+	@ introduced by the USER macro
+	.ifnc	\cond,al
+	.if	\rept == 1
+	itt	\cond
+	.elseif	\rept == 2
+	ittt	\cond
+	.else
+	.error	"Unsupported rept macro argument"
+	.endif
+	.endif
+
+	@ Slightly optimised to avoid incrementing the pointer twice
+	usraccoff \instr, \reg, \ptr, \inc, 0, \cond, \abort
+	.if	\rept == 2
+	usraccoff \instr, \reg, \ptr, \inc, \inc, \cond, \abort
+	.endif
+
+	add\cond \ptr, #\rept * \inc
+	.endm
+
+#else	/* !CONFIG_THUMB2_KERNEL */
+
+	.macro	usracc, instr, reg, ptr, inc, cond, rept, abort, t=T()
+	.rept	\rept
+9999:
+	.if	\inc == 1
+	\instr\cond\()b\()\t \reg, [\ptr], #\inc
+	.elseif	\inc == 4
+	\instr\cond\()\t \reg, [\ptr], #\inc
+	.else
+	.error	"Unsupported inc macro argument"
+	.endif
+
+	.pushsection __ex_table,"a"
+	.align	3
+	.long	9999b, \abort
+	.popsection
+	.endr
+	.endm
+
+#endif	/* CONFIG_THUMB2_KERNEL */
+
+	.macro	strusr, reg, ptr, inc, cond=al, rept=1, abort=9001f
+	usracc	str, \reg, \ptr, \inc, \cond, \rept, \abort
+	.endm
+
+	.macro	ldrusr, reg, ptr, inc, cond=al, rept=1, abort=9001f
+	usracc	ldr, \reg, \ptr, \inc, \cond, \rept, \abort
+	.endm
diff --git a/cache-v7.S b/cache-v7.S
index e1bd975..ae604c9 100644
--- a/cache-v7.S
+++ b/cache-v7.S
@@ -12,11 +12,26 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
-#include <asm/assembler.h>
+#include "assembler.h"
 #include <asm/unwind.h>
 
 #include "proc-macros.S"
 
+/*
+ *	v7_flush_icache_all()
+ *
+ *	Flush the whole I-cache.
+ *
+ *	Registers:
+ *	r0 - set to 0
+ */
+ENTRY(v7_flush_icache_all)
+	mov	r0, #0
+	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)		@ invalidate I-cache inner shareable
+	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)		@ I+BTB cache invalidate
+	mov	pc, lr
+ENDPROC(v7_flush_icache_all)
+
 /*
  *	v7_flush_dcache_all()
  *
@@ -81,7 +96,7 @@ ENDPROC(v7_flush_dcache_all)
  *	Flush the entire cache system.
  *  The data cache flush is now achieved using atomic clean / invalidates
  *  working outwards from L1 cache. This is done using Set/Way based cache
- *  maintainance instructions.
+ *  maintenance instructions.
  *  The instruction cache can still be invalidated back to the point of
  *  unification in a single instruction.
  *
@@ -91,7 +106,8 @@ ENTRY(v7_flush_kern_cache_all)
  THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
 	bl	v7_flush_dcache_all
 	mov	r0, #0
-	mcr	p15, 0, r0, c7, c5, 0		@ I+BTB cache invalidate
+	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
+	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
  ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
  THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
 	mov	pc, lr
@@ -157,17 +173,25 @@ ENTRY(v7_coherent_user_range)
  UNWIND(.fnstart		)
 	dcache_line_size r2, r3
 	sub	r3, r2, #1
-	bic	r0, r0, r3
+	bic	r12, r0, r3
 1:
- USER(	mcr	p15, 0, r0, c7, c11, 1	)	@ clean D line to the point of unification
+ USER(	mcr	p15, 0, r12, c7, c11, 1	)	@ clean D line to the point of unification
+	add	r12, r12, r2
+	cmp	r12, r1
+	blo	1b
 	dsb
- USER(	mcr	p15, 0, r0, c7, c5, 1	)	@ invalidate I line
-	add	r0, r0, r2
+	icache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r12, r0, r3
 2:
-	cmp	r0, r1
-	blo	1b
+ USER(	mcr	p15, 0, r12, c7, c5, 1	)	@ invalidate I line
+	add	r12, r12, r2
+	cmp	r12, r1
+	blo	2b
+3:
 	mov	r0, #0
-	mcr	p15, 0, r0, c7, c5, 6		@ invalidate BTB
+	ALT_SMP(mcr	p15, 0, r0, c7, c1, 6)	@ invalidate BTB Inner Shareable
+	ALT_UP(mcr	p15, 0, r0, c7, c5, 6)	@ invalidate BTB
 	dsb
 	isb
 	mov	pc, lr
@@ -177,25 +201,26 @@ ENTRY(v7_coherent_user_range)
  * isn't mapped, just try the next page.
  */
 9001:
-	mov	r0, r0, lsr #12
-	mov	r0, r0, lsl #12
-	add	r0, r0, #4096
-	b	2b
+	mov	r12, r12, lsr #12
+	mov	r12, r12, lsl #12
+	add	r12, r12, #4096
+	b	3b
  UNWIND(.fnend		)
 ENDPROC(v7_coherent_kern_range)
 ENDPROC(v7_coherent_user_range)
 
 /*
- *	v7_flush_kern_dcache_page(kaddr)
+ *	v7_flush_kern_dcache_area(void *addr, size_t size)
  *
  *	Ensure that the data held in the page kaddr is written back
  *	to the page in question.
  *
- *	- kaddr   - kernel address (guaranteed to be page aligned)
+ *	- addr	- kernel address
+ *	- size	- region size
  */
-ENTRY(v7_flush_kern_dcache_page)
+ENTRY(v7_flush_kern_dcache_area)
 	dcache_line_size r2, r3
-	add	r1, r0, #PAGE_SZ
+	add	r1, r0, r1
 1:
 	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line / unified line
 	add	r0, r0, r2
@@ -203,7 +228,7 @@ ENTRY(v7_flush_kern_dcache_page)
 	blo	1b
 	dsb
 	mov	pc, lr
-ENDPROC(v7_flush_kern_dcache_page)
+ENDPROC(v7_flush_kern_dcache_area)
 
 /*
  *	v7_dma_inv_range(start,end)
@@ -215,7 +240,7 @@ ENDPROC(v7_flush_kern_dcache_page)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-ENTRY(v7_dma_inv_range)
+v7_dma_inv_range:
 	dcache_line_size r2, r3
 	sub	r3, r2, #1
 	tst	r0, r3
@@ -239,7 +264,7 @@ ENDPROC(v7_dma_inv_range)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-ENTRY(v7_dma_clean_range)
+v7_dma_clean_range:
 	dcache_line_size r2, r3
 	sub	r3, r2, #1
 	bic	r0, r0, r3
@@ -274,13 +299,12 @@ ENDPROC(v7_dma_flush_range)
 
 	.type	v7_cache_fns, #object
 ENTRY(v7_cache_fns)
+	.long	v7_flush_icache_all
 	.long	v7_flush_kern_cache_all
 	.long	v7_flush_user_cache_all
 	.long	v7_flush_user_cache_range
 	.long	v7_coherent_kern_range
 	.long	v7_coherent_user_range
-	.long	v7_flush_kern_dcache_page
-	.long	v7_dma_inv_range
-	.long	v7_dma_clean_range
+	.long	v7_flush_kern_dcache_area
 	.long	v7_dma_flush_range
 	.size	v7_cache_fns, . - v7_cache_fns
diff --git a/copypage-v6.c b/copypage-v6.c
index 9cc8485..a9ace4e 100644
--- a/copypage-v6.c
+++ b/copypage-v6.c
@@ -1,13 +1,3 @@
-/*
- *  linux/arch/arm/mm/copypage-v6.c
- *
- *  Copyright (C) 2002 Deep Blue Solutions Ltd, All Rights Reserved.
- * This Edition is maintained by Matthew Veety (aliasxerog) <mveety@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
@@ -21,43 +11,5 @@
 
 #include "mm.h"
 
-#if SHMLBA > 16384
-#error FIX ME
-#endif
-
-#define from_address	(0xffff8000)
-#define to_address	(0xffffc000)
-
-/*
- * Copy the user page.  No aliasing to deal with so we can just
- * attack the kernel's existing mapping of these pages.
- */
-
-static void v6_copy_user_highpage_nonaliasing(struct page *to,
-	struct page *from, unsigned long vaddr)
-{
-	void *kto, *kfrom;
-
-	kfrom = kmap_atomic(from, KM_USER0);
-	kto = kmap_atomic(to, KM_USER1);
-	copy_page(kto, kfrom);
-	kunmap_atomic(kto, KM_USER1);
-	kunmap_atomic(kfrom, KM_USER0);
-}
-
-/*
- * Clear the user page.  No aliasing to deal with so we can just
- * attack the kernel's existing mapping of this page.
- */
-static void v6_clear_user_highpage_nonaliasing(struct page *page, unsigned long vaddr)
-{
-	void *kaddr = kmap_atomic(page, KM_USER0);
-	clear_page(kaddr);
-	kunmap_atomic(kaddr, KM_USER0);
-}
-
-
 struct cpu_user_fns v6_user_fns __initdata = {
-	.cpu_clear_user_highpage = v6_clear_user_highpage_nonaliasing,
-	.cpu_copy_user_highpage	= v6_copy_user_highpage_nonaliasing,
 };
diff --git a/entry-header.S b/entry-header.S
index 7e9ed1e..051166c 100644
--- a/entry-header.S
+++ b/entry-header.S
@@ -76,13 +76,13 @@
 #ifndef CONFIG_THUMB2_KERNEL
 	.macro	svc_exit, rpsr
 	msr	spsr_cxsf, \rpsr
-#if defined(CONFIG_CPU_32v6K)
-	clrex					@ clear the exclusive monitor
-	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
-#elif defined (CONFIG_CPU_V6)
+#if defined(CONFIG_CPU_V6)
 	ldr	r0, [sp]
 	strex	r1, r2, [sp]			@ clear the exclusive monitor
 	ldmib	sp, {r1 - pc}^			@ load r1 - pc, cpsr
+#elif defined(CONFIG_CPU_32v6K)
+	clrex					@ clear the exclusive monitor
+	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
 #else
 	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
 #endif
@@ -92,16 +92,18 @@
 	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
 	ldr	lr, [sp, #\offset + S_PC]!	@ get pc
 	msr	spsr_cxsf, r1			@ save in spsr_svc
-#if defined(CONFIG_CPU_32v6K)
-	clrex					@ clear the exclusive monitor
-#elif defined (CONFIG_CPU_V6)
+#if defined(CONFIG_CPU_V6)
 	strex	r1, r2, [sp]			@ clear the exclusive monitor
+#elif defined(CONFIG_CPU_32v6K)
+	clrex					@ clear the exclusive monitor
 #endif
 	.if	\fast
 	ldmdb	sp, {r1 - lr}^			@ get calling r1 - lr
 	.else
 	ldmdb	sp, {r0 - lr}^			@ get calling r0 - lr
 	.endif
+	mov	r0, r0				@ ARMv5T and earlier require a nop
+						@ after ldm {}^
 	add	sp, sp, #S_FRAME_SIZE - S_PC
 	movs	pc, lr				@ return & move spsr_svc into cpsr
 	.endm
@@ -163,6 +165,25 @@
 	.endm
 #endif	/* !CONFIG_THUMB2_KERNEL */
 
+	@
+	@ Debug exceptions are taken as prefetch or data aborts.
+	@ We must disable preemption during the handler so that
+	@ we can access the debug registers safely.
+	@
+	.macro	debug_entry, fsr
+#if defined(CONFIG_HAVE_HW_BREAKPOINT) && defined(CONFIG_PREEMPT)
+	ldr	r4, =0x40f		@ mask out fsr.fs
+	and	r5, r4, \fsr
+	cmp	r5, #2			@ debug exception
+	bne	1f
+	get_thread_info r10
+	ldr	r6, [r10, #TI_PREEMPT]	@ get preempt count
+	add	r11, r6, #1		@ increment it
+	str	r11, [r10, #TI_PREEMPT]
+1:
+#endif
+	.endm
+
 /*
  * These are the registers used in the syscall handler, and allow us to
  * have in theory up to 7 arguments to a function - r0 to r6.
diff --git a/hwcap.h b/hwcap.h
new file mode 100644
index 0000000..c1062c3
--- /dev/null
+++ b/hwcap.h
@@ -0,0 +1,33 @@
+#ifndef __ASMARM_HWCAP_H
+#define __ASMARM_HWCAP_H
+
+/*
+ * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP
+ */
+#define HWCAP_SWP	1
+#define HWCAP_HALF	2
+#define HWCAP_THUMB	4
+#define HWCAP_26BIT	8	/* Play it safe */
+#define HWCAP_FAST_MULT	16
+#define HWCAP_FPA	32
+#define HWCAP_VFP	64
+#define HWCAP_EDSP	128
+#define HWCAP_JAVA	256
+#define HWCAP_IWMMXT	512
+#define HWCAP_CRUNCH	1024
+#define HWCAP_THUMBEE	2048
+#define HWCAP_NEON	4096
+#define HWCAP_VFPv3	8192
+#define HWCAP_VFPv3D16	16384
+#define HWCAP_TLS	32768
+
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+/*
+ * This yields a mask that user programs can use to figure out what
+ * instruction set this cpu supports.
+ */
+#define ELF_HWCAP	(elf_hwcap)
+extern unsigned int elf_hwcap;
+#endif
+
+#endif
diff --git a/kexec.c b/kexec.c
index e7ea604..7cdf7c2 100644
--- a/kexec.c
+++ b/kexec.c
@@ -1409,7 +1409,7 @@ int kernel_kexec(void)
 	{
 		kernel_restart_prepare(NULL);
 		printk(KERN_EMERG "Starting new kernel\n");
-		machine_shutdown();
+		//machine_shutdown();
 	}
 
 	machine_kexec(kexec_image);
diff --git a/machine_kexec.c b/machine_kexec.c
index 598ca61..531120f 100644
--- a/machine_kexec.c
+++ b/machine_kexec.c
@@ -23,6 +23,8 @@ extern unsigned long kexec_indirection_page;
 extern unsigned long kexec_mach_type;
 extern unsigned long kexec_boot_atags;
 
+static atomic_t waiting_for_crash_ipi;
+
 /*
  * Provide a dummy crash_notes definition while crash dump arrives to arm.
  * This prevents breakage of crash_notes attribute in kernel/ksysfs.c.
@@ -37,14 +39,47 @@ void machine_kexec_cleanup(struct kimage *image)
 {
 }
 
-void machine_shutdown(void)
+void machine_crash_nonpanic_core(void *unused)
 {
+	struct pt_regs regs;
+
+	crash_setup_regs(&regs, NULL);
+	printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n",
+	       smp_processor_id());
+	crash_save_cpu(&regs, smp_processor_id());
+	flush_cache_all();
+
+	atomic_dec(&waiting_for_crash_ipi);
+	while (1)
+		cpu_relax();
 }
 
 void machine_crash_shutdown(struct pt_regs *regs)
 {
+	unsigned long msecs;
+
+	local_irq_disable();
+
+	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+	smp_call_function(machine_crash_nonpanic_core, NULL, false);
+	msecs = 1000; /* Wait at most a second for the other cpus to stop */
+	while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
+		mdelay(1);
+		msecs--;
+	}
+	if (atomic_read(&waiting_for_crash_ipi) > 0)
+		printk(KERN_WARNING "Non-crashing CPUs did not react to IPI\n");
+
+	crash_save_cpu(regs, smp_processor_id());
+
+	printk(KERN_INFO "Loading crashdump kernel...\n");
 }
 
+/*
+ * Function pointer to optional machine-specific reinitialization
+ */
+void (*kexec_reinit)(void);
+
 void machine_kexec(struct kimage *image)
 {
 	unsigned long page_list;
@@ -74,7 +109,20 @@ void machine_kexec(struct kimage *image)
 			   (unsigned long) reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
 	printk(KERN_INFO "Bye!\n");
 
-	cpu_proc_fin();
+	if (kexec_reinit)
+		kexec_reinit();
+	local_irq_disable();
+	local_fiq_disable();
 	setup_mm_for_reboot(0); /* mode is not used, so just pass 0*/
+	flush_cache_all();
+#if 0
+	outer_flush_all();
+	outer_disable();
+#endif
+	cpu_proc_fin();
+#if 0
+	outer_inv_all();
+#endif
+	flush_cache_all();
 	cpu_reset(reboot_code_buffer_phys);
 }
diff --git a/pgtable.h b/pgtable.h
new file mode 100644
index 0000000..5750704
--- /dev/null
+++ b/pgtable.h
@@ -0,0 +1,484 @@
+/*
+ *  arch/arm/include/asm/pgtable.h
+ *
+ *  Copyright (C) 1995-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _ASMARM_PGTABLE_H
+#define _ASMARM_PGTABLE_H
+
+#include <linux/const.h>
+#include <asm-generic/4level-fixup.h>
+#include <asm/proc-fns.h>
+
+#ifndef CONFIG_MMU
+
+#include "pgtable-nommu.h"
+
+#else
+
+#include <asm/memory.h>
+#include <mach/vmalloc.h>
+#include <asm/pgtable-hwdef.h>
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be a 8MB "hole" after the
+ * physical memory until the kernel virtual memory starts.  That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leave a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ *
+ * Note that platforms may override VMALLOC_START, but they must provide
+ * VMALLOC_END.  VMALLOC_END defines the (exclusive) limit of this space,
+ * which may not overlap IO space.
+ */
+#ifndef VMALLOC_START
+#define VMALLOC_OFFSET		(8*1024*1024)
+#define VMALLOC_START		(((unsigned long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
+#endif
+
+/*
+ * Hardware-wise, we have a two level page table structure, where the first
+ * level has 4096 entries, and the second level has 256 entries.  Each entry
+ * is one 32-bit word.  Most of the bits in the second level entry are used
+ * by hardware, and there aren't any "accessed" and "dirty" bits.
+ *
+ * Linux on the other hand has a three level page table structure, which can
+ * be wrapped to fit a two level page table structure easily - using the PGD
+ * and PTE only.  However, Linux also expects one "PTE" table per page, and
+ * at least a "dirty" bit.
+ *
+ * Therefore, we tweak the implementation slightly - we tell Linux that we
+ * have 2048 entries in the first level, each of which is 8 bytes (iow, two
+ * hardware pointers to the second level.)  The second level contains two
+ * hardware PTE tables arranged contiguously, preceded by Linux versions
+ * which contain the state information Linux needs.  We, therefore, end up
+ * with 512 entries in the "PTE" level.
+ *
+ * This leads to the page tables having the following layout:
+ *
+ *    pgd             pte
+ * |        |
+ * +--------+
+ * |        |       +------------+ +0
+ * +- - - - +       | Linux pt 0 |
+ * |        |       +------------+ +1024
+ * +--------+ +0    | Linux pt 1 |
+ * |        |-----> +------------+ +2048
+ * +- - - - + +4    |  h/w pt 0  |
+ * |        |-----> +------------+ +3072
+ * +--------+ +8    |  h/w pt 1  |
+ * |        |       +------------+ +4096
+ *
+ * See L_PTE_xxx below for definitions of bits in the "Linux pt", and
+ * PTE_xxx for definitions of bits appearing in the "h/w pt".
+ *
+ * PMD_xxx definitions refer to bits in the first level page table.
+ *
+ * The "dirty" bit is emulated by only granting hardware write permission
+ * iff the page is marked "writable" and "dirty" in the Linux PTE.  This
+ * means that a write to a clean page will cause a permission fault, and
+ * the Linux MM layer will mark the page dirty via handle_pte_fault().
+ * For the hardware to notice the permission change, the TLB entry must
+ * be flushed, and ptep_set_access_flags() does that for us.
+ *
+ * The "accessed" or "young" bit is emulated by a similar method; we only
+ * allow accesses to the page if the "young" bit is set.  Accesses to the
+ * page will cause a fault, and handle_pte_fault() will set the young bit
+ * for us as long as the page is marked present in the corresponding Linux
+ * PTE entry.  Again, ptep_set_access_flags() will ensure that the TLB is
+ * up to date.
+ *
+ * However, when the "young" bit is cleared, we deny access to the page
+ * by clearing the hardware PTE.  Currently Linux does not flush the TLB
+ * for us in this case, which means the TLB will retain the translation
+ * until either the TLB entry is evicted under pressure, or a context
+ * switch which changes the user space mapping occurs.
+ */
+#define PTRS_PER_PTE		512
+#define PTRS_PER_PMD		1
+#define PTRS_PER_PGD		2048
+
+#define PTE_HWTABLE_PTRS	(PTRS_PER_PTE)
+#define PTE_HWTABLE_OFF		(PTE_HWTABLE_PTRS * sizeof(pte_t))
+#define PTE_HWTABLE_SIZE	(PTRS_PER_PTE * sizeof(u32))
+
+/*
+ * PMD_SHIFT determines the size of the area a second-level page table can map
+ * PGDIR_SHIFT determines what a third-level page table entry can map
+ */
+#define PMD_SHIFT		21
+#define PGDIR_SHIFT		21
+
+#define LIBRARY_TEXT_START	0x0c000000
+
+#ifndef __ASSEMBLY__
+extern void __pte_error(const char *file, int line, pte_t);
+extern void __pmd_error(const char *file, int line, pmd_t);
+extern void __pgd_error(const char *file, int line, pgd_t);
+
+#define pte_ERROR(pte)		__pte_error(__FILE__, __LINE__, pte)
+#define pmd_ERROR(pmd)		__pmd_error(__FILE__, __LINE__, pmd)
+#define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd)
+#endif /* !__ASSEMBLY__ */
+
+#define PMD_SIZE		(1UL << PMD_SHIFT)
+#define PMD_MASK		(~(PMD_SIZE-1))
+#define PGDIR_SIZE		(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK		(~(PGDIR_SIZE-1))
+
+/*
+ * This is the lowest virtual address we can permit any user space
+ * mapping to be mapped at.  This is particularly important for
+ * non-high vector CPUs.
+ */
+#define FIRST_USER_ADDRESS	PAGE_SIZE
+
+#define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
+
+/*
+ * section address mask and size definitions.
+ */
+#define SECTION_SHIFT		20
+#define SECTION_SIZE		(1UL << SECTION_SHIFT)
+#define SECTION_MASK		(~(SECTION_SIZE-1))
+
+/*
+ * ARMv6 supersection address mask and size definitions.
+ */
+#define SUPERSECTION_SHIFT	24
+#define SUPERSECTION_SIZE	(1UL << SUPERSECTION_SHIFT)
+#define SUPERSECTION_MASK	(~(SUPERSECTION_SIZE-1))
+
+/*
+ * "Linux" PTE definitions.
+ *
+ * We keep two sets of PTEs - the hardware and the linux version.
+ * This allows greater flexibility in the way we map the Linux bits
+ * onto the hardware tables, and allows us to have YOUNG and DIRTY
+ * bits.
+ *
+ * The PTE table pointer refers to the hardware entries; the "Linux"
+ * entries are stored 1024 bytes below.
+ */
+#define L_PTE_PRESENT		(_AT(pteval_t, 1) << 0)
+#define L_PTE_YOUNG		(_AT(pteval_t, 1) << 1)
+#define L_PTE_FILE		(_AT(pteval_t, 1) << 2)	/* only when !PRESENT */
+#define L_PTE_DIRTY		(_AT(pteval_t, 1) << 6)
+#define L_PTE_RDONLY		(_AT(pteval_t, 1) << 7)
+#define L_PTE_USER		(_AT(pteval_t, 1) << 8)
+#define L_PTE_XN		(_AT(pteval_t, 1) << 9)
+#define L_PTE_SHARED		(_AT(pteval_t, 1) << 10)	/* shared(v6), coherent(xsc3) */
+
+/*
+ * These are the memory types, defined to be compatible with
+ * pre-ARMv6 CPUs cacheable and bufferable bits:   XXCB
+ */
+#define L_PTE_MT_UNCACHED	(_AT(pteval_t, 0x00) << 2)	/* 0000 */
+#define L_PTE_MT_BUFFERABLE	(_AT(pteval_t, 0x01) << 2)	/* 0001 */
+#define L_PTE_MT_WRITETHROUGH	(_AT(pteval_t, 0x02) << 2)	/* 0010 */
+#define L_PTE_MT_WRITEBACK	(_AT(pteval_t, 0x03) << 2)	/* 0011 */
+#define L_PTE_MT_MINICACHE	(_AT(pteval_t, 0x06) << 2)	/* 0110 (sa1100, xscale) */
+#define L_PTE_MT_WRITEALLOC	(_AT(pteval_t, 0x07) << 2)	/* 0111 */
+#define L_PTE_MT_DEV_SHARED	(_AT(pteval_t, 0x04) << 2)	/* 0100 */
+#define L_PTE_MT_DEV_NONSHARED	(_AT(pteval_t, 0x0c) << 2)	/* 1100 */
+#define L_PTE_MT_DEV_WC		(_AT(pteval_t, 0x09) << 2)	/* 1001 */
+#define L_PTE_MT_DEV_CACHED	(_AT(pteval_t, 0x0b) << 2)	/* 1011 */
+#define L_PTE_MT_MASK		(_AT(pteval_t, 0x0f) << 2)
+
+#ifndef __ASSEMBLY__
+
+/*
+ * The pgprot_* and protection_map entries will be fixed up in runtime
+ * to include the cachable and bufferable bits based on memory policy,
+ * as well as any architecture dependent bits like global/ASID and SMP
+ * shared mapping bits.
+ */
+#define _L_PTE_DEFAULT	L_PTE_PRESENT | L_PTE_YOUNG
+
+extern pgprot_t		pgprot_user;
+extern pgprot_t		pgprot_kernel;
+
+#define _MOD_PROT(p, b)	__pgprot(pgprot_val(p) | (b))
+
+#define PAGE_NONE		_MOD_PROT(pgprot_user, L_PTE_XN | L_PTE_RDONLY)
+#define PAGE_SHARED		_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_XN)
+#define PAGE_SHARED_EXEC	_MOD_PROT(pgprot_user, L_PTE_USER)
+#define PAGE_COPY		_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY | L_PTE_XN)
+#define PAGE_COPY_EXEC		_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY)
+#define PAGE_READONLY		_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY | L_PTE_XN)
+#define PAGE_READONLY_EXEC	_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY)
+#define PAGE_KERNEL		_MOD_PROT(pgprot_kernel, L_PTE_XN)
+#define PAGE_KERNEL_EXEC	pgprot_kernel
+
+#define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
+#define __PAGE_SHARED		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
+#define __PAGE_SHARED_EXEC	__pgprot(_L_PTE_DEFAULT | L_PTE_USER)
+#define __PAGE_COPY		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY | L_PTE_XN)
+#define __PAGE_COPY_EXEC	__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY)
+#define __PAGE_READONLY		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY | L_PTE_XN)
+#define __PAGE_READONLY_EXEC	__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY)
+
+#define __pgprot_modify(prot,mask,bits)		\
+	__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
+
+#define pgprot_noncached(prot) \
+	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED)
+
+#define pgprot_writecombine(prot) \
+	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE)
+
+#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
+#define pgprot_dmacoherent(prot) \
+	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE | L_PTE_XN)
+#define __HAVE_PHYS_MEM_ACCESS_PROT
+struct file;
+extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+				     unsigned long size, pgprot_t vma_prot);
+#else
+#define pgprot_dmacoherent(prot) \
+	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED | L_PTE_XN)
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * The table below defines the page protection levels that we insert into our
+ * Linux page table version.  These get translated into the best that the
+ * architecture can perform.  Note that on most ARM hardware:
+ *  1) We cannot do execute protection
+ *  2) If we could do execute protection, then read is implied
+ *  3) write implies read permissions
+ */
+#define __P000  __PAGE_NONE
+#define __P001  __PAGE_READONLY
+#define __P010  __PAGE_COPY
+#define __P011  __PAGE_COPY
+#define __P100  __PAGE_READONLY_EXEC
+#define __P101  __PAGE_READONLY_EXEC
+#define __P110  __PAGE_COPY_EXEC
+#define __P111  __PAGE_COPY_EXEC
+
+#define __S000  __PAGE_NONE
+#define __S001  __PAGE_READONLY
+#define __S010  __PAGE_SHARED
+#define __S011  __PAGE_SHARED
+#define __S100  __PAGE_READONLY_EXEC
+#define __S101  __PAGE_READONLY_EXEC
+#define __S110  __PAGE_SHARED_EXEC
+#define __S111  __PAGE_SHARED_EXEC
+
+#ifndef __ASSEMBLY__
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern struct page *empty_zero_page;
+#define ZERO_PAGE(vaddr)	(empty_zero_page)
+
+
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+
+/* to find an entry in a page-table-directory */
+#define pgd_index(addr)		((addr) >> PGDIR_SHIFT)
+
+#define pgd_offset(mm, addr)	((mm)->pgd + pgd_index(addr))
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(addr)	pgd_offset(&init_mm, addr)
+
+/*
+ * The "pgd_xxx()" functions here are trivial for a folded two-level
+ * setup: the pgd is never bad, and a pmd always exists (as it's folded
+ * into the pgd entry)
+ */
+#define pgd_none(pgd)		(0)
+#define pgd_bad(pgd)		(0)
+#define pgd_present(pgd)	(1)
+#define pgd_clear(pgdp)		do { } while (0)
+#define set_pgd(pgd,pgdp)	do { } while (0)
+#define set_pud(pud,pudp)	do { } while (0)
+
+
+/* Find an entry in the second-level page table.. */
+#define pmd_offset(dir, addr)	((pmd_t *)(dir))
+
+#define pmd_none(pmd)		(!pmd_val(pmd))
+#define pmd_present(pmd)	(pmd_val(pmd))
+#define pmd_bad(pmd)		(pmd_val(pmd) & 2)
+
+#define copy_pmd(pmdpd,pmdps)		\
+	do {				\
+		pmdpd[0] = pmdps[0];	\
+		pmdpd[1] = pmdps[1];	\
+		flush_pmd_entry(pmdpd);	\
+	} while (0)
+
+#define pmd_clear(pmdp)			\
+	do {				\
+		pmdp[0] = __pmd(0);	\
+		pmdp[1] = __pmd(0);	\
+		clean_pmd_entry(pmdp);	\
+	} while (0)
+
+static inline pte_t *pmd_page_vaddr(pmd_t pmd)
+{
+	return __va(pmd_val(pmd) & PAGE_MASK);
+}
+
+#define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd)))
+
+/* we don't need complex calculations here as the pmd is folded into the pgd */
+#define pmd_addr_end(addr,end)	(end)
+
+
+#ifndef CONFIG_HIGHPTE
+#define __pte_map(pmd)		pmd_page_vaddr(*(pmd))
+#define __pte_unmap(pte)	do { } while (0)
+#else
+#define __pte_map(pmd)		(pte_t *)kmap_atomic(pmd_page(*(pmd)))
+#define __pte_unmap(pte)	kunmap_atomic(pte)
+#endif
+
+#define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+
+#define pte_offset_kernel(pmd,addr)	(pmd_page_vaddr(*(pmd)) + pte_index(addr))
+
+#define pte_offset_map(pmd,addr)	(__pte_map(pmd) + pte_index(addr))
+#define pte_unmap(pte)			__pte_unmap(pte)
+
+#define pte_pfn(pte)		(pte_val(pte) >> PAGE_SHIFT)
+#define pfn_pte(pfn,prot)	__pte(__pfn_to_phys(pfn) | pgprot_val(prot))
+
+#define pte_page(pte)		pfn_to_page(pte_pfn(pte))
+#define mk_pte(page,prot)	pfn_pte(page_to_pfn(page), prot)
+
+#define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext)
+#define pte_clear(mm,addr,ptep)	set_pte_ext(ptep, __pte(0), 0)
+
+#if __LINUX_ARM_ARCH__ < 6
+static inline void __sync_icache_dcache(pte_t pteval)
+{
+}
+#else
+extern void __sync_icache_dcache(pte_t pteval);
+#endif
+
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t pteval)
+{
+	if (addr >= TASK_SIZE)
+		set_pte_ext(ptep, pteval, 0);
+	else {
+		__sync_icache_dcache(pteval);
+		set_pte_ext(ptep, pteval, PTE_EXT_NG);
+	}
+}
+
+#define pte_none(pte)		(!pte_val(pte))
+#define pte_present(pte)	(pte_val(pte) & L_PTE_PRESENT)
+#define pte_write(pte)		(!(pte_val(pte) & L_PTE_RDONLY))
+#define pte_dirty(pte)		(pte_val(pte) & L_PTE_DIRTY)
+#define pte_young(pte)		(pte_val(pte) & L_PTE_YOUNG)
+#define pte_exec(pte)		(!(pte_val(pte) & L_PTE_XN))
+#define pte_special(pte)	(0)
+
+#define pte_present_user(pte) \
+	((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \
+	 (L_PTE_PRESENT | L_PTE_USER))
+
+#define PTE_BIT_FUNC(fn,op) \
+static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
+
+PTE_BIT_FUNC(wrprotect, |= L_PTE_RDONLY);
+PTE_BIT_FUNC(mkwrite,   &= ~L_PTE_RDONLY);
+PTE_BIT_FUNC(mkclean,   &= ~L_PTE_DIRTY);
+PTE_BIT_FUNC(mkdirty,   |= L_PTE_DIRTY);
+PTE_BIT_FUNC(mkold,     &= ~L_PTE_YOUNG);
+PTE_BIT_FUNC(mkyoung,   |= L_PTE_YOUNG);
+
+static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER;
+	pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
+	return pte;
+}
+
+/*
+ * Encode and decode a swap entry.  Swap entries are stored in the Linux
+ * page tables as follows:
+ *
+ *   3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *   <--------------- offset --------------------> <- type --> 0 0 0
+ *
+ * This gives us up to 63 swap files and 32GB per swap file.  Note that
+ * the offset field is always non-zero.
+ */
+#define __SWP_TYPE_SHIFT	3
+#define __SWP_TYPE_BITS		6
+#define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
+#define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
+
+#define __swp_type(x)		(((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
+#define __swp_offset(x)		((x).val >> __SWP_OFFSET_SHIFT)
+#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) })
+
+#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(swp)	((pte_t) { (swp).val })
+
+/*
+ * It is an error for the kernel to have more swap files than we can
+ * encode in the PTEs.  This ensures that we know when MAX_SWAPFILES
+ * is increased beyond what we presently support.
+ */
+#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
+
+/*
+ * Encode and decode a file entry.  File entries are stored in the Linux
+ * page tables as follows:
+ *
+ *   3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *   <----------------------- offset ------------------------> 1 0 0
+ */
+#define pte_file(pte)		(pte_val(pte) & L_PTE_FILE)
+#define pte_to_pgoff(x)		(pte_val(x) >> 3)
+#define pgoff_to_pte(x)		__pte(((x) << 3) | L_PTE_FILE)
+
+#define PTE_FILE_MAX_BITS	29
+
+/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
+/* FIXME: this is not correct */
+#define kern_addr_valid(addr)	(1)
+
+#include <asm-generic/pgtable.h>
+
+/*
+ * We provide our own arch_get_unmapped_area to cope with VIPT caches.
+ */
+#define HAVE_ARCH_UNMAPPED_AREA
+
+/*
+ * remap a physical page `pfn' of size `size' with page protection `prot'
+ * into virtual address `from'
+ */
+#define io_remap_pfn_range(vma,from,pfn,size,prot) \
+		remap_pfn_range(vma, from, pfn, size, prot)
+
+#define pgtable_cache_init() do { } while (0)
+
+void identity_mapping_add(pgd_t *, unsigned long, unsigned long);
+void identity_mapping_del(pgd_t *, unsigned long, unsigned long);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* CONFIG_MMU */
+
+#endif /* _ASMARM_PGTABLE_H */
diff --git a/proc-macros.S b/proc-macros.S
index 7d63bea..34261f9 100644
--- a/proc-macros.S
+++ b/proc-macros.S
@@ -61,27 +61,37 @@
 	.endm
 
 /*
- * cache_line_size - get the cache line size from the CSIDR register
- * (available on ARMv7+). It assumes that the CSSR register was configured
- * to access the L1 data cache CSIDR.
+ * dcache_line_size - get the minimum D-cache line size from the CTR register
+ * on ARMv7.
  */
 	.macro	dcache_line_size, reg, tmp
-	mrc	p15, 1, \tmp, c0, c0, 0		@ read CSIDR
-	and	\tmp, \tmp, #7			@ cache line size encoding
-	mov	\reg, #16			@ size offset
+	mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
+	lsr	\tmp, \tmp, #16
+	and	\tmp, \tmp, #0xf		@ cache line size encoding
+	mov	\reg, #4			@ bytes per word
 	mov	\reg, \reg, lsl \tmp		@ actual cache line size
 	.endm
 
+/*
+ * icache_line_size - get the minimum I-cache line size from the CTR register
+ * on ARMv7.
+ */
+	.macro	icache_line_size, reg, tmp
+	mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
+	and	\tmp, \tmp, #0xf		@ cache line size encoding
+	mov	\reg, #4			@ bytes per word
+	mov	\reg, \reg, lsl \tmp		@ actual cache line size
+	.endm
 
 /*
  * Sanity check the PTE configuration for the code below - which makes
- * certain assumptions about how these bits are layed out.
+ * certain assumptions about how these bits are laid out.
  */
 #ifdef CONFIG_MMU
 #if L_PTE_SHARED != PTE_EXT_SHARED
 #error PTE shared bit mismatch
 #endif
-#if (L_PTE_EXEC+L_PTE_USER+L_PTE_WRITE+L_PTE_DIRTY+L_PTE_YOUNG+\
+#if (L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\
      L_PTE_FILE+L_PTE_PRESENT) > L_PTE_SHARED
 #error Invalid Linux PTE bit settings
 #endif
@@ -99,6 +109,10 @@
  *  110x   0   1   0	r/w	r/o
  *  11x0   0   1   0	r/w	r/o
  *  1111   0   1   1	r/w	r/w
+ *
+ * If !CONFIG_CPU_USE_DOMAINS, the following permissions are changed:
+ *  110x   1   1   1	r/o	r/o
+ *  11x0   1   1   1	r/o	r/o
  */
 	.macro	armv6_mt_table pfx
 \pfx\()_mt_table:
@@ -121,7 +135,7 @@
 	.endm
 
 	.macro	armv6_set_pte_ext pfx
-	str	r1, [r0], #-2048		@ linux version
+	str	r1, [r0], #2048			@ linux version
 
 	bic	r3, r1, #0x000003fc
 	bic	r3, r3, #PTE_TYPE_MASK
@@ -132,17 +146,20 @@
 	and	r2, r1, #L_PTE_MT_MASK
 	ldr	r2, [ip, r2]
 
-	tst	r1, #L_PTE_WRITE
-	tstne	r1, #L_PTE_DIRTY
-	orreq	r3, r3, #PTE_EXT_APX
+	eor	r1, r1, #L_PTE_DIRTY
+	tst	r1, #L_PTE_DIRTY|L_PTE_RDONLY
+	orrne	r3, r3, #PTE_EXT_APX
 
 	tst	r1, #L_PTE_USER
 	orrne	r3, r3, #PTE_EXT_AP1
+#ifdef CONFIG_CPU_USE_DOMAINS
+	@ allow kernel read/write access to read-only user pages
 	tstne	r3, #PTE_EXT_APX
 	bicne	r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
+#endif
 
-	tst	r1, #L_PTE_EXEC
-	orreq	r3, r3, #PTE_EXT_XN
+	tst	r1, #L_PTE_XN
+	orrne	r3, r3, #PTE_EXT_XN
 
 	orr	r3, r3, r2
 
@@ -170,9 +187,9 @@
  *  1111  0xff	r/w	r/w
  */
 	.macro	armv3_set_pte_ext wc_disable=1
-	str	r1, [r0], #-2048		@ linux version
+	str	r1, [r0], #2048			@ linux version
 
-	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
+	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY
 
 	bic	r2, r1, #PTE_SMALL_AP_MASK	@ keep C, B bits
 	bic	r2, r2, #PTE_TYPE_MASK
@@ -181,7 +198,7 @@
 	tst	r3, #L_PTE_USER			@ user?
 	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
 
-	tst	r3, #L_PTE_WRITE | L_PTE_DIRTY	@ write and dirty?
+	tst	r3, #L_PTE_RDONLY | L_PTE_DIRTY	@ write and dirty?
 	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
 
 	tst	r3, #L_PTE_PRESENT | L_PTE_YOUNG	@ present and young?
@@ -193,7 +210,7 @@
 	bicne	r2, r2, #PTE_BUFFERABLE
 #endif
 	.endif
-	str	r2, [r0]			@ hardware version
+	str	r2, [r0]		@ hardware version
 	.endm
 
 
@@ -213,9 +230,9 @@
  *  1111  11	r/w	r/w
  */
 	.macro	xscale_set_pte_ext_prologue
-	str	r1, [r0], #-2048		@ linux version
+	str	r1, [r0]			@ linux version
 
-	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
+	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY
 
 	bic	r2, r1, #PTE_SMALL_AP_MASK	@ keep C, B bits
 	orr	r2, r2, #PTE_TYPE_EXT		@ extended page
@@ -223,7 +240,7 @@
 	tst	r3, #L_PTE_USER			@ user?
 	orrne	r2, r2, #PTE_EXT_AP_URO_SRW	@ yes -> user r/o, system r/w
 
-	tst	r3, #L_PTE_WRITE | L_PTE_DIRTY	@ write and dirty?
+	tst	r3, #L_PTE_RDONLY | L_PTE_DIRTY	@ write and dirty?
 	orreq	r2, r2, #PTE_EXT_AP_UNO_SRW	@ yes -> user n/a, system r/w
 						@ combined with user -> user r/w
 	.endm
@@ -232,7 +249,7 @@
 	tst	r3, #L_PTE_PRESENT | L_PTE_YOUNG	@ present and young?
 	movne	r2, #0				@ no -> fault
 
-	str	r2, [r0]			@ hardware version
+	str	r2, [r0, #2048]!		@ hardware version
 	mov	ip, #0
 	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
 	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
diff --git a/proc-v7.S b/proc-v7.S
index d2a8074..402ec2b 100644
--- a/proc-v7.S
+++ b/proc-v7.S
@@ -11,11 +11,11 @@
  */
 #include <linux/init.h>
 #include <linux/linkage.h>
-#include <asm/assembler.h>
+#include "hwcap.h"
+#include "assembler.h"
 #include <asm/asm-offsets.h>
-#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
-#include <asm/pgtable.h>
+#include "pgtable.h"
 
 #include "proc-macros.S"
 
@@ -30,29 +30,24 @@
 #define TTB_IRGN_WT	((1 << 0) | (0 << 6))
 #define TTB_IRGN_WB	((1 << 0) | (1 << 6))
 
-#ifndef CONFIG_SMP
 /* PTWs cacheable, inner WB not shareable, outer WB not shareable */
-#define TTB_FLAGS	TTB_IRGN_WB|TTB_RGN_OC_WB
-#define PMD_FLAGS	PMD_SECT_WB
-#else
+#define TTB_FLAGS_UP	TTB_IRGN_WB|TTB_RGN_OC_WB
+#define PMD_FLAGS_UP	PMD_SECT_WB
+
 /* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */
-#define TTB_FLAGS	TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA
-#define PMD_FLAGS	PMD_SECT_WBWA|PMD_SECT_S
-#endif
+#define TTB_FLAGS_SMP	TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA
+#define PMD_FLAGS_SMP	PMD_SECT_WBWA|PMD_SECT_S
 
 ENTRY(cpu_v7_proc_init)
 	mov	pc, lr
 ENDPROC(cpu_v7_proc_init)
 
 ENTRY(cpu_v7_proc_fin)
-	stmfd	sp!, {lr}
-	cpsid	if				@ disable interrupts
-	bl	v7_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x0006			@ .............ca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 ENDPROC(cpu_v7_proc_fin)
 
 /*
@@ -63,8 +58,6 @@ ENDPROC(cpu_v7_proc_fin)
  *	to what would be the reset vector.
  *
  *	- loc   - location to jump to for soft reset
- *
- *	It is assumed that:
  */
 	.align	5
 ENTRY(cpu_v7_reset)
@@ -110,14 +103,21 @@ ENTRY(cpu_v7_switch_mm)
 #ifdef CONFIG_MMU
 	mov	r2, #0
 	ldr	r1, [r1, #MM_CONTEXT_ID]	@ get mm->context.id
-	orr	r0, r0, #TTB_FLAGS
+	ALT_SMP(orr	r0, r0, #TTB_FLAGS_SMP)
+	ALT_UP(orr	r0, r0, #TTB_FLAGS_UP)
 #ifdef CONFIG_ARM_ERRATA_430973
 	mcr	p15, 0, r2, c7, c5, 6		@ flush BTAC/BTB
+#endif
+#ifdef CONFIG_ARM_ERRATA_754322
+	dsb
 #endif
 	mcr	p15, 0, r2, c13, c0, 1		@ set reserved context ID
 	isb
 1:	mcr	p15, 0, r0, c2, c0, 0		@ set TTB 0
 	isb
+#ifdef CONFIG_ARM_ERRATA_754322
+	dsb
+#endif
 	mcr	p15, 0, r1, c13, c0, 1		@ set context ID
 	isb
 #endif
@@ -130,15 +130,13 @@ ENDPROC(cpu_v7_switch_mm)
  *	Set a level 2 translation table entry.
  *
  *	- ptep  - pointer to level 2 translation table entry
- *		  (hardware version is stored at -1024 bytes)
+ *		  (hardware version is stored at +2048 bytes)
  *	- pte   - PTE value to store
  *	- ext	- value for extended PTE bits
  */
 ENTRY(cpu_v7_set_pte_ext)
 #ifdef CONFIG_MMU
- ARM(	str	r1, [r0], #-2048	)	@ linux version
- THUMB(	str	r1, [r0]		)	@ linux version
- THUMB(	sub	r0, r0, #2048		)
+	str	r1, [r0]			@ linux version
 
 	bic	r3, r1, #0x000003f0
 	bic	r3, r3, #PTE_TYPE_MASK
@@ -148,23 +146,28 @@ ENTRY(cpu_v7_set_pte_ext)
 	tst	r1, #1 << 4
 	orrne	r3, r3, #PTE_EXT_TEX(1)
 
-	tst	r1, #L_PTE_WRITE
-	tstne	r1, #L_PTE_DIRTY
-	orreq	r3, r3, #PTE_EXT_APX
+	eor	r1, r1, #L_PTE_DIRTY
+	tst	r1, #L_PTE_RDONLY | L_PTE_DIRTY
+	orrne	r3, r3, #PTE_EXT_APX
 
 	tst	r1, #L_PTE_USER
 	orrne	r3, r3, #PTE_EXT_AP1
+#ifdef CONFIG_CPU_USE_DOMAINS
+	@ allow kernel read/write access to read-only user pages
 	tstne	r3, #PTE_EXT_APX
 	bicne	r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
+#endif
 
-	tst	r1, #L_PTE_EXEC
-	orreq	r3, r3, #PTE_EXT_XN
+	tst	r1, #L_PTE_XN
+	orrne	r3, r3, #PTE_EXT_XN
 
 	tst	r1, #L_PTE_YOUNG
 	tstne	r1, #L_PTE_PRESENT
 	moveq	r3, #0
 
-	str	r3, [r0]
+ ARM(	str	r3, [r0, #2048]! )
+ THUMB(	add	r0, r0, #2048 )
+ THUMB(	str	r3, [r0] )
 	mcr	p15, 0, r0, c7, c10, 1		@ flush_pte
 #endif
 	mov	pc, lr
@@ -174,7 +177,88 @@ cpu_v7_name:
 	.ascii	"ARMv7 Processor"
 	.align
 
-	__INIT
+	/*
+	 * Memory region attributes with SCTLR.TRE=1
+	 *
+	 *   n = TEX[0],C,B
+	 *   TR = PRRR[2n+1:2n]		- memory type
+	 *   IR = NMRR[2n+1:2n]		- inner cacheable property
+	 *   OR = NMRR[2n+17:2n+16]	- outer cacheable property
+	 *
+	 *			n	TR	IR	OR
+	 *   UNCACHED		000	00
+	 *   BUFFERABLE		001	10	00	00
+	 *   WRITETHROUGH	010	10	10	10
+	 *   WRITEBACK		011	10	11	11
+	 *   reserved		110
+	 *   WRITEALLOC		111	10	01	01
+	 *   DEV_SHARED		100	01
+	 *   DEV_NONSHARED	100	01
+	 *   DEV_WC		001	10
+	 *   DEV_CACHED		011	10
+	 *
+	 * Other attributes:
+	 *
+	 *   DS0 = PRRR[16] = 0		- device shareable property
+	 *   DS1 = PRRR[17] = 1		- device shareable property
+	 *   NS0 = PRRR[18] = 0		- normal shareable property
+	 *   NS1 = PRRR[19] = 1		- normal shareable property
+	 *   NOS = PRRR[24+n] = 1	- not outer shareable
+	 */
+.equ	PRRR,	0xff0a81a8
+.equ	NMRR,	0x40e040e0
+
+/* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */
+.globl	cpu_v7_suspend_size
+.equ	cpu_v7_suspend_size, 4 * 8
+#if 0
+ENTRY(cpu_v7_do_suspend)
+	stmfd	sp!, {r4 - r11, lr}
+	mrc	p15, 0, r4, c13, c0, 0	@ FCSE/PID
+	mrc	p15, 0, r5, c13, c0, 1	@ Context ID
+	mrc	p15, 0, r6, c3, c0, 0	@ Domain ID
+	mrc	p15, 0, r7, c2, c0, 0	@ TTB 0
+	mrc	p15, 0, r8, c2, c0, 1	@ TTB 1
+	mrc	p15, 0, r9, c1, c0, 0	@ Control register
+	mrc	p15, 0, r10, c1, c0, 1	@ Auxiliary control register
+	mrc	p15, 0, r11, c1, c0, 2	@ Co-processor access control
+	stmia	r0, {r4 - r11}
+	ldmfd	sp!, {r4 - r11, pc}
+ENDPROC(cpu_v7_do_suspend)
+
+ENTRY(cpu_v7_do_resume)
+	mov	ip, #0
+	mcr	p15, 0, ip, c8, c7, 0	@ invalidate TLBs
+	mcr	p15, 0, ip, c7, c5, 0	@ invalidate I cache
+	ldmia	r0, {r4 - r11}
+	mcr	p15, 0, r4, c13, c0, 0	@ FCSE/PID
+	mcr	p15, 0, r5, c13, c0, 1	@ Context ID
+	mcr	p15, 0, r6, c3, c0, 0	@ Domain ID
+	mcr	p15, 0, r7, c2, c0, 0	@ TTB 0
+	mcr	p15, 0, r8, c2, c0, 1	@ TTB 1
+	mcr	p15, 0, ip, c2, c0, 2	@ TTB control register
+	mcr	p15, 0, r10, c1, c0, 1	@ Auxiliary control register
+	mcr	p15, 0, r11, c1, c0, 2	@ Co-processor access control
+	ldr	r4, =PRRR		@ PRRR
+	ldr	r5, =NMRR		@ NMRR
+	mcr	p15, 0, r4, c10, c2, 0	@ write PRRR
+	mcr	p15, 0, r5, c10, c2, 1	@ write NMRR
+	isb
+	mov	r0, r9			@ control register
+	mov	r2, r7, lsr #14		@ get TTB0 base
+	mov	r2, r2, lsl #14
+	ldr	r3, cpu_resume_l1_flags
+	b	cpu_resume_mmu
+ENDPROC(cpu_v7_do_resume)
+cpu_resume_l1_flags:
+	ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_FLAGS_SMP)
+	ALT_UP(.long  PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_FLAGS_UP)
+#else
+#define cpu_v7_do_suspend	0
+#define cpu_v7_do_resume	0
+#endif
+
+	__CPUINIT
 
 /*
  *	__v7_setup
@@ -191,13 +275,15 @@ cpu_v7_name:
  *	It is assumed that:
  *	- cache type register is implemented
  */
-__v7_setup:
+__v7_ca9mp_setup:
 #ifdef CONFIG_SMP
-	mrc	p15, 0, r0, c1, c0, 1
+	ALT_SMP(mrc	p15, 0, r0, c1, c0, 1)
+	ALT_UP(mov	r0, #(1 << 6))		@ fake it for UP
 	tst	r0, #(1 << 6)			@ SMP/nAMP mode enabled?
 	orreq	r0, r0, #(1 << 6) | (1 << 0)	@ Enable SMP/nAMP mode and
 	mcreq	p15, 0, r0, c1, c0, 1		@ TLB ops broadcasting
 #endif
+__v7_setup:
 	adr	r12, __v7_setup_stack		@ the local stack
 	stmia	r12, {r0-r5, r7, r9, r11, lr}
 	bl	v7_flush_dcache_all
@@ -206,11 +292,16 @@ __v7_setup:
 	mrc	p15, 0, r0, c0, c0, 0		@ read main ID register
 	and	r10, r0, #0xff000000		@ ARM?
 	teq	r10, #0x41000000
-	bne	2f
+	bne	3f
 	and	r5, r0, #0x00f00000		@ variant
 	and	r6, r0, #0x0000000f		@ revision
-	orr	r0, r6, r5, lsr #20-4		@ combine variant and revision
+	orr	r6, r6, r5, lsr #20-4		@ combine variant and revision
+	ubfx	r0, r0, #4, #12			@ primary part number
 
+	/* Cortex-A8 Errata */
+	ldr	r10, =0x00000c08		@ Cortex-A8 primary part number
+	teq	r0, r10
+	bne	2f
 #ifdef CONFIG_ARM_ERRATA_430973
 	teq	r5, #0x00100000			@ only present in r1p*
 	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
@@ -218,21 +309,56 @@ __v7_setup:
 	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
 #endif
 #ifdef CONFIG_ARM_ERRATA_458693
-	teq	r0, #0x20			@ only present in r2p0
+	teq	r6, #0x20			@ only present in r2p0
 	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
 	orreq	r10, r10, #(1 << 5)		@ set L1NEON to 1
 	orreq	r10, r10, #(1 << 9)		@ set PLDNOP to 1
 	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
 #endif
 #ifdef CONFIG_ARM_ERRATA_460075
-	teq	r0, #0x20			@ only present in r2p0
+	teq	r6, #0x20			@ only present in r2p0
 	mrceq	p15, 1, r10, c9, c0, 2		@ read L2 cache aux ctrl register
 	tsteq	r10, #1 << 22
 	orreq	r10, r10, #(1 << 22)		@ set the Write Allocate disable bit
 	mcreq	p15, 1, r10, c9, c0, 2		@ write the L2 cache aux ctrl register
 #endif
+	b	3f
+
+	/* Cortex-A9 Errata */
+2:	ldr	r10, =0x00000c09		@ Cortex-A9 primary part number
+	teq	r0, r10
+	bne	3f
+#ifdef CONFIG_ARM_ERRATA_742230
+	cmp	r6, #0x22			@ only present up to r2p2
+	mrcle	p15, 0, r10, c15, c0, 1		@ read diagnostic register
+	orrle	r10, r10, #1 << 4		@ set bit #4
+	mcrle	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+#endif
+#ifdef CONFIG_ARM_ERRATA_742231
+	teq	r6, #0x20			@ present in r2p0
+	teqne	r6, #0x21			@ present in r2p1
+	teqne	r6, #0x22			@ present in r2p2
+	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
+	orreq	r10, r10, #1 << 12		@ set bit #12
+	orreq	r10, r10, #1 << 22		@ set bit #22
+	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+#endif
+#ifdef CONFIG_ARM_ERRATA_743622
+	teq	r6, #0x20			@ present in r2p0
+	teqne	r6, #0x21			@ present in r2p1
+	teqne	r6, #0x22			@ present in r2p2
+	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
+	orreq	r10, r10, #1 << 6		@ set bit #6
+	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+#endif
+#ifdef CONFIG_ARM_ERRATA_751472
+	cmp	r6, #0x30			@ present prior to r3p0
+	mrclt	p15, 0, r10, c15, c0, 1		@ read diagnostic register
+	orrlt	r10, r10, #1 << 11		@ set bit #11
+	mcrlt	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+#endif
 
-2:	mov	r10, #0
+3:	mov	r10, #0
 #ifdef HARVARD_CACHE
 	mcr	p15, 0, r10, c7, c5, 0		@ I+BTB cache invalidate
 #endif
@@ -240,40 +366,11 @@ __v7_setup:
 #ifdef CONFIG_MMU
 	mcr	p15, 0, r10, c8, c7, 0		@ invalidate I + D TLBs
 	mcr	p15, 0, r10, c2, c0, 2		@ TTB control register
-	orr	r4, r4, #TTB_FLAGS
+	ALT_SMP(orr	r4, r4, #TTB_FLAGS_SMP)
+	ALT_UP(orr	r4, r4, #TTB_FLAGS_UP)
 	mcr	p15, 0, r4, c2, c0, 1		@ load TTB1
-	mov	r10, #0x1f			@ domains 0, 1 = manager
-	mcr	p15, 0, r10, c3, c0, 0		@ load domain access register
-	/*
-	 * Memory region attributes with SCTLR.TRE=1
-	 *
-	 *   n = TEX[0],C,B
-	 *   TR = PRRR[2n+1:2n]		- memory type
-	 *   IR = NMRR[2n+1:2n]		- inner cacheable property
-	 *   OR = NMRR[2n+17:2n+16]	- outer cacheable property
-	 *
-	 *			n	TR	IR	OR
-	 *   UNCACHED		000	00
-	 *   BUFFERABLE		001	10	00	00
-	 *   WRITETHROUGH	010	10	10	10
-	 *   WRITEBACK		011	10	11	11
-	 *   reserved		110
-	 *   WRITEALLOC		111	10	01	01
-	 *   DEV_SHARED		100	01
-	 *   DEV_NONSHARED	100	01
-	 *   DEV_WC		001	10
-	 *   DEV_CACHED		011	10
-	 *
-	 * Other attributes:
-	 *
-	 *   DS0 = PRRR[16] = 0		- device shareable property
-	 *   DS1 = PRRR[17] = 1		- device shareable property
-	 *   NS0 = PRRR[18] = 0		- normal shareable property
-	 *   NS1 = PRRR[19] = 1		- normal shareable property
-	 *   NOS = PRRR[24+n] = 1	- not outer shareable
-	 */
-	ldr	r5, =0xff0a81a8			@ PRRR
-	ldr	r6, =0x40e040e0			@ NMRR
+	ldr	r5, =PRRR			@ PRRR
+	ldr	r6, =NMRR			@ NMRR
 	mcr	p15, 0, r5, c10, c2, 0		@ write PRRR
 	mcr	p15, 0, r6, c10, c2, 1		@ write NMRR
 #endif
@@ -281,6 +378,10 @@ __v7_setup:
 	ldmia	r5, {r5, r6}
 #ifdef CONFIG_CPU_ENDIAN_BE8
 	orr	r6, r6, #1 << 25		@ big-endian page tables
+#endif
+#ifdef CONFIG_SWP_EMULATE
+	orr     r5, r5, #(1 << 10)              @ set SW bit in "clear"
+	bic     r6, r6, #(1 << 10)              @ clear it in "mmuset"
 #endif
    	mrc	p15, 0, r0, c1, c0, 0		@ read control register
 	bic	r0, r0, r5			@ clear bits them
@@ -302,6 +403,8 @@ v7_crval:
 __v7_setup_stack:
 	.space	4 * 11				@ 11 registers
 
+	__INITDATA
+
 	.type	v7_processor_functions, #object
 ENTRY(v7_processor_functions)
 	.word	v7_early_abort
@@ -313,8 +416,13 @@ ENTRY(v7_processor_functions)
 	.word	cpu_v7_dcache_clean_area
 	.word	cpu_v7_switch_mm
 	.word	cpu_v7_set_pte_ext
+	.word	0
+	.word	0
+	.word	0
 	.size	v7_processor_functions, . - v7_processor_functions
 
+	.section ".rodata"
+
 	.type	cpu_arch_name, #object
 cpu_arch_name:
 	.asciz	"armv7"
@@ -328,6 +436,35 @@ cpu_elf_name:
 
 	.section ".proc.info.init", #alloc, #execinstr
 
+	.type   __v7_ca9mp_proc_info, #object
+__v7_ca9mp_proc_info:
+	.long	0x410fc090		@ Required ID value
+	.long	0xff0ffff0		@ Mask for ID
+	ALT_SMP(.long \
+		PMD_TYPE_SECT | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ | \
+		PMD_FLAGS_SMP)
+	ALT_UP(.long \
+		PMD_TYPE_SECT | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ | \
+		PMD_FLAGS_UP)
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_XN | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	W(b)	__v7_ca9mp_setup
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS
+	.long	cpu_v7_name
+	.long	v7_processor_functions
+	.long	v7wbi_tlb_fns
+	.long	v6_user_fns
+	.long	v7_cache_fns
+	.size	__v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info
+
 	/*
 	 * Match any ARMv7 processor core.
 	 */
@@ -335,18 +472,24 @@ cpu_elf_name:
 __v7_proc_info:
 	.long	0x000f0000		@ Required ID value
 	.long	0x000f0000		@ Mask for ID
-	.long   PMD_TYPE_SECT | \
+	ALT_SMP(.long \
+		PMD_TYPE_SECT | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ | \
+		PMD_FLAGS_SMP)
+	ALT_UP(.long \
+		PMD_TYPE_SECT | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ | \
-		PMD_FLAGS
+		PMD_FLAGS_UP)
 	.long   PMD_TYPE_SECT | \
 		PMD_SECT_XN | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__v7_setup
+	W(b)	__v7_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
-	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS
 	.long	cpu_v7_name
 	.long	v7_processor_functions
 	.long	v7wbi_tlb_fns
diff --git a/relocate_kernel.S b/relocate_kernel.S
index 61930eb..9cf4cbf 100644
--- a/relocate_kernel.S
+++ b/relocate_kernel.S
@@ -10,6 +10,12 @@ relocate_new_kernel:
 	ldr	r0,kexec_indirection_page
 	ldr	r1,kexec_start_address
 
+	/*
+	 * If there is no indirection page (we are doing crashdumps)
+	 * If there is no indirection page (we are doing crashdumps),
+	 */
+	cmp	r0, #0
+	beq	2f
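+
+	/*
+	 * Each word in the indirection list encodes its type in its low
+	 * bits: IND_DESTINATION (1) sets the copy destination,
+	 * IND_INDIRECTION (2) chains to the next indirection page,
+	 * IND_DONE (4) ends the walk, and IND_SOURCE (8) copies one
+	 * page to the current destination.
+	 */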
 
 0:	/* top, read another word for the indirection page */
 	ldr	r3, [r0],#4
@@ -53,6 +59,8 @@ relocate_new_kernel:
 	ldr r2,kexec_boot_atags
 	mov pc,lr
 
+	.align
+
 	.globl kexec_start_address
 kexec_start_address:
 	.long	0x0
diff --git a/tlb-v7.S b/tlb-v7.S
index a26a605..f8f3962 100644
--- a/tlb-v7.S
+++ b/tlb-v7.S
@@ -13,9 +13,10 @@
  */
 #include <linux/init.h>
 #include <linux/linkage.h>
+#include "assembler.h"
 #include <asm/asm-offsets.h>
 #include <asm/page.h>
-#include <asm/tlbflush.h>
+#include "tlbflush.h"
 #include "proc-macros.S"
 
 /*
@@ -40,18 +41,16 @@ ENTRY(v7wbi_flush_user_tlb_range)
 	asid	r3, r3				@ mask ASID
 	orr	r0, r3, r0, lsl #PAGE_SHIFT	@ Create initial MVA
 	mov	r1, r1, lsl #PAGE_SHIFT
-	vma_vm_flags r2, r2			@ get vma->vm_flags
 1:
-#ifdef CONFIG_SMP
-	mcr	p15, 0, r0, c8, c3, 1		@ TLB invalidate U MVA (shareable) 
-#else
-	mcr	p15, 0, r0, c8, c7, 1		@ TLB invalidate U MVA
-#endif
+	ALT_SMP(mcr	p15, 0, r0, c8, c3, 1)	@ TLB invalidate U MVA (shareable)
+	ALT_UP(mcr	p15, 0, r0, c8, c7, 1)	@ TLB invalidate U MVA
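+	@ ALT_SMP/ALT_UP assemble the SMP variant in line and stash the UP
+	@ variant in the .alt.smp.init section; a kernel booting on a UP
+	@ system patches the SMP instruction with the UP one.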
+
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
 	mov	ip, #0
-	mcr	p15, 0, ip, c7, c5, 6		@ flush BTAC/BTB
+	ALT_SMP(mcr	p15, 0, ip, c7, c1, 6)	@ flush BTAC/BTB Inner Shareable
+	ALT_UP(mcr	p15, 0, ip, c7, c5, 6)	@ flush BTAC/BTB
 	dsb
 	mov	pc, lr
 ENDPROC(v7wbi_flush_user_tlb_range)
@@ -71,16 +70,14 @@ ENTRY(v7wbi_flush_kern_tlb_range)
 	mov	r0, r0, lsl #PAGE_SHIFT
 	mov	r1, r1, lsl #PAGE_SHIFT
 1:
-#ifdef CONFIG_SMP
-	mcr	p15, 0, r0, c8, c3, 1		@ TLB invalidate U MVA (shareable)
-#else
-	mcr	p15, 0, r0, c8, c7, 1		@ TLB invalidate U MVA
-#endif
+	ALT_SMP(mcr	p15, 0, r0, c8, c3, 1)	@ TLB invalidate U MVA (shareable)
+	ALT_UP(mcr	p15, 0, r0, c8, c7, 1)	@ TLB invalidate U MVA
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
 	mov	r2, #0
-	mcr	p15, 0, r2, c7, c5, 6		@ flush BTAC/BTB
+	ALT_SMP(mcr	p15, 0, r2, c7, c1, 6)	@ flush BTAC/BTB Inner Shareable
+	ALT_UP(mcr	p15, 0, r2, c7, c5, 6)	@ flush BTAC/BTB
 	dsb
 	isb
 	mov	pc, lr
@@ -92,5 +89,6 @@ ENDPROC(v7wbi_flush_kern_tlb_range)
 ENTRY(v7wbi_tlb_fns)
 	.long	v7wbi_flush_user_tlb_range
 	.long	v7wbi_flush_kern_tlb_range
-	.long	v7wbi_tlb_flags
+	ALT_SMP(.long	v7wbi_tlb_flags_smp)
+	ALT_UP(.long	v7wbi_tlb_flags_up)
 	.size	v7wbi_tlb_fns, . - v7wbi_tlb_fns
diff --git a/tlbflush.h b/tlbflush.h
new file mode 100644
index 0000000..d2005de
--- /dev/null
+++ b/tlbflush.h
@@ -0,0 +1,587 @@
+/*
+ *  arch/arm/include/asm/tlbflush.h
+ *
+ *  Copyright (C) 1999-2003 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _ASMARM_TLBFLUSH_H
+#define _ASMARM_TLBFLUSH_H
+
+#ifdef CONFIG_MMU
+
+#include <asm/glue.h>
+
+#define TLB_V3_PAGE	(1 << 0)
+#define TLB_V4_U_PAGE	(1 << 1)
+#define TLB_V4_D_PAGE	(1 << 2)
+#define TLB_V4_I_PAGE	(1 << 3)
+#define TLB_V6_U_PAGE	(1 << 4)
+#define TLB_V6_D_PAGE	(1 << 5)
+#define TLB_V6_I_PAGE	(1 << 6)
+
+#define TLB_V3_FULL	(1 << 8)
+#define TLB_V4_U_FULL	(1 << 9)
+#define TLB_V4_D_FULL	(1 << 10)
+#define TLB_V4_I_FULL	(1 << 11)
+#define TLB_V6_U_FULL	(1 << 12)
+#define TLB_V6_D_FULL	(1 << 13)
+#define TLB_V6_I_FULL	(1 << 14)
+
+#define TLB_V6_U_ASID	(1 << 16)
+#define TLB_V6_D_ASID	(1 << 17)
+#define TLB_V6_I_ASID	(1 << 18)
+
+#define TLB_BTB		(1 << 28)
+
+/* Unified Inner Shareable TLB operations (ARMv7 MP extensions) */
+#define TLB_V7_UIS_PAGE	(1 << 19)
+#define TLB_V7_UIS_FULL (1 << 20)
+#define TLB_V7_UIS_ASID (1 << 21)
+
+/* Inner Shareable BTB operation (ARMv7 MP extensions) */
+#define TLB_V7_IS_BTB	(1 << 22)
+
+#define TLB_L2CLEAN_FR	(1 << 29)		/* Feroceon */
+#define TLB_DCLEAN	(1 << 30)
+#define TLB_WB		(1 << 31)
+
+/*
+ *	MMU TLB Model
+ *	=============
+ *
+ *	We have the following to choose from:
+ *	  v3    - ARMv3
+ *	  v4    - ARMv4 without write buffer
+ *	  v4wb  - ARMv4 with write buffer without I TLB flush entry instruction
+ *	  v4wbi - ARMv4 with write buffer with I TLB flush entry instruction
+ *	  fr    - Feroceon (v4wbi with non-outer-cacheable page table walks)
+ *	  fa    - Faraday (v4 with write buffer with UTLB and branch target buffer (BTB))
+ *	  v6wbi - ARMv6 with write buffer with I TLB flush entry instruction
+ *	  v7wbi - identical to v6wbi
+ */
+#undef _TLB
+#undef MULTI_TLB
+
+#ifdef CONFIG_SMP_ON_UP
+#define MULTI_TLB 1
+#endif
+
+#define v3_tlb_flags	(TLB_V3_FULL | TLB_V3_PAGE)
+
+#ifdef CONFIG_CPU_TLB_V3
+# define v3_possible_flags	v3_tlb_flags
+# define v3_always_flags	v3_tlb_flags
+# ifdef _TLB
+#  define MULTI_TLB 1
+# else
+#  define _TLB v3
+# endif
+#else
+# define v3_possible_flags	0
+# define v3_always_flags	(-1UL)
+#endif
+
+#define v4_tlb_flags	(TLB_V4_U_FULL | TLB_V4_U_PAGE)
+
+#ifdef CONFIG_CPU_TLB_V4WT
+# define v4_possible_flags	v4_tlb_flags
+# define v4_always_flags	v4_tlb_flags
+# ifdef _TLB
+#  define MULTI_TLB 1
+# else
+#  define _TLB v4
+# endif
+#else
+# define v4_possible_flags	0
+# define v4_always_flags	(-1UL)
+#endif
+
+#define fa_tlb_flags	(TLB_WB | TLB_BTB | TLB_DCLEAN | \
+			 TLB_V4_U_FULL | TLB_V4_U_PAGE)
+
+#ifdef CONFIG_CPU_TLB_FA
+# define fa_possible_flags	fa_tlb_flags
+# define fa_always_flags	fa_tlb_flags
+# ifdef _TLB
+#  define MULTI_TLB 1
+# else
+#  define _TLB fa
+# endif
+#else
+# define fa_possible_flags	0
+# define fa_always_flags	(-1UL)
+#endif
+
+#define v4wbi_tlb_flags	(TLB_WB | TLB_DCLEAN | \
+			 TLB_V4_I_FULL | TLB_V4_D_FULL | \
+			 TLB_V4_I_PAGE | TLB_V4_D_PAGE)
+
+#ifdef CONFIG_CPU_TLB_V4WBI
+# define v4wbi_possible_flags	v4wbi_tlb_flags
+# define v4wbi_always_flags	v4wbi_tlb_flags
+# ifdef _TLB
+#  define MULTI_TLB 1
+# else
+#  define _TLB v4wbi
+# endif
+#else
+# define v4wbi_possible_flags	0
+# define v4wbi_always_flags	(-1UL)
+#endif
+
+#define fr_tlb_flags	(TLB_WB | TLB_DCLEAN | TLB_L2CLEAN_FR | \
+			 TLB_V4_I_FULL | TLB_V4_D_FULL | \
+			 TLB_V4_I_PAGE | TLB_V4_D_PAGE)
+
+#ifdef CONFIG_CPU_TLB_FEROCEON
+# define fr_possible_flags	fr_tlb_flags
+# define fr_always_flags	fr_tlb_flags
+# ifdef _TLB
+#  define MULTI_TLB 1
+# else
+#  define _TLB v4wbi
+# endif
+#else
+# define fr_possible_flags	0
+# define fr_always_flags	(-1UL)
+#endif
+
+#define v4wb_tlb_flags	(TLB_WB | TLB_DCLEAN | \
+			 TLB_V4_I_FULL | TLB_V4_D_FULL | \
+			 TLB_V4_D_PAGE)
+
+#ifdef CONFIG_CPU_TLB_V4WB
+# define v4wb_possible_flags	v4wb_tlb_flags
+# define v4wb_always_flags	v4wb_tlb_flags
+# ifdef _TLB
+#  define MULTI_TLB 1
+# else
+#  define _TLB v4wb
+# endif
+#else
+# define v4wb_possible_flags	0
+# define v4wb_always_flags	(-1UL)
+#endif
+
+#define v6wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \
+			 TLB_V6_I_FULL | TLB_V6_D_FULL | \
+			 TLB_V6_I_PAGE | TLB_V6_D_PAGE | \
+			 TLB_V6_I_ASID | TLB_V6_D_ASID)
+
+#ifdef CONFIG_CPU_TLB_V6
+# define v6wbi_possible_flags	v6wbi_tlb_flags
+# define v6wbi_always_flags	v6wbi_tlb_flags
+# ifdef _TLB
+#  define MULTI_TLB 1
+# else
+#  define _TLB v6wbi
+# endif
+#else
+# define v6wbi_possible_flags	0
+# define v6wbi_always_flags	(-1UL)
+#endif
+
+#define v7wbi_tlb_flags_smp	(TLB_WB | TLB_DCLEAN | TLB_V7_IS_BTB | \
+			 TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID)
+#define v7wbi_tlb_flags_up	(TLB_WB | TLB_DCLEAN | TLB_BTB | \
+			 TLB_V6_U_FULL | TLB_V6_U_PAGE | TLB_V6_U_ASID)
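+
+/*
+ * The SMP set uses the MP extensions' inner-shareable unified TLB and
+ * branch-predictor maintenance operations; the UP set falls back to
+ * the local v6-style unified operations.
+ */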
+
+#ifdef CONFIG_CPU_TLB_V7
+
+# ifdef CONFIG_SMP_ON_UP
+#  define v7wbi_possible_flags	(v7wbi_tlb_flags_smp | v7wbi_tlb_flags_up)
+#  define v7wbi_always_flags	(v7wbi_tlb_flags_smp & v7wbi_tlb_flags_up)
+# elif defined(CONFIG_SMP)
+#  define v7wbi_possible_flags	v7wbi_tlb_flags_smp
+#  define v7wbi_always_flags	v7wbi_tlb_flags_smp
+# else
+#  define v7wbi_possible_flags	v7wbi_tlb_flags_up
+#  define v7wbi_always_flags	v7wbi_tlb_flags_up
+# endif
+# ifdef _TLB
+#  define MULTI_TLB 1
+# else
+#  define _TLB v7wbi
+# endif
+#else
+# define v7wbi_possible_flags	0
+# define v7wbi_always_flags	(-1UL)
+#endif
+
+#ifndef _TLB
+#error Unknown TLB model
+#endif
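+
+/*
+ * Each CONFIG_CPU_TLB_* block above either names its model as _TLB
+ * (when it is the only one configured, so calls resolve directly) or
+ * sets MULTI_TLB, in which case the flush operations are dispatched
+ * through the cpu_tlb function pointers below.  SMP_ON_UP kernels
+ * always take the MULTI_TLB path because the flag set is only known
+ * once the kernel has chosen between the SMP and UP variants.
+ */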
+
+#ifndef __ASSEMBLY__
+
+#include <linux/sched.h>
+
+struct cpu_tlb_fns {
+	void (*flush_user_range)(unsigned long, unsigned long, struct vm_area_struct *);
+	void (*flush_kern_range)(unsigned long, unsigned long);
+	unsigned long tlb_flags;
+};
+
+/*
+ * Select the calling method
+ */
+#ifdef MULTI_TLB
+
+#define __cpu_flush_user_tlb_range	cpu_tlb.flush_user_range
+#define __cpu_flush_kern_tlb_range	cpu_tlb.flush_kern_range
+
+#else
+
+#define __cpu_flush_user_tlb_range	__glue(_TLB,_flush_user_tlb_range)
+#define __cpu_flush_kern_tlb_range	__glue(_TLB,_flush_kern_tlb_range)
+
+extern void __cpu_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *);
+extern void __cpu_flush_kern_tlb_range(unsigned long, unsigned long);
+
+#endif
+
+extern struct cpu_tlb_fns cpu_tlb;
+
+#define __cpu_tlb_flags			cpu_tlb.tlb_flags
+
+/*
+ *	TLB Management
+ *	==============
+ *
+ *	The arch/arm/mm/tlb-*.S files implement these methods.
+ *
+ *	The TLB specific code is expected to perform whatever tests it
+ *	needs to determine if it should invalidate the TLB for each
+ *	call.  Start addresses are inclusive and end addresses are
+ *	exclusive; it is safe to round these addresses down.
+ *
+ *	flush_tlb_all()
+ *
+ *		Invalidate the entire TLB.
+ *
+ *	flush_tlb_mm(mm)
+ *
+ *		Invalidate all TLB entries in a particular address
+ *		space.
+ *		- mm	- mm_struct describing address space
+ *
+ *	flush_tlb_range(mm,start,end)
+ *
+ *		Invalidate a range of TLB entries in the specified
+ *		address space.
+ *		- mm	- mm_struct describing address space
+ *		- start - start address (may not be aligned)
+ *		- end	- end address (exclusive, may not be aligned)
+ *
+ *	flush_tlb_page(vaddr,vma)
+ *
+ *		Invalidate the specified page in the specified address range.
+ *		- vaddr - virtual address (may not be aligned)
+ *		- vma	- vma_struct describing address range
+ *
+ *	flush_kern_tlb_page(kaddr)
+ *
+ *		Invalidate the TLB entry for the specified page.  The address
+ *		will be in the kernel's virtual memory space.  Current uses
+ *		only require the D-TLB to be invalidated.
+ *		- kaddr - Kernel virtual memory address
+ */
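+
+/*
+ * A typical caller is the fault path: after updating a PTE the kernel
+ * invalidates any stale entry for that page with
+ *
+ *	flush_tlb_page(vma, address);
+ *
+ * on the CPUs that may have it cached.
+ */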
+
+/*
+ * We optimise the code below by:
+ *  - building a set of TLB flags that might be set in __cpu_tlb_flags
+ *  - building a set of TLB flags that will always be set in __cpu_tlb_flags
+ *  - if we're going to need __cpu_tlb_flags, access it once and only once
+ *
+ * This allows us to build optimal assembly for the single-CPU type case,
+ * and as close to optimal as the compiler constraints allow for the
+ * multi-CPU case.  We could do better for the multi-CPU case if the compiler
+ * implemented the "%?" method, but this has been discontinued due to too
+ * many people getting it wrong.
+ */
+#define possible_tlb_flags	(v3_possible_flags | \
+				 v4_possible_flags | \
+				 v4wbi_possible_flags | \
+				 fr_possible_flags | \
+				 v4wb_possible_flags | \
+				 fa_possible_flags | \
+				 v6wbi_possible_flags | \
+				 v7wbi_possible_flags)
+
+#define always_tlb_flags	(v3_always_flags & \
+				 v4_always_flags & \
+				 v4wbi_always_flags & \
+				 fr_always_flags & \
+				 v4wb_always_flags & \
+				 fa_always_flags & \
+				 v6wbi_always_flags & \
+				 v7wbi_always_flags)
+
+#define tlb_flag(f)	((always_tlb_flags & (f)) || (__tlb_flag & possible_tlb_flags & (f)))
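+
+/*
+ * With a single TLB model configured, e.g. CONFIG_CPU_TLB_V7 on an
+ * SMP build, always_tlb_flags collapses to v7wbi_tlb_flags_smp, so a
+ * test such as tlb_flag(TLB_WB) folds to a compile-time constant and
+ * the compiler can drop the runtime read of __cpu_tlb_flags entirely.
+ */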
+
+static inline void local_flush_tlb_all(void)
+{
+	const int zero = 0;
+	const unsigned int __tlb_flag = __cpu_tlb_flags;
+
+	if (tlb_flag(TLB_WB))
+		dsb();
+
+	if (tlb_flag(TLB_V3_FULL))
+		asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (zero) : "cc");
+	if (tlb_flag(TLB_V4_U_FULL | TLB_V6_U_FULL))
+		asm("mcr p15, 0, %0, c8, c7, 0" : : "r" (zero) : "cc");
+	if (tlb_flag(TLB_V4_D_FULL | TLB_V6_D_FULL))
+		asm("mcr p15, 0, %0, c8, c6, 0" : : "r" (zero) : "cc");
+	if (tlb_flag(TLB_V4_I_FULL | TLB_V6_I_FULL))
+		asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc");
+	if (tlb_flag(TLB_V7_UIS_FULL))
+		asm("mcr p15, 0, %0, c8, c3, 0" : : "r" (zero) : "cc");
+
+	if (tlb_flag(TLB_BTB)) {
+		/* flush the branch target cache */
+		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc");
+		dsb();
+		isb();
+	}
+	if (tlb_flag(TLB_V7_IS_BTB)) {
+		/* flush the branch target cache */
+		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+		dsb();
+		isb();
+	}
+}
+
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
+{
+	const int zero = 0;
+	const int asid = ASID(mm);
+	const unsigned int __tlb_flag = __cpu_tlb_flags;
+
+	if (tlb_flag(TLB_WB))
+		dsb();
+
+	if (cpumask_test_cpu(get_cpu(), mm_cpumask(mm))) {
+		if (tlb_flag(TLB_V3_FULL))
+			asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (zero) : "cc");
+		if (tlb_flag(TLB_V4_U_FULL))
+			asm("mcr p15, 0, %0, c8, c7, 0" : : "r" (zero) : "cc");
+		if (tlb_flag(TLB_V4_D_FULL))
+			asm("mcr p15, 0, %0, c8, c6, 0" : : "r" (zero) : "cc");
+		if (tlb_flag(TLB_V4_I_FULL))
+			asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc");
+	}
+	put_cpu();
+
+	if (tlb_flag(TLB_V6_U_ASID))
+		asm("mcr p15, 0, %0, c8, c7, 2" : : "r" (asid) : "cc");
+	if (tlb_flag(TLB_V6_D_ASID))
+		asm("mcr p15, 0, %0, c8, c6, 2" : : "r" (asid) : "cc");
+	if (tlb_flag(TLB_V6_I_ASID))
+		asm("mcr p15, 0, %0, c8, c5, 2" : : "r" (asid) : "cc");
+	if (tlb_flag(TLB_V7_UIS_ASID))
+#ifdef CONFIG_ARM_ERRATA_720789
+		asm("mcr p15, 0, %0, c8, c3, 0" : : "r" (zero) : "cc");
+#else
+		asm("mcr p15, 0, %0, c8, c3, 2" : : "r" (asid) : "cc");
+#endif
+
+	if (tlb_flag(TLB_BTB)) {
+		/* flush the branch target cache */
+		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc");
+		dsb();
+	}
+	if (tlb_flag(TLB_V7_IS_BTB)) {
+		/* flush the branch target cache */
+		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+		dsb();
+		isb();
+	}
+}
+
+static inline void
+local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
+{
+	const int zero = 0;
+	const unsigned int __tlb_flag = __cpu_tlb_flags;
+
+	uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm);
+
+	if (tlb_flag(TLB_WB))
+		dsb();
+
+	if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
+		if (tlb_flag(TLB_V3_PAGE))
+			asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (uaddr) : "cc");
+		if (tlb_flag(TLB_V4_U_PAGE))
+			asm("mcr p15, 0, %0, c8, c7, 1" : : "r" (uaddr) : "cc");
+		if (tlb_flag(TLB_V4_D_PAGE))
+			asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (uaddr) : "cc");
+		if (tlb_flag(TLB_V4_I_PAGE))
+			asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (uaddr) : "cc");
+		if (!tlb_flag(TLB_V4_I_PAGE) && tlb_flag(TLB_V4_I_FULL))
+			asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc");
+	}
+
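+	/*
+	 * The v6/v7 per-page operations below take VA[31:12] with the
+	 * ASID in bits [7:0]; uaddr was packed into that format on entry.
+	 */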
+	if (tlb_flag(TLB_V6_U_PAGE))
+		asm("mcr p15, 0, %0, c8, c7, 1" : : "r" (uaddr) : "cc");
+	if (tlb_flag(TLB_V6_D_PAGE))
+		asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (uaddr) : "cc");
+	if (tlb_flag(TLB_V6_I_PAGE))
+		asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (uaddr) : "cc");
+	if (tlb_flag(TLB_V7_UIS_PAGE))
+#ifdef CONFIG_ARM_ERRATA_720789
+		asm("mcr p15, 0, %0, c8, c3, 3" : : "r" (uaddr & PAGE_MASK) : "cc");
+#else
+		asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (uaddr) : "cc");
+#endif
+
+	if (tlb_flag(TLB_BTB)) {
+		/* flush the branch target cache */
+		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc");
+		dsb();
+	}
+	if (tlb_flag(TLB_V7_IS_BTB)) {
+		/* flush the branch target cache */
+		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+		dsb();
+		isb();
+	}
+}
+
+static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
+{
+	const int zero = 0;
+	const unsigned int __tlb_flag = __cpu_tlb_flags;
+
+	kaddr &= PAGE_MASK;
+
+	if (tlb_flag(TLB_WB))
+		dsb();
+
+	if (tlb_flag(TLB_V3_PAGE))
+		asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (kaddr) : "cc");
+	if (tlb_flag(TLB_V4_U_PAGE))
+		asm("mcr p15, 0, %0, c8, c7, 1" : : "r" (kaddr) : "cc");
+	if (tlb_flag(TLB_V4_D_PAGE))
+		asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (kaddr) : "cc");
+	if (tlb_flag(TLB_V4_I_PAGE))
+		asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (kaddr) : "cc");
+	if (!tlb_flag(TLB_V4_I_PAGE) && tlb_flag(TLB_V4_I_FULL))
+		asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc");
+
+	if (tlb_flag(TLB_V6_U_PAGE))
+		asm("mcr p15, 0, %0, c8, c7, 1" : : "r" (kaddr) : "cc");
+	if (tlb_flag(TLB_V6_D_PAGE))
+		asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (kaddr) : "cc");
+	if (tlb_flag(TLB_V6_I_PAGE))
+		asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (kaddr) : "cc");
+	if (tlb_flag(TLB_V7_UIS_PAGE))
+		asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (kaddr) : "cc");
+
+	if (tlb_flag(TLB_BTB)) {
+		/* flush the branch target cache */
+		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc");
+		dsb();
+		isb();
+	}
+	if (tlb_flag(TLB_V7_IS_BTB)) {
+		/* flush the branch target cache */
+		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+		dsb();
+		isb();
+	}
+}
+
+/*
+ *	flush_pmd_entry
+ *
+ *	Flush a PMD entry (word aligned, or double-word aligned) to
+ *	RAM if the TLB for the CPU we are running on requires this.
+ *	This is typically used when we are creating PMD entries.
+ *
+ *	clean_pmd_entry
+ *
+ *	Clean (but don't drain the write buffer) if the CPU requires
+ *	these operations.  This is typically used when we are removing
+ *	PMD entries.
+ */
+static inline void flush_pmd_entry(pmd_t *pmd)
+{
+	const unsigned int __tlb_flag = __cpu_tlb_flags;
+
+	if (tlb_flag(TLB_DCLEAN))
+		asm("mcr	p15, 0, %0, c7, c10, 1	@ flush_pmd"
+			: : "r" (pmd) : "cc");
+
+	if (tlb_flag(TLB_L2CLEAN_FR))
+		asm("mcr	p15, 1, %0, c15, c9, 1  @ L2 flush_pmd"
+			: : "r" (pmd) : "cc");
+
+	if (tlb_flag(TLB_WB))
+		dsb();
+}
+
+static inline void clean_pmd_entry(pmd_t *pmd)
+{
+	const unsigned int __tlb_flag = __cpu_tlb_flags;
+
+	if (tlb_flag(TLB_DCLEAN))
+		asm("mcr	p15, 0, %0, c7, c10, 1	@ flush_pmd"
+			: : "r" (pmd) : "cc");
+
+	if (tlb_flag(TLB_L2CLEAN_FR))
+		asm("mcr	p15, 1, %0, c15, c9, 1  @ L2 flush_pmd"
+			: : "r" (pmd) : "cc");
+}
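+
+/*
+ * Typical use when creating a section mapping:
+ *
+ *	*pmd = __pmd(phys | prot);
+ *	flush_pmd_entry(pmd);
+ *
+ * which makes the new entry visible to the hardware table walker even
+ * when the walker does not snoop the D-cache.
+ */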
+
+#undef tlb_flag
+#undef always_tlb_flags
+#undef possible_tlb_flags
+
+/*
+ * Convert calls to our calling convention.
+ */
+#define local_flush_tlb_range(vma,start,end)	__cpu_flush_user_tlb_range(start,end,vma)
+#define local_flush_tlb_kernel_range(s,e)	__cpu_flush_kern_tlb_range(s,e)
+
+#ifndef CONFIG_SMP
+#define flush_tlb_all		local_flush_tlb_all
+#define flush_tlb_mm		local_flush_tlb_mm
+#define flush_tlb_page		local_flush_tlb_page
+#define flush_tlb_kernel_page	local_flush_tlb_kernel_page
+#define flush_tlb_range		local_flush_tlb_range
+#define flush_tlb_kernel_range	local_flush_tlb_kernel_range
+#else
+extern void flush_tlb_all(void);
+extern void flush_tlb_mm(struct mm_struct *mm);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr);
+extern void flush_tlb_kernel_page(unsigned long kaddr);
+extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+#endif
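+
+/*
+ * On SMP the flush_tlb_* entry points are out of line and broadcast
+ * the operation to the other CPUs by IPI when the TLB maintenance
+ * operations are not already broadcast in hardware by the MP
+ * extensions.
+ */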
+
+/*
+ * If PG_dcache_clean is not set for the page, we need to ensure that any
+ * cache entries for the kernel's virtual memory range are written
+ * back to the page. On ARMv6 and later, the cache coherency is handled via
+ * the set_pte_at() function.
+ */
+#if __LINUX_ARM_ARCH__ < 6
+extern void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
+	pte_t *ptep);
+#else
+static inline void update_mmu_cache(struct vm_area_struct *vma,
+				    unsigned long addr, pte_t *ptep)
+{
+}
+#endif
+
+#endif
+
+#endif /* CONFIG_MMU */
+
+#endif
-- 
2.39.5