From: Michael Gernoth Date: Mon, 23 May 2011 21:35:06 +0000 (+0200) Subject: update kexec to 2.6.39 version, still doesn't work... X-Git-Url: https://git.zerfleddert.de/cgi-bin/gitweb.cgi/ms2-kexec/commitdiff_plain/0c549ba1cfe1d2d61c07eccab7f274740b706ed6?ds=sidebyside update kexec to 2.6.39 version, still doesn't work... --- diff --git a/Makefile b/Makefile index 61a5644..bc86aac 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ EXTRA_CFLAGS += -DCONFIG_KEXEC -Wall obj-m += kexec_load.o kexec_load-objs := kexec.o machine_kexec.o idmap.o sys.o core.o relocate_kernel.o \ - proc-v7.o tlb-v7.o cache-v7.o abort-ev7.o pabort-v7.o copypage-v6.o entry-common.o driver_sys.o + proc-v7.o tlb-v7.o cache-v7.o abort-ev7.o pabort-v7.o copypage-v6.o driver_sys.o all: PATH=$(CROSS_PATH):$(PATH) CROSS_COMPILE=$(CROSS_COMPILE) ARCH=$(ARCH) make -C $(KDIR) M=$(PWD) modules diff --git a/abort-ev7.S b/abort-ev7.S index 2e6dc04..ec88b15 100644 --- a/abort-ev7.S +++ b/abort-ev7.S @@ -29,5 +29,26 @@ ENTRY(v7_early_abort) * V6 code adjusts the returned DFSR. * New designs should not need to patch up faults. */ + +#if defined(CONFIG_VERIFY_PERMISSION_FAULT) + /* + * Detect erroneous permission failures and fix + */ + ldr r3, =0x40d @ On permission fault + and r3, r1, r3 + cmp r3, #0x0d + movne pc, lr + + mcr p15, 0, r0, c7, c8, 0 @ Retranslate FAR + isb + mrc p15, 0, r2, c7, c4, 0 @ Read the PAR + and r3, r2, #0x7b @ On translation fault + cmp r3, #0x0b + movne pc, lr + bic r1, r1, #0xf @ Fix up FSR FS[5:0] + and r2, r2, #0x7e + orr r1, r1, r2, LSR #1 +#endif + mov pc, lr ENDPROC(v7_early_abort) diff --git a/assembler.h b/assembler.h new file mode 100644 index 0000000..bc2d2d7 --- /dev/null +++ b/assembler.h @@ -0,0 +1,292 @@ +/* + * arch/arm/include/asm/assembler.h + * + * Copyright (C) 1996-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This file contains arm architecture specific defines + * for the different processors. + * + * Do not include any C declarations in this file - it is included by + * assembler source. + */ +#ifndef __ASSEMBLY__ +#error "Only include this from assembly code" +#endif + +#include +#include + +/* + * Endian independent macros for shifting bytes within registers. + */ +#ifndef __ARMEB__ +#define pull lsr +#define push lsl +#define get_byte_0 lsl #0 +#define get_byte_1 lsr #8 +#define get_byte_2 lsr #16 +#define get_byte_3 lsr #24 +#define put_byte_0 lsl #0 +#define put_byte_1 lsl #8 +#define put_byte_2 lsl #16 +#define put_byte_3 lsl #24 +#else +#define pull lsl +#define push lsr +#define get_byte_0 lsr #24 +#define get_byte_1 lsr #16 +#define get_byte_2 lsr #8 +#define get_byte_3 lsl #0 +#define put_byte_0 lsl #24 +#define put_byte_1 lsl #16 +#define put_byte_2 lsl #8 +#define put_byte_3 lsl #0 +#endif + +/* + * Data preload for architectures that support it + */ +#if __LINUX_ARM_ARCH__ >= 5 +#define PLD(code...) code +#else +#define PLD(code...) +#endif + +/* + * This can be used to enable code to cacheline align the destination + * pointer when bulk writing to memory. Experiments on StrongARM and + * XScale didn't show this a worthwhile thing to do when the cache is not + * set to write-allocate (this would need further testing on XScale when WA + * is used). + * + * On Feroceon there is much to gain however, regardless of cache mode. + */ +#ifdef CONFIG_CPU_FEROCEON +#define CALGN(code...) code +#else +#define CALGN(code...) +#endif + +/* + * Enable and disable interrupts + */ +#if __LINUX_ARM_ARCH__ >= 6 + .macro disable_irq_notrace + cpsid i + .endm + + .macro enable_irq_notrace + cpsie i + .endm +#else + .macro disable_irq_notrace + msr cpsr_c, #PSR_I_BIT | SVC_MODE + .endm + + .macro enable_irq_notrace + msr cpsr_c, #SVC_MODE + .endm +#endif + + .macro asm_trace_hardirqs_off +#if defined(CONFIG_TRACE_IRQFLAGS) + stmdb sp!, {r0-r3, ip, lr} + bl trace_hardirqs_off + ldmia sp!, {r0-r3, ip, lr} +#endif + .endm + + .macro asm_trace_hardirqs_on_cond, cond +#if defined(CONFIG_TRACE_IRQFLAGS) + /* + * actually the registers should be pushed and pop'd conditionally, but + * after bl the flags are certainly clobbered + */ + stmdb sp!, {r0-r3, ip, lr} + bl\cond trace_hardirqs_on + ldmia sp!, {r0-r3, ip, lr} +#endif + .endm + + .macro asm_trace_hardirqs_on + asm_trace_hardirqs_on_cond al + .endm + + .macro disable_irq + disable_irq_notrace + asm_trace_hardirqs_off + .endm + + .macro enable_irq + asm_trace_hardirqs_on + enable_irq_notrace + .endm +/* + * Save the current IRQ state and disable IRQs. Note that this macro + * assumes FIQs are enabled, and that the processor is in SVC mode. + */ + .macro save_and_disable_irqs, oldcpsr + mrs \oldcpsr, cpsr + disable_irq + .endm + +/* + * Restore interrupt state previously stored in a register. We don't + * guarantee that this will preserve the flags. + */ + .macro restore_irqs_notrace, oldcpsr + msr cpsr_c, \oldcpsr + .endm + + .macro restore_irqs, oldcpsr + tst \oldcpsr, #PSR_I_BIT + asm_trace_hardirqs_on_cond eq + restore_irqs_notrace \oldcpsr + .endm + +#define USER(x...) \ +9999: x; \ + .pushsection __ex_table,"a"; \ + .align 3; \ + .long 9999b,9001f; \ + .popsection + +#ifdef CONFIG_SMP +#define ALT_SMP(instr...) \ +9998: instr +/* + * Note: if you get assembler errors from ALT_UP() when building with + * CONFIG_THUMB2_KERNEL, you almost certainly need to use + * ALT_SMP( W(instr) ... ) + */ +#define ALT_UP(instr...) \ + .pushsection ".alt.smp.init", "a" ;\ + .long 9998b ;\ +9997: instr ;\ + .if . - 9997b != 4 ;\ + .error "ALT_UP() content must assemble to exactly 4 bytes";\ + .endif ;\ + .popsection +#define ALT_UP_B(label) \ + .equ up_b_offset, label - 9998b ;\ + .pushsection ".alt.smp.init", "a" ;\ + .long 9998b ;\ + W(b) . + up_b_offset ;\ + .popsection +#else +#define ALT_SMP(instr...) +#define ALT_UP(instr...) instr +#define ALT_UP_B(label) b label +#endif + +/* + * SMP data memory barrier + */ + .macro smp_dmb mode +#ifdef CONFIG_SMP +#if __LINUX_ARM_ARCH__ >= 7 + .ifeqs "\mode","arm" + ALT_SMP(dmb) + .else + ALT_SMP(W(dmb)) + .endif +#elif __LINUX_ARM_ARCH__ == 6 + ALT_SMP(mcr p15, 0, r0, c7, c10, 5) @ dmb +#else +#error Incompatible SMP platform +#endif + .ifeqs "\mode","arm" + ALT_UP(nop) + .else + ALT_UP(W(nop)) + .endif +#endif + .endm + +#ifdef CONFIG_THUMB2_KERNEL + .macro setmode, mode, reg + mov \reg, #\mode + msr cpsr_c, \reg + .endm +#else + .macro setmode, mode, reg + msr cpsr_c, #\mode + .endm +#endif + +/* + * STRT/LDRT access macros with ARM and Thumb-2 variants + */ +#ifdef CONFIG_THUMB2_KERNEL + + .macro usraccoff, instr, reg, ptr, inc, off, cond, abort, t=T() +9999: + .if \inc == 1 + \instr\cond\()b\()\t\().w \reg, [\ptr, #\off] + .elseif \inc == 4 + \instr\cond\()\t\().w \reg, [\ptr, #\off] + .else + .error "Unsupported inc macro argument" + .endif + + .pushsection __ex_table,"a" + .align 3 + .long 9999b, \abort + .popsection + .endm + + .macro usracc, instr, reg, ptr, inc, cond, rept, abort + @ explicit IT instruction needed because of the label + @ introduced by the USER macro + .ifnc \cond,al + .if \rept == 1 + itt \cond + .elseif \rept == 2 + ittt \cond + .else + .error "Unsupported rept macro argument" + .endif + .endif + + @ Slightly optimised to avoid incrementing the pointer twice + usraccoff \instr, \reg, \ptr, \inc, 0, \cond, \abort + .if \rept == 2 + usraccoff \instr, \reg, \ptr, \inc, \inc, \cond, \abort + .endif + + add\cond \ptr, #\rept * \inc + .endm + +#else /* !CONFIG_THUMB2_KERNEL */ + + .macro usracc, instr, reg, ptr, inc, cond, rept, abort, t=T() + .rept \rept +9999: + .if \inc == 1 + \instr\cond\()b\()\t \reg, [\ptr], #\inc + .elseif \inc == 4 + \instr\cond\()\t \reg, [\ptr], #\inc + .else + .error "Unsupported inc macro argument" + .endif + + .pushsection __ex_table,"a" + .align 3 + .long 9999b, \abort + .popsection + .endr + .endm + +#endif /* CONFIG_THUMB2_KERNEL */ + + .macro strusr, reg, ptr, inc, cond=al, rept=1, abort=9001f + usracc str, \reg, \ptr, \inc, \cond, \rept, \abort + .endm + + .macro ldrusr, reg, ptr, inc, cond=al, rept=1, abort=9001f + usracc ldr, \reg, \ptr, \inc, \cond, \rept, \abort + .endm diff --git a/cache-v7.S b/cache-v7.S index e1bd975..ae604c9 100644 --- a/cache-v7.S +++ b/cache-v7.S @@ -12,11 +12,26 @@ */ #include #include -#include +#include "assembler.h" #include #include "proc-macros.S" +/* + * v7_flush_icache_all() + * + * Flush the whole I-cache. + * + * Registers: + * r0 - set to 0 + */ +ENTRY(v7_flush_icache_all) + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate + mov pc, lr +ENDPROC(v7_flush_icache_all) + /* * v7_flush_dcache_all() * @@ -81,7 +96,7 @@ ENDPROC(v7_flush_dcache_all) * Flush the entire cache system. * The data cache flush is now achieved using atomic clean / invalidates * working outwards from L1 cache. This is done using Set/Way based cache - * maintainance instructions. + * maintenance instructions. * The instruction cache can still be invalidated back to the point of * unification in a single instruction. * @@ -91,7 +106,8 @@ ENTRY(v7_flush_kern_cache_all) THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) bl v7_flush_dcache_all mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) mov pc, lr @@ -157,17 +173,25 @@ ENTRY(v7_coherent_user_range) UNWIND(.fnstart ) dcache_line_size r2, r3 sub r3, r2, #1 - bic r0, r0, r3 + bic r12, r0, r3 1: - USER( mcr p15, 0, r0, c7, c11, 1 ) @ clean D line to the point of unification + USER( mcr p15, 0, r12, c7, c11, 1 ) @ clean D line to the point of unification + add r12, r12, r2 + cmp r12, r1 + blo 1b dsb - USER( mcr p15, 0, r0, c7, c5, 1 ) @ invalidate I line - add r0, r0, r2 + icache_line_size r2, r3 + sub r3, r2, #1 + bic r12, r0, r3 2: - cmp r0, r1 - blo 1b + USER( mcr p15, 0, r12, c7, c5, 1 ) @ invalidate I line + add r12, r12, r2 + cmp r12, r1 + blo 2b +3: mov r0, #0 - mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB + ALT_SMP(mcr p15, 0, r0, c7, c1, 6) @ invalidate BTB Inner Shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 6) @ invalidate BTB dsb isb mov pc, lr @@ -177,25 +201,26 @@ ENTRY(v7_coherent_user_range) * isn't mapped, just try the next page. */ 9001: - mov r0, r0, lsr #12 - mov r0, r0, lsl #12 - add r0, r0, #4096 - b 2b + mov r12, r12, lsr #12 + mov r12, r12, lsl #12 + add r12, r12, #4096 + b 3b UNWIND(.fnend ) ENDPROC(v7_coherent_kern_range) ENDPROC(v7_coherent_user_range) /* - * v7_flush_kern_dcache_page(kaddr) + * v7_flush_kern_dcache_area(void *addr, size_t size) * * Ensure that the data held in the page kaddr is written back * to the page in question. * - * - kaddr - kernel address (guaranteed to be page aligned) + * - addr - kernel address + * - size - region size */ -ENTRY(v7_flush_kern_dcache_page) +ENTRY(v7_flush_kern_dcache_area) dcache_line_size r2, r3 - add r1, r0, #PAGE_SZ + add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line / unified line add r0, r0, r2 @@ -203,7 +228,7 @@ ENTRY(v7_flush_kern_dcache_page) blo 1b dsb mov pc, lr -ENDPROC(v7_flush_kern_dcache_page) +ENDPROC(v7_flush_kern_dcache_area) /* * v7_dma_inv_range(start,end) @@ -215,7 +240,7 @@ ENDPROC(v7_flush_kern_dcache_page) * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(v7_dma_inv_range) +v7_dma_inv_range: dcache_line_size r2, r3 sub r3, r2, #1 tst r0, r3 @@ -239,7 +264,7 @@ ENDPROC(v7_dma_inv_range) * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(v7_dma_clean_range) +v7_dma_clean_range: dcache_line_size r2, r3 sub r3, r2, #1 bic r0, r0, r3 @@ -274,13 +299,12 @@ ENDPROC(v7_dma_flush_range) .type v7_cache_fns, #object ENTRY(v7_cache_fns) + .long v7_flush_icache_all .long v7_flush_kern_cache_all .long v7_flush_user_cache_all .long v7_flush_user_cache_range .long v7_coherent_kern_range .long v7_coherent_user_range - .long v7_flush_kern_dcache_page - .long v7_dma_inv_range - .long v7_dma_clean_range + .long v7_flush_kern_dcache_area .long v7_dma_flush_range .size v7_cache_fns, . - v7_cache_fns diff --git a/copypage-v6.c b/copypage-v6.c index 9cc8485..a9ace4e 100644 --- a/copypage-v6.c +++ b/copypage-v6.c @@ -1,13 +1,3 @@ -/* - * linux/arch/arm/mm/copypage-v6.c - * - * Copyright (C) 2002 Deep Blue Solutions Ltd, All Rights Reserved. - * This Edition is maintained by Matthew Veety (aliasxerog) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ #include #include #include @@ -21,43 +11,5 @@ #include "mm.h" -#if SHMLBA > 16384 -#error FIX ME -#endif - -#define from_address (0xffff8000) -#define to_address (0xffffc000) - -/* - * Copy the user page. No aliasing to deal with so we can just - * attack the kernel's existing mapping of these pages. - */ - -static void v6_copy_user_highpage_nonaliasing(struct page *to, - struct page *from, unsigned long vaddr) -{ - void *kto, *kfrom; - - kfrom = kmap_atomic(from, KM_USER0); - kto = kmap_atomic(to, KM_USER1); - copy_page(kto, kfrom); - kunmap_atomic(kto, KM_USER1); - kunmap_atomic(kfrom, KM_USER0); -} - -/* - * Clear the user page. No aliasing to deal with so we can just - * attack the kernel's existing mapping of this page. - */ -static void v6_clear_user_highpage_nonaliasing(struct page *page, unsigned long vaddr) -{ - void *kaddr = kmap_atomic(page, KM_USER0); - clear_page(kaddr); - kunmap_atomic(kaddr, KM_USER0); -} - - struct cpu_user_fns v6_user_fns __initdata = { - .cpu_clear_user_highpage = v6_clear_user_highpage_nonaliasing, - .cpu_copy_user_highpage = v6_copy_user_highpage_nonaliasing, }; diff --git a/entry-header.S b/entry-header.S index 7e9ed1e..051166c 100644 --- a/entry-header.S +++ b/entry-header.S @@ -76,13 +76,13 @@ #ifndef CONFIG_THUMB2_KERNEL .macro svc_exit, rpsr msr spsr_cxsf, \rpsr -#if defined(CONFIG_CPU_32v6K) - clrex @ clear the exclusive monitor - ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr -#elif defined (CONFIG_CPU_V6) +#if defined(CONFIG_CPU_V6) ldr r0, [sp] strex r1, r2, [sp] @ clear the exclusive monitor ldmib sp, {r1 - pc}^ @ load r1 - pc, cpsr +#elif defined(CONFIG_CPU_32v6K) + clrex @ clear the exclusive monitor + ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr #else ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr #endif @@ -92,16 +92,18 @@ ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr ldr lr, [sp, #\offset + S_PC]! @ get pc msr spsr_cxsf, r1 @ save in spsr_svc -#if defined(CONFIG_CPU_32v6K) - clrex @ clear the exclusive monitor -#elif defined (CONFIG_CPU_V6) +#if defined(CONFIG_CPU_V6) strex r1, r2, [sp] @ clear the exclusive monitor +#elif defined(CONFIG_CPU_32v6K) + clrex @ clear the exclusive monitor #endif .if \fast ldmdb sp, {r1 - lr}^ @ get calling r1 - lr .else ldmdb sp, {r0 - lr}^ @ get calling r0 - lr .endif + mov r0, r0 @ ARMv5T and earlier require a nop + @ after ldm {}^ add sp, sp, #S_FRAME_SIZE - S_PC movs pc, lr @ return & move spsr_svc into cpsr .endm @@ -163,6 +165,25 @@ .endm #endif /* !CONFIG_THUMB2_KERNEL */ + @ + @ Debug exceptions are taken as prefetch or data aborts. + @ We must disable preemption during the handler so that + @ we can access the debug registers safely. + @ + .macro debug_entry, fsr +#if defined(CONFIG_HAVE_HW_BREAKPOINT) && defined(CONFIG_PREEMPT) + ldr r4, =0x40f @ mask out fsr.fs + and r5, r4, \fsr + cmp r5, #2 @ debug exception + bne 1f + get_thread_info r10 + ldr r6, [r10, #TI_PREEMPT] @ get preempt count + add r11, r6, #1 @ increment it + str r11, [r10, #TI_PREEMPT] +1: +#endif + .endm + /* * These are the registers used in the syscall handler, and allow us to * have in theory up to 7 arguments to a function - r0 to r6. diff --git a/hwcap.h b/hwcap.h new file mode 100644 index 0000000..c1062c3 --- /dev/null +++ b/hwcap.h @@ -0,0 +1,33 @@ +#ifndef __ASMARM_HWCAP_H +#define __ASMARM_HWCAP_H + +/* + * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP + */ +#define HWCAP_SWP 1 +#define HWCAP_HALF 2 +#define HWCAP_THUMB 4 +#define HWCAP_26BIT 8 /* Play it safe */ +#define HWCAP_FAST_MULT 16 +#define HWCAP_FPA 32 +#define HWCAP_VFP 64 +#define HWCAP_EDSP 128 +#define HWCAP_JAVA 256 +#define HWCAP_IWMMXT 512 +#define HWCAP_CRUNCH 1024 +#define HWCAP_THUMBEE 2048 +#define HWCAP_NEON 4096 +#define HWCAP_VFPv3 8192 +#define HWCAP_VFPv3D16 16384 +#define HWCAP_TLS 32768 + +#if defined(__KERNEL__) && !defined(__ASSEMBLY__) +/* + * This yields a mask that user programs can use to figure out what + * instruction set this cpu supports. + */ +#define ELF_HWCAP (elf_hwcap) +extern unsigned int elf_hwcap; +#endif + +#endif diff --git a/kexec.c b/kexec.c index e7ea604..7cdf7c2 100644 --- a/kexec.c +++ b/kexec.c @@ -1409,7 +1409,7 @@ int kernel_kexec(void) { kernel_restart_prepare(NULL); printk(KERN_EMERG "Starting new kernel\n"); - machine_shutdown(); + //machine_shutdown(); } machine_kexec(kexec_image); diff --git a/machine_kexec.c b/machine_kexec.c index 598ca61..531120f 100644 --- a/machine_kexec.c +++ b/machine_kexec.c @@ -23,6 +23,8 @@ extern unsigned long kexec_indirection_page; extern unsigned long kexec_mach_type; extern unsigned long kexec_boot_atags; +static atomic_t waiting_for_crash_ipi; + /* * Provide a dummy crash_notes definition while crash dump arrives to arm. * This prevents breakage of crash_notes attribute in kernel/ksysfs.c. @@ -37,14 +39,47 @@ void machine_kexec_cleanup(struct kimage *image) { } -void machine_shutdown(void) +void machine_crash_nonpanic_core(void *unused) { + struct pt_regs regs; + + crash_setup_regs(®s, NULL); + printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n", + smp_processor_id()); + crash_save_cpu(®s, smp_processor_id()); + flush_cache_all(); + + atomic_dec(&waiting_for_crash_ipi); + while (1) + cpu_relax(); } void machine_crash_shutdown(struct pt_regs *regs) { + unsigned long msecs; + + local_irq_disable(); + + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + smp_call_function(machine_crash_nonpanic_core, NULL, false); + msecs = 1000; /* Wait at most a second for the other cpus to stop */ + while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { + mdelay(1); + msecs--; + } + if (atomic_read(&waiting_for_crash_ipi) > 0) + printk(KERN_WARNING "Non-crashing CPUs did not react to IPI\n"); + + crash_save_cpu(regs, smp_processor_id()); + + printk(KERN_INFO "Loading crashdump kernel...\n"); } +/* + * Function pointer to optional machine-specific reinitialization + */ +void (*kexec_reinit)(void); + void machine_kexec(struct kimage *image) { unsigned long page_list; @@ -74,7 +109,20 @@ void machine_kexec(struct kimage *image) (unsigned long) reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE); printk(KERN_INFO "Bye!\n"); - cpu_proc_fin(); + if (kexec_reinit) + kexec_reinit(); + local_irq_disable(); + local_fiq_disable(); setup_mm_for_reboot(0); /* mode is not used, so just pass 0*/ + flush_cache_all(); +#if 0 + outer_flush_all(); + outer_disable(); +#endif + cpu_proc_fin(); +#if 0 + outer_inv_all(); +#endif + flush_cache_all(); cpu_reset(reboot_code_buffer_phys); } diff --git a/pgtable.h b/pgtable.h new file mode 100644 index 0000000..5750704 --- /dev/null +++ b/pgtable.h @@ -0,0 +1,484 @@ +/* + * arch/arm/include/asm/pgtable.h + * + * Copyright (C) 1995-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _ASMARM_PGTABLE_H +#define _ASMARM_PGTABLE_H + +#include +#include +#include + +#ifndef CONFIG_MMU + +#include "pgtable-nommu.h" + +#else + +#include +#include +#include + +/* + * Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. + * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. ;) + * + * Note that platforms may override VMALLOC_START, but they must provide + * VMALLOC_END. VMALLOC_END defines the (exclusive) limit of this space, + * which may not overlap IO space. + */ +#ifndef VMALLOC_START +#define VMALLOC_OFFSET (8*1024*1024) +#define VMALLOC_START (((unsigned long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) +#endif + +/* + * Hardware-wise, we have a two level page table structure, where the first + * level has 4096 entries, and the second level has 256 entries. Each entry + * is one 32-bit word. Most of the bits in the second level entry are used + * by hardware, and there aren't any "accessed" and "dirty" bits. + * + * Linux on the other hand has a three level page table structure, which can + * be wrapped to fit a two level page table structure easily - using the PGD + * and PTE only. However, Linux also expects one "PTE" table per page, and + * at least a "dirty" bit. + * + * Therefore, we tweak the implementation slightly - we tell Linux that we + * have 2048 entries in the first level, each of which is 8 bytes (iow, two + * hardware pointers to the second level.) The second level contains two + * hardware PTE tables arranged contiguously, preceded by Linux versions + * which contain the state information Linux needs. We, therefore, end up + * with 512 entries in the "PTE" level. + * + * This leads to the page tables having the following layout: + * + * pgd pte + * | | + * +--------+ + * | | +------------+ +0 + * +- - - - + | Linux pt 0 | + * | | +------------+ +1024 + * +--------+ +0 | Linux pt 1 | + * | |-----> +------------+ +2048 + * +- - - - + +4 | h/w pt 0 | + * | |-----> +------------+ +3072 + * +--------+ +8 | h/w pt 1 | + * | | +------------+ +4096 + * + * See L_PTE_xxx below for definitions of bits in the "Linux pt", and + * PTE_xxx for definitions of bits appearing in the "h/w pt". + * + * PMD_xxx definitions refer to bits in the first level page table. + * + * The "dirty" bit is emulated by only granting hardware write permission + * iff the page is marked "writable" and "dirty" in the Linux PTE. This + * means that a write to a clean page will cause a permission fault, and + * the Linux MM layer will mark the page dirty via handle_pte_fault(). + * For the hardware to notice the permission change, the TLB entry must + * be flushed, and ptep_set_access_flags() does that for us. + * + * The "accessed" or "young" bit is emulated by a similar method; we only + * allow accesses to the page if the "young" bit is set. Accesses to the + * page will cause a fault, and handle_pte_fault() will set the young bit + * for us as long as the page is marked present in the corresponding Linux + * PTE entry. Again, ptep_set_access_flags() will ensure that the TLB is + * up to date. + * + * However, when the "young" bit is cleared, we deny access to the page + * by clearing the hardware PTE. Currently Linux does not flush the TLB + * for us in this case, which means the TLB will retain the transation + * until either the TLB entry is evicted under pressure, or a context + * switch which changes the user space mapping occurs. + */ +#define PTRS_PER_PTE 512 +#define PTRS_PER_PMD 1 +#define PTRS_PER_PGD 2048 + +#define PTE_HWTABLE_PTRS (PTRS_PER_PTE) +#define PTE_HWTABLE_OFF (PTE_HWTABLE_PTRS * sizeof(pte_t)) +#define PTE_HWTABLE_SIZE (PTRS_PER_PTE * sizeof(u32)) + +/* + * PMD_SHIFT determines the size of the area a second-level page table can map + * PGDIR_SHIFT determines what a third-level page table entry can map + */ +#define PMD_SHIFT 21 +#define PGDIR_SHIFT 21 + +#define LIBRARY_TEXT_START 0x0c000000 + +#ifndef __ASSEMBLY__ +extern void __pte_error(const char *file, int line, pte_t); +extern void __pmd_error(const char *file, int line, pmd_t); +extern void __pgd_error(const char *file, int line, pgd_t); + +#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte) +#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd) +#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd) +#endif /* !__ASSEMBLY__ */ + +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* + * This is the lowest virtual address we can permit any user space + * mapping to be mapped at. This is particularly important for + * non-high vector CPUs. + */ +#define FIRST_USER_ADDRESS PAGE_SIZE + +#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) + +/* + * section address mask and size definitions. + */ +#define SECTION_SHIFT 20 +#define SECTION_SIZE (1UL << SECTION_SHIFT) +#define SECTION_MASK (~(SECTION_SIZE-1)) + +/* + * ARMv6 supersection address mask and size definitions. + */ +#define SUPERSECTION_SHIFT 24 +#define SUPERSECTION_SIZE (1UL << SUPERSECTION_SHIFT) +#define SUPERSECTION_MASK (~(SUPERSECTION_SIZE-1)) + +/* + * "Linux" PTE definitions. + * + * We keep two sets of PTEs - the hardware and the linux version. + * This allows greater flexibility in the way we map the Linux bits + * onto the hardware tables, and allows us to have YOUNG and DIRTY + * bits. + * + * The PTE table pointer refers to the hardware entries; the "Linux" + * entries are stored 1024 bytes below. + */ +#define L_PTE_PRESENT (_AT(pteval_t, 1) << 0) +#define L_PTE_YOUNG (_AT(pteval_t, 1) << 1) +#define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */ +#define L_PTE_DIRTY (_AT(pteval_t, 1) << 6) +#define L_PTE_RDONLY (_AT(pteval_t, 1) << 7) +#define L_PTE_USER (_AT(pteval_t, 1) << 8) +#define L_PTE_XN (_AT(pteval_t, 1) << 9) +#define L_PTE_SHARED (_AT(pteval_t, 1) << 10) /* shared(v6), coherent(xsc3) */ + +/* + * These are the memory types, defined to be compatible with + * pre-ARMv6 CPUs cacheable and bufferable bits: XXCB + */ +#define L_PTE_MT_UNCACHED (_AT(pteval_t, 0x00) << 2) /* 0000 */ +#define L_PTE_MT_BUFFERABLE (_AT(pteval_t, 0x01) << 2) /* 0001 */ +#define L_PTE_MT_WRITETHROUGH (_AT(pteval_t, 0x02) << 2) /* 0010 */ +#define L_PTE_MT_WRITEBACK (_AT(pteval_t, 0x03) << 2) /* 0011 */ +#define L_PTE_MT_MINICACHE (_AT(pteval_t, 0x06) << 2) /* 0110 (sa1100, xscale) */ +#define L_PTE_MT_WRITEALLOC (_AT(pteval_t, 0x07) << 2) /* 0111 */ +#define L_PTE_MT_DEV_SHARED (_AT(pteval_t, 0x04) << 2) /* 0100 */ +#define L_PTE_MT_DEV_NONSHARED (_AT(pteval_t, 0x0c) << 2) /* 1100 */ +#define L_PTE_MT_DEV_WC (_AT(pteval_t, 0x09) << 2) /* 1001 */ +#define L_PTE_MT_DEV_CACHED (_AT(pteval_t, 0x0b) << 2) /* 1011 */ +#define L_PTE_MT_MASK (_AT(pteval_t, 0x0f) << 2) + +#ifndef __ASSEMBLY__ + +/* + * The pgprot_* and protection_map entries will be fixed up in runtime + * to include the cachable and bufferable bits based on memory policy, + * as well as any architecture dependent bits like global/ASID and SMP + * shared mapping bits. + */ +#define _L_PTE_DEFAULT L_PTE_PRESENT | L_PTE_YOUNG + +extern pgprot_t pgprot_user; +extern pgprot_t pgprot_kernel; + +#define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b)) + +#define PAGE_NONE _MOD_PROT(pgprot_user, L_PTE_XN | L_PTE_RDONLY) +#define PAGE_SHARED _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_XN) +#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_user, L_PTE_USER) +#define PAGE_COPY _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY | L_PTE_XN) +#define PAGE_COPY_EXEC _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY) +#define PAGE_READONLY _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY | L_PTE_XN) +#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY) +#define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN) +#define PAGE_KERNEL_EXEC pgprot_kernel + +#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN) +#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN) +#define __PAGE_SHARED_EXEC __pgprot(_L_PTE_DEFAULT | L_PTE_USER) +#define __PAGE_COPY __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY | L_PTE_XN) +#define __PAGE_COPY_EXEC __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY) +#define __PAGE_READONLY __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY | L_PTE_XN) +#define __PAGE_READONLY_EXEC __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY) + +#define __pgprot_modify(prot,mask,bits) \ + __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) + +#define pgprot_noncached(prot) \ + __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED) + +#define pgprot_writecombine(prot) \ + __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE) + +#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE +#define pgprot_dmacoherent(prot) \ + __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE | L_PTE_XN) +#define __HAVE_PHYS_MEM_ACCESS_PROT +struct file; +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); +#else +#define pgprot_dmacoherent(prot) \ + __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED | L_PTE_XN) +#endif + +#endif /* __ASSEMBLY__ */ + +/* + * The table below defines the page protection levels that we insert into our + * Linux page table version. These get translated into the best that the + * architecture can perform. Note that on most ARM hardware: + * 1) We cannot do execute protection + * 2) If we could do execute protection, then read is implied + * 3) write implies read permissions + */ +#define __P000 __PAGE_NONE +#define __P001 __PAGE_READONLY +#define __P010 __PAGE_COPY +#define __P011 __PAGE_COPY +#define __P100 __PAGE_READONLY_EXEC +#define __P101 __PAGE_READONLY_EXEC +#define __P110 __PAGE_COPY_EXEC +#define __P111 __PAGE_COPY_EXEC + +#define __S000 __PAGE_NONE +#define __S001 __PAGE_READONLY +#define __S010 __PAGE_SHARED +#define __S011 __PAGE_SHARED +#define __S100 __PAGE_READONLY_EXEC +#define __S101 __PAGE_READONLY_EXEC +#define __S110 __PAGE_SHARED_EXEC +#define __S111 __PAGE_SHARED_EXEC + +#ifndef __ASSEMBLY__ +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern struct page *empty_zero_page; +#define ZERO_PAGE(vaddr) (empty_zero_page) + + +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; + +/* to find an entry in a page-table-directory */ +#define pgd_index(addr) ((addr) >> PGDIR_SHIFT) + +#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr)) + +/* to find an entry in a kernel page-table-directory */ +#define pgd_offset_k(addr) pgd_offset(&init_mm, addr) + +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the pgd is never bad, and a pmd always exists (as it's folded + * into the pgd entry) + */ +#define pgd_none(pgd) (0) +#define pgd_bad(pgd) (0) +#define pgd_present(pgd) (1) +#define pgd_clear(pgdp) do { } while (0) +#define set_pgd(pgd,pgdp) do { } while (0) +#define set_pud(pud,pudp) do { } while (0) + + +/* Find an entry in the second-level page table.. */ +#define pmd_offset(dir, addr) ((pmd_t *)(dir)) + +#define pmd_none(pmd) (!pmd_val(pmd)) +#define pmd_present(pmd) (pmd_val(pmd)) +#define pmd_bad(pmd) (pmd_val(pmd) & 2) + +#define copy_pmd(pmdpd,pmdps) \ + do { \ + pmdpd[0] = pmdps[0]; \ + pmdpd[1] = pmdps[1]; \ + flush_pmd_entry(pmdpd); \ + } while (0) + +#define pmd_clear(pmdp) \ + do { \ + pmdp[0] = __pmd(0); \ + pmdp[1] = __pmd(0); \ + clean_pmd_entry(pmdp); \ + } while (0) + +static inline pte_t *pmd_page_vaddr(pmd_t pmd) +{ + return __va(pmd_val(pmd) & PAGE_MASK); +} + +#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd))) + +/* we don't need complex calculations here as the pmd is folded into the pgd */ +#define pmd_addr_end(addr,end) (end) + + +#ifndef CONFIG_HIGHPTE +#define __pte_map(pmd) pmd_page_vaddr(*(pmd)) +#define __pte_unmap(pte) do { } while (0) +#else +#define __pte_map(pmd) (pte_t *)kmap_atomic(pmd_page(*(pmd))) +#define __pte_unmap(pte) kunmap_atomic(pte) +#endif + +#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + +#define pte_offset_kernel(pmd,addr) (pmd_page_vaddr(*(pmd)) + pte_index(addr)) + +#define pte_offset_map(pmd,addr) (__pte_map(pmd) + pte_index(addr)) +#define pte_unmap(pte) __pte_unmap(pte) + +#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) +#define pfn_pte(pfn,prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot)) + +#define pte_page(pte) pfn_to_page(pte_pfn(pte)) +#define mk_pte(page,prot) pfn_pte(page_to_pfn(page), prot) + +#define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext) +#define pte_clear(mm,addr,ptep) set_pte_ext(ptep, __pte(0), 0) + +#if __LINUX_ARM_ARCH__ < 6 +static inline void __sync_icache_dcache(pte_t pteval) +{ +} +#else +extern void __sync_icache_dcache(pte_t pteval); +#endif + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + if (addr >= TASK_SIZE) + set_pte_ext(ptep, pteval, 0); + else { + __sync_icache_dcache(pteval); + set_pte_ext(ptep, pteval, PTE_EXT_NG); + } +} + +#define pte_none(pte) (!pte_val(pte)) +#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT) +#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY)) +#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY) +#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG) +#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN)) +#define pte_special(pte) (0) + +#define pte_present_user(pte) \ + ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \ + (L_PTE_PRESENT | L_PTE_USER)) + +#define PTE_BIT_FUNC(fn,op) \ +static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } + +PTE_BIT_FUNC(wrprotect, |= L_PTE_RDONLY); +PTE_BIT_FUNC(mkwrite, &= ~L_PTE_RDONLY); +PTE_BIT_FUNC(mkclean, &= ~L_PTE_DIRTY); +PTE_BIT_FUNC(mkdirty, |= L_PTE_DIRTY); +PTE_BIT_FUNC(mkold, &= ~L_PTE_YOUNG); +PTE_BIT_FUNC(mkyoung, |= L_PTE_YOUNG); + +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER; + pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); + return pte; +} + +/* + * Encode and decode a swap entry. Swap entries are stored in the Linux + * page tables as follows: + * + * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * <--------------- offset --------------------> <- type --> 0 0 0 + * + * This gives us up to 63 swap files and 32GB per swap file. Note that + * the offset field is always non-zero. + */ +#define __SWP_TYPE_SHIFT 3 +#define __SWP_TYPE_BITS 6 +#define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) +#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) + +#define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK) +#define __swp_offset(x) ((x).val >> __SWP_OFFSET_SHIFT) +#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) }) + +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(swp) ((pte_t) { (swp).val }) + +/* + * It is an error for the kernel to have more swap files than we can + * encode in the PTEs. This ensures that we know when MAX_SWAPFILES + * is increased beyond what we presently support. + */ +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS) + +/* + * Encode and decode a file entry. File entries are stored in the Linux + * page tables as follows: + * + * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * <----------------------- offset ------------------------> 1 0 0 + */ +#define pte_file(pte) (pte_val(pte) & L_PTE_FILE) +#define pte_to_pgoff(x) (pte_val(x) >> 3) +#define pgoff_to_pte(x) __pte(((x) << 3) | L_PTE_FILE) + +#define PTE_FILE_MAX_BITS 29 + +/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ +/* FIXME: this is not correct */ +#define kern_addr_valid(addr) (1) + +#include + +/* + * We provide our own arch_get_unmapped_area to cope with VIPT caches. + */ +#define HAVE_ARCH_UNMAPPED_AREA + +/* + * remap a physical page `pfn' of size `size' with page protection `prot' + * into virtual address `from' + */ +#define io_remap_pfn_range(vma,from,pfn,size,prot) \ + remap_pfn_range(vma, from, pfn, size, prot) + +#define pgtable_cache_init() do { } while (0) + +void identity_mapping_add(pgd_t *, unsigned long, unsigned long); +void identity_mapping_del(pgd_t *, unsigned long, unsigned long); + +#endif /* !__ASSEMBLY__ */ + +#endif /* CONFIG_MMU */ + +#endif /* _ASMARM_PGTABLE_H */ diff --git a/proc-macros.S b/proc-macros.S index 7d63bea..34261f9 100644 --- a/proc-macros.S +++ b/proc-macros.S @@ -61,27 +61,37 @@ .endm /* - * cache_line_size - get the cache line size from the CSIDR register - * (available on ARMv7+). It assumes that the CSSR register was configured - * to access the L1 data cache CSIDR. + * dcache_line_size - get the minimum D-cache line size from the CTR register + * on ARMv7. */ .macro dcache_line_size, reg, tmp - mrc p15, 1, \tmp, c0, c0, 0 @ read CSIDR - and \tmp, \tmp, #7 @ cache line size encoding - mov \reg, #16 @ size offset + mrc p15, 0, \tmp, c0, c0, 1 @ read ctr + lsr \tmp, \tmp, #16 + and \tmp, \tmp, #0xf @ cache line size encoding + mov \reg, #4 @ bytes per word mov \reg, \reg, lsl \tmp @ actual cache line size .endm +/* + * icache_line_size - get the minimum I-cache line size from the CTR register + * on ARMv7. + */ + .macro icache_line_size, reg, tmp + mrc p15, 0, \tmp, c0, c0, 1 @ read ctr + and \tmp, \tmp, #0xf @ cache line size encoding + mov \reg, #4 @ bytes per word + mov \reg, \reg, lsl \tmp @ actual cache line size + .endm /* * Sanity check the PTE configuration for the code below - which makes - * certain assumptions about how these bits are layed out. + * certain assumptions about how these bits are laid out. */ #ifdef CONFIG_MMU #if L_PTE_SHARED != PTE_EXT_SHARED #error PTE shared bit mismatch #endif -#if (L_PTE_EXEC+L_PTE_USER+L_PTE_WRITE+L_PTE_DIRTY+L_PTE_YOUNG+\ +#if (L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\ L_PTE_FILE+L_PTE_PRESENT) > L_PTE_SHARED #error Invalid Linux PTE bit settings #endif @@ -99,6 +109,10 @@ * 110x 0 1 0 r/w r/o * 11x0 0 1 0 r/w r/o * 1111 0 1 1 r/w r/w + * + * If !CONFIG_CPU_USE_DOMAINS, the following permissions are changed: + * 110x 1 1 1 r/o r/o + * 11x0 1 1 1 r/o r/o */ .macro armv6_mt_table pfx \pfx\()_mt_table: @@ -121,7 +135,7 @@ .endm .macro armv6_set_pte_ext pfx - str r1, [r0], #-2048 @ linux version + str r1, [r0], #2048 @ linux version bic r3, r1, #0x000003fc bic r3, r3, #PTE_TYPE_MASK @@ -132,17 +146,20 @@ and r2, r1, #L_PTE_MT_MASK ldr r2, [ip, r2] - tst r1, #L_PTE_WRITE - tstne r1, #L_PTE_DIRTY - orreq r3, r3, #PTE_EXT_APX + eor r1, r1, #L_PTE_DIRTY + tst r1, #L_PTE_DIRTY|L_PTE_RDONLY + orrne r3, r3, #PTE_EXT_APX tst r1, #L_PTE_USER orrne r3, r3, #PTE_EXT_AP1 +#ifdef CONFIG_CPU_USE_DOMAINS + @ allow kernel read/write access to read-only user pages tstne r3, #PTE_EXT_APX bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 +#endif - tst r1, #L_PTE_EXEC - orreq r3, r3, #PTE_EXT_XN + tst r1, #L_PTE_XN + orrne r3, r3, #PTE_EXT_XN orr r3, r3, r2 @@ -170,9 +187,9 @@ * 1111 0xff r/w r/w */ .macro armv3_set_pte_ext wc_disable=1 - str r1, [r0], #-2048 @ linux version + str r1, [r0], #2048 @ linux version - eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits bic r2, r2, #PTE_TYPE_MASK @@ -181,7 +198,7 @@ tst r3, #L_PTE_USER @ user? orrne r2, r2, #PTE_SMALL_AP_URO_SRW - tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ write and dirty? + tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty? orreq r2, r2, #PTE_SMALL_AP_UNO_SRW tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? @@ -193,7 +210,7 @@ bicne r2, r2, #PTE_BUFFERABLE #endif .endif - str r2, [r0] @ hardware version + str r2, [r0] @ hardware version .endm @@ -213,9 +230,9 @@ * 1111 11 r/w r/w */ .macro xscale_set_pte_ext_prologue - str r1, [r0], #-2048 @ linux version + str r1, [r0] @ linux version - eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits orr r2, r2, #PTE_TYPE_EXT @ extended page @@ -223,7 +240,7 @@ tst r3, #L_PTE_USER @ user? orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w - tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ write and dirty? + tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty? orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w @ combined with user -> user r/w .endm @@ -232,7 +249,7 @@ tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? movne r2, #0 @ no -> fault - str r2, [r0] @ hardware version + str r2, [r0, #2048]! @ hardware version mov ip, #0 mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line mcr p15, 0, ip, c7, c10, 4 @ data write barrier diff --git a/proc-v7.S b/proc-v7.S index d2a8074..402ec2b 100644 --- a/proc-v7.S +++ b/proc-v7.S @@ -11,11 +11,11 @@ */ #include #include -#include +#include "hwcap.h" +#include "assembler.h" #include -#include #include -#include +#include "pgtable.h" #include "proc-macros.S" @@ -30,29 +30,24 @@ #define TTB_IRGN_WT ((1 << 0) | (0 << 6)) #define TTB_IRGN_WB ((1 << 0) | (1 << 6)) -#ifndef CONFIG_SMP /* PTWs cacheable, inner WB not shareable, outer WB not shareable */ -#define TTB_FLAGS TTB_IRGN_WB|TTB_RGN_OC_WB -#define PMD_FLAGS PMD_SECT_WB -#else +#define TTB_FLAGS_UP TTB_IRGN_WB|TTB_RGN_OC_WB +#define PMD_FLAGS_UP PMD_SECT_WB + /* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */ -#define TTB_FLAGS TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA -#define PMD_FLAGS PMD_SECT_WBWA|PMD_SECT_S -#endif +#define TTB_FLAGS_SMP TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA +#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S ENTRY(cpu_v7_proc_init) mov pc, lr ENDPROC(cpu_v7_proc_init) ENTRY(cpu_v7_proc_fin) - stmfd sp!, {lr} - cpsid if @ disable interrupts - bl v7_flush_kern_cache_all mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x0006 @ .............ca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - ldmfd sp!, {pc} + mov pc, lr ENDPROC(cpu_v7_proc_fin) /* @@ -63,8 +58,6 @@ ENDPROC(cpu_v7_proc_fin) * to what would be the reset vector. * * - loc - location to jump to for soft reset - * - * It is assumed that: */ .align 5 ENTRY(cpu_v7_reset) @@ -110,14 +103,21 @@ ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_MMU mov r2, #0 ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id - orr r0, r0, #TTB_FLAGS + ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) + ALT_UP(orr r0, r0, #TTB_FLAGS_UP) #ifdef CONFIG_ARM_ERRATA_430973 mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB +#endif +#ifdef CONFIG_ARM_ERRATA_754322 + dsb #endif mcr p15, 0, r2, c13, c0, 1 @ set reserved context ID isb 1: mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 isb +#ifdef CONFIG_ARM_ERRATA_754322 + dsb +#endif mcr p15, 0, r1, c13, c0, 1 @ set context ID isb #endif @@ -130,15 +130,13 @@ ENDPROC(cpu_v7_switch_mm) * Set a level 2 translation table entry. * * - ptep - pointer to level 2 translation table entry - * (hardware version is stored at -1024 bytes) + * (hardware version is stored at +2048 bytes) * - pte - PTE value to store * - ext - value for extended PTE bits */ ENTRY(cpu_v7_set_pte_ext) #ifdef CONFIG_MMU - ARM( str r1, [r0], #-2048 ) @ linux version - THUMB( str r1, [r0] ) @ linux version - THUMB( sub r0, r0, #2048 ) + str r1, [r0] @ linux version bic r3, r1, #0x000003f0 bic r3, r3, #PTE_TYPE_MASK @@ -148,23 +146,28 @@ ENTRY(cpu_v7_set_pte_ext) tst r1, #1 << 4 orrne r3, r3, #PTE_EXT_TEX(1) - tst r1, #L_PTE_WRITE - tstne r1, #L_PTE_DIRTY - orreq r3, r3, #PTE_EXT_APX + eor r1, r1, #L_PTE_DIRTY + tst r1, #L_PTE_RDONLY | L_PTE_DIRTY + orrne r3, r3, #PTE_EXT_APX tst r1, #L_PTE_USER orrne r3, r3, #PTE_EXT_AP1 +#ifdef CONFIG_CPU_USE_DOMAINS + @ allow kernel read/write access to read-only user pages tstne r3, #PTE_EXT_APX bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 +#endif - tst r1, #L_PTE_EXEC - orreq r3, r3, #PTE_EXT_XN + tst r1, #L_PTE_XN + orrne r3, r3, #PTE_EXT_XN tst r1, #L_PTE_YOUNG tstne r1, #L_PTE_PRESENT moveq r3, #0 - str r3, [r0] + ARM( str r3, [r0, #2048]! ) + THUMB( add r0, r0, #2048 ) + THUMB( str r3, [r0] ) mcr p15, 0, r0, c7, c10, 1 @ flush_pte #endif mov pc, lr @@ -174,7 +177,88 @@ cpu_v7_name: .ascii "ARMv7 Processor" .align - __INIT + /* + * Memory region attributes with SCTLR.TRE=1 + * + * n = TEX[0],C,B + * TR = PRRR[2n+1:2n] - memory type + * IR = NMRR[2n+1:2n] - inner cacheable property + * OR = NMRR[2n+17:2n+16] - outer cacheable property + * + * n TR IR OR + * UNCACHED 000 00 + * BUFFERABLE 001 10 00 00 + * WRITETHROUGH 010 10 10 10 + * WRITEBACK 011 10 11 11 + * reserved 110 + * WRITEALLOC 111 10 01 01 + * DEV_SHARED 100 01 + * DEV_NONSHARED 100 01 + * DEV_WC 001 10 + * DEV_CACHED 011 10 + * + * Other attributes: + * + * DS0 = PRRR[16] = 0 - device shareable property + * DS1 = PRRR[17] = 1 - device shareable property + * NS0 = PRRR[18] = 0 - normal shareable property + * NS1 = PRRR[19] = 1 - normal shareable property + * NOS = PRRR[24+n] = 1 - not outer shareable + */ +.equ PRRR, 0xff0a81a8 +.equ NMRR, 0x40e040e0 + +/* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */ +.globl cpu_v7_suspend_size +.equ cpu_v7_suspend_size, 4 * 8 +#if 0 +ENTRY(cpu_v7_do_suspend) + stmfd sp!, {r4 - r11, lr} + mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID + mrc p15, 0, r5, c13, c0, 1 @ Context ID + mrc p15, 0, r6, c3, c0, 0 @ Domain ID + mrc p15, 0, r7, c2, c0, 0 @ TTB 0 + mrc p15, 0, r8, c2, c0, 1 @ TTB 1 + mrc p15, 0, r9, c1, c0, 0 @ Control register + mrc p15, 0, r10, c1, c0, 1 @ Auxiliary control register + mrc p15, 0, r11, c1, c0, 2 @ Co-processor access control + stmia r0, {r4 - r11} + ldmfd sp!, {r4 - r11, pc} +ENDPROC(cpu_v7_do_suspend) + +ENTRY(cpu_v7_do_resume) + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + ldmia r0, {r4 - r11} + mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID + mcr p15, 0, r5, c13, c0, 1 @ Context ID + mcr p15, 0, r6, c3, c0, 0 @ Domain ID + mcr p15, 0, r7, c2, c0, 0 @ TTB 0 + mcr p15, 0, r8, c2, c0, 1 @ TTB 1 + mcr p15, 0, ip, c2, c0, 2 @ TTB control register + mcr p15, 0, r10, c1, c0, 1 @ Auxiliary control register + mcr p15, 0, r11, c1, c0, 2 @ Co-processor access control + ldr r4, =PRRR @ PRRR + ldr r5, =NMRR @ NMRR + mcr p15, 0, r4, c10, c2, 0 @ write PRRR + mcr p15, 0, r5, c10, c2, 1 @ write NMRR + isb + mov r0, r9 @ control register + mov r2, r7, lsr #14 @ get TTB0 base + mov r2, r2, lsl #14 + ldr r3, cpu_resume_l1_flags + b cpu_resume_mmu +ENDPROC(cpu_v7_do_resume) +cpu_resume_l1_flags: + ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_FLAGS_SMP) + ALT_UP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_FLAGS_UP) +#else +#define cpu_v7_do_suspend 0 +#define cpu_v7_do_resume 0 +#endif + + __CPUINIT /* * __v7_setup @@ -191,13 +275,15 @@ cpu_v7_name: * It is assumed that: * - cache type register is implemented */ -__v7_setup: +__v7_ca9mp_setup: #ifdef CONFIG_SMP - mrc p15, 0, r0, c1, c0, 1 + ALT_SMP(mrc p15, 0, r0, c1, c0, 1) + ALT_UP(mov r0, #(1 << 6)) @ fake it for UP tst r0, #(1 << 6) @ SMP/nAMP mode enabled? orreq r0, r0, #(1 << 6) | (1 << 0) @ Enable SMP/nAMP mode and mcreq p15, 0, r0, c1, c0, 1 @ TLB ops broadcasting #endif +__v7_setup: adr r12, __v7_setup_stack @ the local stack stmia r12, {r0-r5, r7, r9, r11, lr} bl v7_flush_dcache_all @@ -206,11 +292,16 @@ __v7_setup: mrc p15, 0, r0, c0, c0, 0 @ read main ID register and r10, r0, #0xff000000 @ ARM? teq r10, #0x41000000 - bne 2f + bne 3f and r5, r0, #0x00f00000 @ variant and r6, r0, #0x0000000f @ revision - orr r0, r6, r5, lsr #20-4 @ combine variant and revision + orr r6, r6, r5, lsr #20-4 @ combine variant and revision + ubfx r0, r0, #4, #12 @ primary part number + /* Cortex-A8 Errata */ + ldr r10, =0x00000c08 @ Cortex-A8 primary part number + teq r0, r10 + bne 2f #ifdef CONFIG_ARM_ERRATA_430973 teq r5, #0x00100000 @ only present in r1p* mrceq p15, 0, r10, c1, c0, 1 @ read aux control register @@ -218,21 +309,56 @@ __v7_setup: mcreq p15, 0, r10, c1, c0, 1 @ write aux control register #endif #ifdef CONFIG_ARM_ERRATA_458693 - teq r0, #0x20 @ only present in r2p0 + teq r6, #0x20 @ only present in r2p0 mrceq p15, 0, r10, c1, c0, 1 @ read aux control register orreq r10, r10, #(1 << 5) @ set L1NEON to 1 orreq r10, r10, #(1 << 9) @ set PLDNOP to 1 mcreq p15, 0, r10, c1, c0, 1 @ write aux control register #endif #ifdef CONFIG_ARM_ERRATA_460075 - teq r0, #0x20 @ only present in r2p0 + teq r6, #0x20 @ only present in r2p0 mrceq p15, 1, r10, c9, c0, 2 @ read L2 cache aux ctrl register tsteq r10, #1 << 22 orreq r10, r10, #(1 << 22) @ set the Write Allocate disable bit mcreq p15, 1, r10, c9, c0, 2 @ write the L2 cache aux ctrl register #endif + b 3f + + /* Cortex-A9 Errata */ +2: ldr r10, =0x00000c09 @ Cortex-A9 primary part number + teq r0, r10 + bne 3f +#ifdef CONFIG_ARM_ERRATA_742230 + cmp r6, #0x22 @ only present up to r2p2 + mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register + orrle r10, r10, #1 << 4 @ set bit #4 + mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_742231 + teq r6, #0x20 @ present in r2p0 + teqne r6, #0x21 @ present in r2p1 + teqne r6, #0x22 @ present in r2p2 + mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register + orreq r10, r10, #1 << 12 @ set bit #12 + orreq r10, r10, #1 << 22 @ set bit #22 + mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_743622 + teq r6, #0x20 @ present in r2p0 + teqne r6, #0x21 @ present in r2p1 + teqne r6, #0x22 @ present in r2p2 + mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register + orreq r10, r10, #1 << 6 @ set bit #6 + mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_751472 + cmp r6, #0x30 @ present prior to r3p0 + mrclt p15, 0, r10, c15, c0, 1 @ read diagnostic register + orrlt r10, r10, #1 << 11 @ set bit #11 + mcrlt p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif -2: mov r10, #0 +3: mov r10, #0 #ifdef HARVARD_CACHE mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate #endif @@ -240,40 +366,11 @@ __v7_setup: #ifdef CONFIG_MMU mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs mcr p15, 0, r10, c2, c0, 2 @ TTB control register - orr r4, r4, #TTB_FLAGS + ALT_SMP(orr r4, r4, #TTB_FLAGS_SMP) + ALT_UP(orr r4, r4, #TTB_FLAGS_UP) mcr p15, 0, r4, c2, c0, 1 @ load TTB1 - mov r10, #0x1f @ domains 0, 1 = manager - mcr p15, 0, r10, c3, c0, 0 @ load domain access register - /* - * Memory region attributes with SCTLR.TRE=1 - * - * n = TEX[0],C,B - * TR = PRRR[2n+1:2n] - memory type - * IR = NMRR[2n+1:2n] - inner cacheable property - * OR = NMRR[2n+17:2n+16] - outer cacheable property - * - * n TR IR OR - * UNCACHED 000 00 - * BUFFERABLE 001 10 00 00 - * WRITETHROUGH 010 10 10 10 - * WRITEBACK 011 10 11 11 - * reserved 110 - * WRITEALLOC 111 10 01 01 - * DEV_SHARED 100 01 - * DEV_NONSHARED 100 01 - * DEV_WC 001 10 - * DEV_CACHED 011 10 - * - * Other attributes: - * - * DS0 = PRRR[16] = 0 - device shareable property - * DS1 = PRRR[17] = 1 - device shareable property - * NS0 = PRRR[18] = 0 - normal shareable property - * NS1 = PRRR[19] = 1 - normal shareable property - * NOS = PRRR[24+n] = 1 - not outer shareable - */ - ldr r5, =0xff0a81a8 @ PRRR - ldr r6, =0x40e040e0 @ NMRR + ldr r5, =PRRR @ PRRR + ldr r6, =NMRR @ NMRR mcr p15, 0, r5, c10, c2, 0 @ write PRRR mcr p15, 0, r6, c10, c2, 1 @ write NMRR #endif @@ -281,6 +378,10 @@ __v7_setup: ldmia r5, {r5, r6} #ifdef CONFIG_CPU_ENDIAN_BE8 orr r6, r6, #1 << 25 @ big-endian page tables +#endif +#ifdef CONFIG_SWP_EMULATE + orr r5, r5, #(1 << 10) @ set SW bit in "clear" + bic r6, r6, #(1 << 10) @ clear it in "mmuset" #endif mrc p15, 0, r0, c1, c0, 0 @ read control register bic r0, r0, r5 @ clear bits them @@ -302,6 +403,8 @@ v7_crval: __v7_setup_stack: .space 4 * 11 @ 11 registers + __INITDATA + .type v7_processor_functions, #object ENTRY(v7_processor_functions) .word v7_early_abort @@ -313,8 +416,13 @@ ENTRY(v7_processor_functions) .word cpu_v7_dcache_clean_area .word cpu_v7_switch_mm .word cpu_v7_set_pte_ext + .word 0 + .word 0 + .word 0 .size v7_processor_functions, . - v7_processor_functions + .section ".rodata" + .type cpu_arch_name, #object cpu_arch_name: .asciz "armv7" @@ -328,6 +436,35 @@ cpu_elf_name: .section ".proc.info.init", #alloc, #execinstr + .type __v7_ca9mp_proc_info, #object +__v7_ca9mp_proc_info: + .long 0x410fc090 @ Required ID value + .long 0xff0ffff0 @ Mask for ID + ALT_SMP(.long \ + PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | \ + PMD_FLAGS_SMP) + ALT_UP(.long \ + PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | \ + PMD_FLAGS_UP) + .long PMD_TYPE_SECT | \ + PMD_SECT_XN | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + W(b) __v7_ca9mp_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS + .long cpu_v7_name + .long v7_processor_functions + .long v7wbi_tlb_fns + .long v6_user_fns + .long v7_cache_fns + .size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info + /* * Match any ARMv7 processor core. */ @@ -335,18 +472,24 @@ cpu_elf_name: __v7_proc_info: .long 0x000f0000 @ Required ID value .long 0x000f0000 @ Mask for ID - .long PMD_TYPE_SECT | \ + ALT_SMP(.long \ + PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | \ + PMD_FLAGS_SMP) + ALT_UP(.long \ + PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ | \ - PMD_FLAGS + PMD_FLAGS_UP) .long PMD_TYPE_SECT | \ PMD_SECT_XN | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __v7_setup + W(b) __v7_setup .long cpu_arch_name .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS .long cpu_v7_name .long v7_processor_functions .long v7wbi_tlb_fns diff --git a/relocate_kernel.S b/relocate_kernel.S index 61930eb..9cf4cbf 100644 --- a/relocate_kernel.S +++ b/relocate_kernel.S @@ -10,6 +10,12 @@ relocate_new_kernel: ldr r0,kexec_indirection_page ldr r1,kexec_start_address + /* + * If there is no indirection page (we are doing crashdumps) + * skip any relocation. + */ + cmp r0, #0 + beq 2f 0: /* top, read another word for the indirection page */ ldr r3, [r0],#4 @@ -53,6 +59,8 @@ relocate_new_kernel: ldr r2,kexec_boot_atags mov pc,lr + .align + .globl kexec_start_address kexec_start_address: .long 0x0 diff --git a/tlb-v7.S b/tlb-v7.S index a26a605..f8f3962 100644 --- a/tlb-v7.S +++ b/tlb-v7.S @@ -13,9 +13,10 @@ */ #include #include +#include "assembler.h" #include #include -#include +#include "tlbflush.h" #include "proc-macros.S" /* @@ -40,18 +41,16 @@ ENTRY(v7wbi_flush_user_tlb_range) asid r3, r3 @ mask ASID orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA mov r1, r1, lsl #PAGE_SHIFT - vma_vm_flags r2, r2 @ get vma->vm_flags 1: -#ifdef CONFIG_SMP - mcr p15, 0, r0, c8, c3, 1 @ TLB invalidate U MVA (shareable) -#else - mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate U MVA -#endif + ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) + ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA + add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b mov ip, #0 - mcr p15, 0, ip, c7, c5, 6 @ flush BTAC/BTB + ALT_SMP(mcr p15, 0, ip, c7, c1, 6) @ flush BTAC/BTB Inner Shareable + ALT_UP(mcr p15, 0, ip, c7, c5, 6) @ flush BTAC/BTB dsb mov pc, lr ENDPROC(v7wbi_flush_user_tlb_range) @@ -71,16 +70,14 @@ ENTRY(v7wbi_flush_kern_tlb_range) mov r0, r0, lsl #PAGE_SHIFT mov r1, r1, lsl #PAGE_SHIFT 1: -#ifdef CONFIG_SMP - mcr p15, 0, r0, c8, c3, 1 @ TLB invalidate U MVA (shareable) -#else - mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate U MVA -#endif + ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) + ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b mov r2, #0 - mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB + ALT_SMP(mcr p15, 0, r2, c7, c1, 6) @ flush BTAC/BTB Inner Shareable + ALT_UP(mcr p15, 0, r2, c7, c5, 6) @ flush BTAC/BTB dsb isb mov pc, lr @@ -92,5 +89,6 @@ ENDPROC(v7wbi_flush_kern_tlb_range) ENTRY(v7wbi_tlb_fns) .long v7wbi_flush_user_tlb_range .long v7wbi_flush_kern_tlb_range - .long v7wbi_tlb_flags + ALT_SMP(.long v7wbi_tlb_flags_smp) + ALT_UP(.long v7wbi_tlb_flags_up) .size v7wbi_tlb_fns, . - v7wbi_tlb_fns diff --git a/tlbflush.h b/tlbflush.h new file mode 100644 index 0000000..d2005de --- /dev/null +++ b/tlbflush.h @@ -0,0 +1,587 @@ +/* + * arch/arm/include/asm/tlbflush.h + * + * Copyright (C) 1999-2003 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _ASMARM_TLBFLUSH_H +#define _ASMARM_TLBFLUSH_H + +#ifdef CONFIG_MMU + +#include + +#define TLB_V3_PAGE (1 << 0) +#define TLB_V4_U_PAGE (1 << 1) +#define TLB_V4_D_PAGE (1 << 2) +#define TLB_V4_I_PAGE (1 << 3) +#define TLB_V6_U_PAGE (1 << 4) +#define TLB_V6_D_PAGE (1 << 5) +#define TLB_V6_I_PAGE (1 << 6) + +#define TLB_V3_FULL (1 << 8) +#define TLB_V4_U_FULL (1 << 9) +#define TLB_V4_D_FULL (1 << 10) +#define TLB_V4_I_FULL (1 << 11) +#define TLB_V6_U_FULL (1 << 12) +#define TLB_V6_D_FULL (1 << 13) +#define TLB_V6_I_FULL (1 << 14) + +#define TLB_V6_U_ASID (1 << 16) +#define TLB_V6_D_ASID (1 << 17) +#define TLB_V6_I_ASID (1 << 18) + +#define TLB_BTB (1 << 28) + +/* Unified Inner Shareable TLB operations (ARMv7 MP extensions) */ +#define TLB_V7_UIS_PAGE (1 << 19) +#define TLB_V7_UIS_FULL (1 << 20) +#define TLB_V7_UIS_ASID (1 << 21) + +/* Inner Shareable BTB operation (ARMv7 MP extensions) */ +#define TLB_V7_IS_BTB (1 << 22) + +#define TLB_L2CLEAN_FR (1 << 29) /* Feroceon */ +#define TLB_DCLEAN (1 << 30) +#define TLB_WB (1 << 31) + +/* + * MMU TLB Model + * ============= + * + * We have the following to choose from: + * v3 - ARMv3 + * v4 - ARMv4 without write buffer + * v4wb - ARMv4 with write buffer without I TLB flush entry instruction + * v4wbi - ARMv4 with write buffer with I TLB flush entry instruction + * fr - Feroceon (v4wbi with non-outer-cacheable page table walks) + * fa - Faraday (v4 with write buffer with UTLB and branch target buffer (BTB)) + * v6wbi - ARMv6 with write buffer with I TLB flush entry instruction + * v7wbi - identical to v6wbi + */ +#undef _TLB +#undef MULTI_TLB + +#ifdef CONFIG_SMP_ON_UP +#define MULTI_TLB 1 +#endif + +#define v3_tlb_flags (TLB_V3_FULL | TLB_V3_PAGE) + +#ifdef CONFIG_CPU_TLB_V3 +# define v3_possible_flags v3_tlb_flags +# define v3_always_flags v3_tlb_flags +# ifdef _TLB +# define MULTI_TLB 1 +# else +# define _TLB v3 +# endif +#else +# define v3_possible_flags 0 +# define v3_always_flags (-1UL) +#endif + +#define v4_tlb_flags (TLB_V4_U_FULL | TLB_V4_U_PAGE) + +#ifdef CONFIG_CPU_TLB_V4WT +# define v4_possible_flags v4_tlb_flags +# define v4_always_flags v4_tlb_flags +# ifdef _TLB +# define MULTI_TLB 1 +# else +# define _TLB v4 +# endif +#else +# define v4_possible_flags 0 +# define v4_always_flags (-1UL) +#endif + +#define fa_tlb_flags (TLB_WB | TLB_BTB | TLB_DCLEAN | \ + TLB_V4_U_FULL | TLB_V4_U_PAGE) + +#ifdef CONFIG_CPU_TLB_FA +# define fa_possible_flags fa_tlb_flags +# define fa_always_flags fa_tlb_flags +# ifdef _TLB +# define MULTI_TLB 1 +# else +# define _TLB fa +# endif +#else +# define fa_possible_flags 0 +# define fa_always_flags (-1UL) +#endif + +#define v4wbi_tlb_flags (TLB_WB | TLB_DCLEAN | \ + TLB_V4_I_FULL | TLB_V4_D_FULL | \ + TLB_V4_I_PAGE | TLB_V4_D_PAGE) + +#ifdef CONFIG_CPU_TLB_V4WBI +# define v4wbi_possible_flags v4wbi_tlb_flags +# define v4wbi_always_flags v4wbi_tlb_flags +# ifdef _TLB +# define MULTI_TLB 1 +# else +# define _TLB v4wbi +# endif +#else +# define v4wbi_possible_flags 0 +# define v4wbi_always_flags (-1UL) +#endif + +#define fr_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_L2CLEAN_FR | \ + TLB_V4_I_FULL | TLB_V4_D_FULL | \ + TLB_V4_I_PAGE | TLB_V4_D_PAGE) + +#ifdef CONFIG_CPU_TLB_FEROCEON +# define fr_possible_flags fr_tlb_flags +# define fr_always_flags fr_tlb_flags +# ifdef _TLB +# define MULTI_TLB 1 +# else +# define _TLB v4wbi +# endif +#else +# define fr_possible_flags 0 +# define fr_always_flags (-1UL) +#endif + +#define v4wb_tlb_flags (TLB_WB | TLB_DCLEAN | \ + TLB_V4_I_FULL | TLB_V4_D_FULL | \ + TLB_V4_D_PAGE) + +#ifdef CONFIG_CPU_TLB_V4WB +# define v4wb_possible_flags v4wb_tlb_flags +# define v4wb_always_flags v4wb_tlb_flags +# ifdef _TLB +# define MULTI_TLB 1 +# else +# define _TLB v4wb +# endif +#else +# define v4wb_possible_flags 0 +# define v4wb_always_flags (-1UL) +#endif + +#define v6wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \ + TLB_V6_I_FULL | TLB_V6_D_FULL | \ + TLB_V6_I_PAGE | TLB_V6_D_PAGE | \ + TLB_V6_I_ASID | TLB_V6_D_ASID) + +#ifdef CONFIG_CPU_TLB_V6 +# define v6wbi_possible_flags v6wbi_tlb_flags +# define v6wbi_always_flags v6wbi_tlb_flags +# ifdef _TLB +# define MULTI_TLB 1 +# else +# define _TLB v6wbi +# endif +#else +# define v6wbi_possible_flags 0 +# define v6wbi_always_flags (-1UL) +#endif + +#define v7wbi_tlb_flags_smp (TLB_WB | TLB_DCLEAN | TLB_V7_IS_BTB | \ + TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID) +#define v7wbi_tlb_flags_up (TLB_WB | TLB_DCLEAN | TLB_BTB | \ + TLB_V6_U_FULL | TLB_V6_U_PAGE | TLB_V6_U_ASID) + +#ifdef CONFIG_CPU_TLB_V7 + +# ifdef CONFIG_SMP_ON_UP +# define v7wbi_possible_flags (v7wbi_tlb_flags_smp | v7wbi_tlb_flags_up) +# define v7wbi_always_flags (v7wbi_tlb_flags_smp & v7wbi_tlb_flags_up) +# elif defined(CONFIG_SMP) +# define v7wbi_possible_flags v7wbi_tlb_flags_smp +# define v7wbi_always_flags v7wbi_tlb_flags_smp +# else +# define v7wbi_possible_flags v7wbi_tlb_flags_up +# define v7wbi_always_flags v7wbi_tlb_flags_up +# endif +# ifdef _TLB +# define MULTI_TLB 1 +# else +# define _TLB v7wbi +# endif +#else +# define v7wbi_possible_flags 0 +# define v7wbi_always_flags (-1UL) +#endif + +#ifndef _TLB +#error Unknown TLB model +#endif + +#ifndef __ASSEMBLY__ + +#include + +struct cpu_tlb_fns { + void (*flush_user_range)(unsigned long, unsigned long, struct vm_area_struct *); + void (*flush_kern_range)(unsigned long, unsigned long); + unsigned long tlb_flags; +}; + +/* + * Select the calling method + */ +#ifdef MULTI_TLB + +#define __cpu_flush_user_tlb_range cpu_tlb.flush_user_range +#define __cpu_flush_kern_tlb_range cpu_tlb.flush_kern_range + +#else + +#define __cpu_flush_user_tlb_range __glue(_TLB,_flush_user_tlb_range) +#define __cpu_flush_kern_tlb_range __glue(_TLB,_flush_kern_tlb_range) + +extern void __cpu_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *); +extern void __cpu_flush_kern_tlb_range(unsigned long, unsigned long); + +#endif + +extern struct cpu_tlb_fns cpu_tlb; + +#define __cpu_tlb_flags cpu_tlb.tlb_flags + +/* + * TLB Management + * ============== + * + * The arch/arm/mm/tlb-*.S files implement these methods. + * + * The TLB specific code is expected to perform whatever tests it + * needs to determine if it should invalidate the TLB for each + * call. Start addresses are inclusive and end addresses are + * exclusive; it is safe to round these addresses down. + * + * flush_tlb_all() + * + * Invalidate the entire TLB. + * + * flush_tlb_mm(mm) + * + * Invalidate all TLB entries in a particular address + * space. + * - mm - mm_struct describing address space + * + * flush_tlb_range(mm,start,end) + * + * Invalidate a range of TLB entries in the specified + * address space. + * - mm - mm_struct describing address space + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * + * flush_tlb_page(vaddr,vma) + * + * Invalidate the specified page in the specified address range. + * - vaddr - virtual address (may not be aligned) + * - vma - vma_struct describing address range + * + * flush_kern_tlb_page(kaddr) + * + * Invalidate the TLB entry for the specified page. The address + * will be in the kernels virtual memory space. Current uses + * only require the D-TLB to be invalidated. + * - kaddr - Kernel virtual memory address + */ + +/* + * We optimise the code below by: + * - building a set of TLB flags that might be set in __cpu_tlb_flags + * - building a set of TLB flags that will always be set in __cpu_tlb_flags + * - if we're going to need __cpu_tlb_flags, access it once and only once + * + * This allows us to build optimal assembly for the single-CPU type case, + * and as close to optimal given the compiler constrants for multi-CPU + * case. We could do better for the multi-CPU case if the compiler + * implemented the "%?" method, but this has been discontinued due to too + * many people getting it wrong. + */ +#define possible_tlb_flags (v3_possible_flags | \ + v4_possible_flags | \ + v4wbi_possible_flags | \ + fr_possible_flags | \ + v4wb_possible_flags | \ + fa_possible_flags | \ + v6wbi_possible_flags | \ + v7wbi_possible_flags) + +#define always_tlb_flags (v3_always_flags & \ + v4_always_flags & \ + v4wbi_always_flags & \ + fr_always_flags & \ + v4wb_always_flags & \ + fa_always_flags & \ + v6wbi_always_flags & \ + v7wbi_always_flags) + +#define tlb_flag(f) ((always_tlb_flags & (f)) || (__tlb_flag & possible_tlb_flags & (f))) + +static inline void local_flush_tlb_all(void) +{ + const int zero = 0; + const unsigned int __tlb_flag = __cpu_tlb_flags; + + if (tlb_flag(TLB_WB)) + dsb(); + + if (tlb_flag(TLB_V3_FULL)) + asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (zero) : "cc"); + if (tlb_flag(TLB_V4_U_FULL | TLB_V6_U_FULL)) + asm("mcr p15, 0, %0, c8, c7, 0" : : "r" (zero) : "cc"); + if (tlb_flag(TLB_V4_D_FULL | TLB_V6_D_FULL)) + asm("mcr p15, 0, %0, c8, c6, 0" : : "r" (zero) : "cc"); + if (tlb_flag(TLB_V4_I_FULL | TLB_V6_I_FULL)) + asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc"); + if (tlb_flag(TLB_V7_UIS_FULL)) + asm("mcr p15, 0, %0, c8, c3, 0" : : "r" (zero) : "cc"); + + if (tlb_flag(TLB_BTB)) { + /* flush the branch target cache */ + asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc"); + dsb(); + isb(); + } + if (tlb_flag(TLB_V7_IS_BTB)) { + /* flush the branch target cache */ + asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc"); + dsb(); + isb(); + } +} + +static inline void local_flush_tlb_mm(struct mm_struct *mm) +{ + const int zero = 0; + const int asid = ASID(mm); + const unsigned int __tlb_flag = __cpu_tlb_flags; + + if (tlb_flag(TLB_WB)) + dsb(); + + if (cpumask_test_cpu(get_cpu(), mm_cpumask(mm))) { + if (tlb_flag(TLB_V3_FULL)) + asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (zero) : "cc"); + if (tlb_flag(TLB_V4_U_FULL)) + asm("mcr p15, 0, %0, c8, c7, 0" : : "r" (zero) : "cc"); + if (tlb_flag(TLB_V4_D_FULL)) + asm("mcr p15, 0, %0, c8, c6, 0" : : "r" (zero) : "cc"); + if (tlb_flag(TLB_V4_I_FULL)) + asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc"); + } + put_cpu(); + + if (tlb_flag(TLB_V6_U_ASID)) + asm("mcr p15, 0, %0, c8, c7, 2" : : "r" (asid) : "cc"); + if (tlb_flag(TLB_V6_D_ASID)) + asm("mcr p15, 0, %0, c8, c6, 2" : : "r" (asid) : "cc"); + if (tlb_flag(TLB_V6_I_ASID)) + asm("mcr p15, 0, %0, c8, c5, 2" : : "r" (asid) : "cc"); + if (tlb_flag(TLB_V7_UIS_ASID)) +#ifdef CONFIG_ARM_ERRATA_720789 + asm("mcr p15, 0, %0, c8, c3, 0" : : "r" (zero) : "cc"); +#else + asm("mcr p15, 0, %0, c8, c3, 2" : : "r" (asid) : "cc"); +#endif + + if (tlb_flag(TLB_BTB)) { + /* flush the branch target cache */ + asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc"); + dsb(); + } + if (tlb_flag(TLB_V7_IS_BTB)) { + /* flush the branch target cache */ + asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc"); + dsb(); + isb(); + } +} + +static inline void +local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) +{ + const int zero = 0; + const unsigned int __tlb_flag = __cpu_tlb_flags; + + uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm); + + if (tlb_flag(TLB_WB)) + dsb(); + + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { + if (tlb_flag(TLB_V3_PAGE)) + asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (uaddr) : "cc"); + if (tlb_flag(TLB_V4_U_PAGE)) + asm("mcr p15, 0, %0, c8, c7, 1" : : "r" (uaddr) : "cc"); + if (tlb_flag(TLB_V4_D_PAGE)) + asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (uaddr) : "cc"); + if (tlb_flag(TLB_V4_I_PAGE)) + asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (uaddr) : "cc"); + if (!tlb_flag(TLB_V4_I_PAGE) && tlb_flag(TLB_V4_I_FULL)) + asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc"); + } + + if (tlb_flag(TLB_V6_U_PAGE)) + asm("mcr p15, 0, %0, c8, c7, 1" : : "r" (uaddr) : "cc"); + if (tlb_flag(TLB_V6_D_PAGE)) + asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (uaddr) : "cc"); + if (tlb_flag(TLB_V6_I_PAGE)) + asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (uaddr) : "cc"); + if (tlb_flag(TLB_V7_UIS_PAGE)) +#ifdef CONFIG_ARM_ERRATA_720789 + asm("mcr p15, 0, %0, c8, c3, 3" : : "r" (uaddr & PAGE_MASK) : "cc"); +#else + asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (uaddr) : "cc"); +#endif + + if (tlb_flag(TLB_BTB)) { + /* flush the branch target cache */ + asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc"); + dsb(); + } + if (tlb_flag(TLB_V7_IS_BTB)) { + /* flush the branch target cache */ + asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc"); + dsb(); + isb(); + } +} + +static inline void local_flush_tlb_kernel_page(unsigned long kaddr) +{ + const int zero = 0; + const unsigned int __tlb_flag = __cpu_tlb_flags; + + kaddr &= PAGE_MASK; + + if (tlb_flag(TLB_WB)) + dsb(); + + if (tlb_flag(TLB_V3_PAGE)) + asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (kaddr) : "cc"); + if (tlb_flag(TLB_V4_U_PAGE)) + asm("mcr p15, 0, %0, c8, c7, 1" : : "r" (kaddr) : "cc"); + if (tlb_flag(TLB_V4_D_PAGE)) + asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (kaddr) : "cc"); + if (tlb_flag(TLB_V4_I_PAGE)) + asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (kaddr) : "cc"); + if (!tlb_flag(TLB_V4_I_PAGE) && tlb_flag(TLB_V4_I_FULL)) + asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc"); + + if (tlb_flag(TLB_V6_U_PAGE)) + asm("mcr p15, 0, %0, c8, c7, 1" : : "r" (kaddr) : "cc"); + if (tlb_flag(TLB_V6_D_PAGE)) + asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (kaddr) : "cc"); + if (tlb_flag(TLB_V6_I_PAGE)) + asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (kaddr) : "cc"); + if (tlb_flag(TLB_V7_UIS_PAGE)) + asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (kaddr) : "cc"); + + if (tlb_flag(TLB_BTB)) { + /* flush the branch target cache */ + asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc"); + dsb(); + isb(); + } + if (tlb_flag(TLB_V7_IS_BTB)) { + /* flush the branch target cache */ + asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc"); + dsb(); + isb(); + } +} + +/* + * flush_pmd_entry + * + * Flush a PMD entry (word aligned, or double-word aligned) to + * RAM if the TLB for the CPU we are running on requires this. + * This is typically used when we are creating PMD entries. + * + * clean_pmd_entry + * + * Clean (but don't drain the write buffer) if the CPU requires + * these operations. This is typically used when we are removing + * PMD entries. + */ +static inline void flush_pmd_entry(pmd_t *pmd) +{ + const unsigned int __tlb_flag = __cpu_tlb_flags; + + if (tlb_flag(TLB_DCLEAN)) + asm("mcr p15, 0, %0, c7, c10, 1 @ flush_pmd" + : : "r" (pmd) : "cc"); + + if (tlb_flag(TLB_L2CLEAN_FR)) + asm("mcr p15, 1, %0, c15, c9, 1 @ L2 flush_pmd" + : : "r" (pmd) : "cc"); + + if (tlb_flag(TLB_WB)) + dsb(); +} + +static inline void clean_pmd_entry(pmd_t *pmd) +{ + const unsigned int __tlb_flag = __cpu_tlb_flags; + + if (tlb_flag(TLB_DCLEAN)) + asm("mcr p15, 0, %0, c7, c10, 1 @ flush_pmd" + : : "r" (pmd) : "cc"); + + if (tlb_flag(TLB_L2CLEAN_FR)) + asm("mcr p15, 1, %0, c15, c9, 1 @ L2 flush_pmd" + : : "r" (pmd) : "cc"); +} + +#undef tlb_flag +#undef always_tlb_flags +#undef possible_tlb_flags + +/* + * Convert calls to our calling convention. + */ +#define local_flush_tlb_range(vma,start,end) __cpu_flush_user_tlb_range(start,end,vma) +#define local_flush_tlb_kernel_range(s,e) __cpu_flush_kern_tlb_range(s,e) + +#ifndef CONFIG_SMP +#define flush_tlb_all local_flush_tlb_all +#define flush_tlb_mm local_flush_tlb_mm +#define flush_tlb_page local_flush_tlb_page +#define flush_tlb_kernel_page local_flush_tlb_kernel_page +#define flush_tlb_range local_flush_tlb_range +#define flush_tlb_kernel_range local_flush_tlb_kernel_range +#else +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr); +extern void flush_tlb_kernel_page(unsigned long kaddr); +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +#endif + +/* + * If PG_dcache_clean is not set for the page, we need to ensure that any + * cache entries for the kernels virtual memory range are written + * back to the page. On ARMv6 and later, the cache coherency is handled via + * the set_pte_at() function. + */ +#if __LINUX_ARM_ARCH__ < 6 +extern void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep); +#else +static inline void update_mmu_cache(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ +} +#endif + +#endif + +#endif /* CONFIG_MMU */ + +#endif