From: Michael Gernoth Date: Sun, 22 May 2011 10:53:32 +0000 (+0200) Subject: update assembler files to versions from MS2 kernel X-Git-Url: https://git.zerfleddert.de/cgi-bin/gitweb.cgi/ms2-kexec/commitdiff_plain/5db3fe53b67183d8297fae089cc728a38a4b965c?ds=sidebyside update assembler files to versions from MS2 kernel --- diff --git a/Makefile b/Makefile index 1f9a4d2..8f1f503 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ EXTRA_CFLAGS += -DCONFIG_KEXEC -Wall obj-m += kexec_load.o kexec_load-objs := kexec.o machine_kexec.o mmu.o sys.o core.o relocate_kernel.o \ - proc-v7.o tlb-v7.o cache-v7.o abort-ev7.o copypage-v6.o entry-common.o driver_sys.o + proc-v7.o tlb-v7.o cache-v7.o abort-ev7.o pabort-v7.o copypage-v6.o entry-common.o driver_sys.o all: PATH=$(CROSS_PATH):$(PATH) CROSS_COMPILE=$(CROSS_COMPILE) ARCH=$(ARCH) make -C $(KDIR) M=$(PWD) V=1 modules diff --git a/cache-v7.S b/cache-v7.S index bd38e61..e1bd975 100644 --- a/cache-v7.S +++ b/cache-v7.S @@ -3,7 +3,6 @@ * * Copyright (C) 2001 Deep Blue Solutions Ltd. * Copyright (C) 2005 ARM Ltd. - * This Edition is maintained by Matthew Veety (aliasxerog) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,6 +13,7 @@ #include #include #include +#include #include "proc-macros.S" @@ -22,7 +22,7 @@ * * Flush the whole D-cache. * - * Corrupted registers: r0-r5, r7, r9-r11 + * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) * * - mm - mm_struct describing address space */ @@ -52,8 +52,12 @@ loop1: loop2: mov r9, r4 @ create working copy of max way size loop3: - orr r11, r10, r9, lsl r5 @ factor way and cache number into r11 - orr r11, r11, r7, lsl r2 @ factor index number into r11 + ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11 + THUMB( lsl r6, r9, r5 ) + THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11 + ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11 + THUMB( lsl r6, r7, r2 ) + THUMB( orr r11, r11, r6 ) @ factor index number into r11 mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way subs r9, r9, #1 @ decrement the way bge loop3 @@ -83,11 +87,13 @@ ENDPROC(v7_flush_dcache_all) * */ ENTRY(v7_flush_kern_cache_all) - stmfd sp!, {r4-r5, r7, r9-r11, lr} + ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) bl v7_flush_dcache_all mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate - ldmfd sp!, {r4-r5, r7, r9-r11, lr} + ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) mov pc, lr ENDPROC(v7_flush_kern_cache_all) @@ -148,13 +154,16 @@ ENTRY(v7_coherent_kern_range) * - the Icache does not read data from the write buffer */ ENTRY(v7_coherent_user_range) + UNWIND(.fnstart ) dcache_line_size r2, r3 sub r3, r2, #1 bic r0, r0, r3 -1: mcr p15, 0, r0, c7, c11, 1 @ clean D line to the point of unification +1: + USER( mcr p15, 0, r0, c7, c11, 1 ) @ clean D line to the point of unification dsb - mcr p15, 0, r0, c7, c5, 1 @ invalidate I line + USER( mcr p15, 0, r0, c7, c5, 1 ) @ invalidate I line add r0, r0, r2 +2: cmp r0, r1 blo 1b mov r0, #0 @@ -162,6 +171,17 @@ ENTRY(v7_coherent_user_range) dsb isb mov pc, lr + +/* + * Fault handling for the cache operation above. If the virtual address in r0 + * isn't mapped, just try the next page. + */ +9001: + mov r0, r0, lsr #12 + mov r0, r0, lsl #12 + add r0, r0, #4096 + b 2b + UNWIND(.fnend ) ENDPROC(v7_coherent_kern_range) ENDPROC(v7_coherent_user_range) diff --git a/entry-header.S b/entry-header.S index 87ab4e1..7e9ed1e 100644 --- a/entry-header.S +++ b/entry-header.S @@ -36,11 +36,6 @@ #endif .endm - .macro get_thread_info, rd - mov \rd, sp, lsr #13 - mov \rd, \rd, lsl #13 - .endm - .macro alignment_trap, rtemp #ifdef CONFIG_ALIGNMENT_TRAP ldr \rtemp, .LCcralign @@ -49,6 +44,124 @@ #endif .endm + @ + @ Store/load the USER SP and LR registers by switching to the SYS + @ mode. Useful in Thumb-2 mode where "stm/ldm rd, {sp, lr}^" is not + @ available. Should only be called from SVC mode + @ + .macro store_user_sp_lr, rd, rtemp, offset = 0 + mrs \rtemp, cpsr + eor \rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE) + msr cpsr_c, \rtemp @ switch to the SYS mode + + str sp, [\rd, #\offset] @ save sp_usr + str lr, [\rd, #\offset + 4] @ save lr_usr + + eor \rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE) + msr cpsr_c, \rtemp @ switch back to the SVC mode + .endm + + .macro load_user_sp_lr, rd, rtemp, offset = 0 + mrs \rtemp, cpsr + eor \rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE) + msr cpsr_c, \rtemp @ switch to the SYS mode + + ldr sp, [\rd, #\offset] @ load sp_usr + ldr lr, [\rd, #\offset + 4] @ load lr_usr + + eor \rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE) + msr cpsr_c, \rtemp @ switch back to the SVC mode + .endm + +#ifndef CONFIG_THUMB2_KERNEL + .macro svc_exit, rpsr + msr spsr_cxsf, \rpsr +#if defined(CONFIG_CPU_32v6K) + clrex @ clear the exclusive monitor + ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr +#elif defined (CONFIG_CPU_V6) + ldr r0, [sp] + strex r1, r2, [sp] @ clear the exclusive monitor + ldmib sp, {r1 - pc}^ @ load r1 - pc, cpsr +#else + ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr +#endif + .endm + + .macro restore_user_regs, fast = 0, offset = 0 + ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr + ldr lr, [sp, #\offset + S_PC]! @ get pc + msr spsr_cxsf, r1 @ save in spsr_svc +#if defined(CONFIG_CPU_32v6K) + clrex @ clear the exclusive monitor +#elif defined (CONFIG_CPU_V6) + strex r1, r2, [sp] @ clear the exclusive monitor +#endif + .if \fast + ldmdb sp, {r1 - lr}^ @ get calling r1 - lr + .else + ldmdb sp, {r0 - lr}^ @ get calling r0 - lr + .endif + add sp, sp, #S_FRAME_SIZE - S_PC + movs pc, lr @ return & move spsr_svc into cpsr + .endm + + .macro get_thread_info, rd + mov \rd, sp, lsr #13 + mov \rd, \rd, lsl #13 + .endm + + @ + @ 32-bit wide "mov pc, reg" + @ + .macro movw_pc, reg + mov pc, \reg + .endm +#else /* CONFIG_THUMB2_KERNEL */ + .macro svc_exit, rpsr + clrex @ clear the exclusive monitor + ldr r0, [sp, #S_SP] @ top of the stack + ldr r1, [sp, #S_PC] @ return address + tst r0, #4 @ orig stack 8-byte aligned? + stmdb r0, {r1, \rpsr} @ rfe context + ldmia sp, {r0 - r12} + ldr lr, [sp, #S_LR] + addeq sp, sp, #S_FRAME_SIZE - 8 @ aligned + addne sp, sp, #S_FRAME_SIZE - 4 @ not aligned + rfeia sp! + .endm + + .macro restore_user_regs, fast = 0, offset = 0 + clrex @ clear the exclusive monitor + mov r2, sp + load_user_sp_lr r2, r3, \offset + S_SP @ calling sp, lr + ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr + ldr lr, [sp, #\offset + S_PC] @ get pc + add sp, sp, #\offset + S_SP + msr spsr_cxsf, r1 @ save in spsr_svc + .if \fast + ldmdb sp, {r1 - r12} @ get calling r1 - r12 + .else + ldmdb sp, {r0 - r12} @ get calling r0 - r12 + .endif + add sp, sp, #S_FRAME_SIZE - S_SP + movs pc, lr @ return & move spsr_svc into cpsr + .endm + + .macro get_thread_info, rd + mov \rd, sp + lsr \rd, \rd, #13 + mov \rd, \rd, lsl #13 + .endm + + @ + @ 32-bit wide "mov pc, reg" + @ + .macro movw_pc, reg + mov pc, \reg + nop + .endm +#endif /* !CONFIG_THUMB2_KERNEL */ /* * These are the registers used in the syscall handler, and allow us to diff --git a/pabort-v7.S b/pabort-v7.S new file mode 100644 index 0000000..a8b3b30 --- /dev/null +++ b/pabort-v7.S @@ -0,0 +1,20 @@ +#include +#include + +/* + * Function: v6_pabort + * + * Params : r0 = address of aborted instruction + * + * Returns : r0 = address of abort + * : r1 = IFSR + * + * Purpose : obtain information about current prefetch abort. + */ + + .align 5 +ENTRY(v7_pabort) + mrc p15, 0, r0, c6, c0, 2 @ get IFAR + mrc p15, 0, r1, c5, c0, 1 @ get IFSR + mov pc, lr +ENDPROC(v7_pabort) diff --git a/proc-macros.S b/proc-macros.S index 54b1f72..7d63bea 100644 --- a/proc-macros.S +++ b/proc-macros.S @@ -77,19 +77,15 @@ * Sanity check the PTE configuration for the code below - which makes * certain assumptions about how these bits are layed out. */ +#ifdef CONFIG_MMU #if L_PTE_SHARED != PTE_EXT_SHARED #error PTE shared bit mismatch #endif -#if L_PTE_BUFFERABLE != PTE_BUFFERABLE -#error PTE bufferable bit mismatch -#endif -#if L_PTE_CACHEABLE != PTE_CACHEABLE -#error PTE cacheable bit mismatch -#endif #if (L_PTE_EXEC+L_PTE_USER+L_PTE_WRITE+L_PTE_DIRTY+L_PTE_YOUNG+\ L_PTE_FILE+L_PTE_PRESENT) > L_PTE_SHARED #error Invalid Linux PTE bit settings #endif +#endif /* CONFIG_MMU */ /* * The ARMv6 and ARMv7 set_pte_ext translation function. diff --git a/proc-v7.S b/proc-v7.S index 555f491..3a28521 100644 --- a/proc-v7.S +++ b/proc-v7.S @@ -2,7 +2,6 @@ * linux/arch/arm/mm/proc-v7.S * * Copyright (C) 2001 Deep Blue Solutions Ltd. - * This Edition is maintained by Matthew Veety (aliasxerog) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -10,6 +9,7 @@ * * This is the "shell" of the ARMv7 processor support. */ +#include #include #include #include @@ -19,17 +19,25 @@ #include "proc-macros.S" -#define TTB_C (1 << 0) #define TTB_S (1 << 1) #define TTB_RGN_NC (0 << 3) #define TTB_RGN_OC_WBWA (1 << 3) #define TTB_RGN_OC_WT (2 << 3) #define TTB_RGN_OC_WB (3 << 3) +#define TTB_NOS (1 << 5) +#define TTB_IRGN_NC ((0 << 0) | (0 << 6)) +#define TTB_IRGN_WBWA ((0 << 0) | (1 << 6)) +#define TTB_IRGN_WT ((1 << 0) | (0 << 6)) +#define TTB_IRGN_WB ((1 << 0) | (1 << 6)) #ifndef CONFIG_SMP -#define TTB_FLAGS TTB_C|TTB_RGN_OC_WB @ mark PTWs cacheable, outer WB +/* PTWs cacheable, inner WB not shareable, outer WB not shareable */ +#define TTB_FLAGS TTB_IRGN_WB|TTB_RGN_OC_WB +#define PMD_FLAGS PMD_SECT_WB #else -#define TTB_FLAGS TTB_C|TTB_S|TTB_RGN_OC_WBWA @ mark PTWs cacheable and shared, outer WBWA +/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */ +#define TTB_FLAGS TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA +#define PMD_FLAGS PMD_SECT_WBWA|PMD_SECT_S #endif ENTRY(cpu_v7_proc_init) @@ -97,7 +105,7 @@ ENTRY(cpu_v7_switch_mm) ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id orr r0, r0, #TTB_FLAGS #ifdef CONFIG_ARM_ERRATA_430973 - mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB + mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB #endif mcr p15, 0, r2, c13, c0, 1 @ set reserved context ID isb @@ -121,7 +129,9 @@ ENDPROC(cpu_v7_switch_mm) */ ENTRY(cpu_v7_set_pte_ext) #ifdef CONFIG_MMU - str r1, [r0], #-2048 @ linux version + ARM( str r1, [r0], #-2048 ) @ linux version + THUMB( str r1, [r0] ) @ linux version + THUMB( sub r0, r0, #2048 ) bic r3, r1, #0x000003f0 bic r3, r3, #PTE_TYPE_MASK @@ -157,7 +167,7 @@ cpu_v7_name: .ascii "ARMv7 Processor" .align - .section ".text.init", #alloc, #execinstr + __INIT /* * __v7_setup @@ -176,20 +186,46 @@ cpu_v7_name: */ __v7_setup: #ifdef CONFIG_SMP - mrc p15, 0, r0, c1, c0, 1 @ Enable SMP/nAMP mode - orr r0, r0, #(0x1 << 6) - mcr p15, 0, r0, c1, c0, 1 + mrc p15, 0, r0, c1, c0, 1 + tst r0, #(1 << 6) @ SMP/nAMP mode enabled? + orreq r0, r0, #(1 << 6) | (1 << 0) @ Enable SMP/nAMP mode and + mcreq p15, 0, r0, c1, c0, 1 @ TLB ops broadcasting #endif adr r12, __v7_setup_stack @ the local stack stmia r12, {r0-r5, r7, r9, r11, lr} bl v7_flush_dcache_all ldmia r12, {r0-r5, r7, r9, r11, lr} + + mrc p15, 0, r0, c0, c0, 0 @ read main ID register + and r10, r0, #0xff000000 @ ARM? + teq r10, #0x41000000 + bne 2f + and r5, r0, #0x00f00000 @ variant + and r6, r0, #0x0000000f @ revision + orr r0, r6, r5, lsr #20-4 @ combine variant and revision + #ifdef CONFIG_ARM_ERRATA_430973 - mrc p15, 0, r10, c1, c0, 1 @ read aux control register - orr r10, r10, #(1 << 6) @ set IBE to 1 - mcr p15, 0, r10, c1, c0, 1 @ write aux control register + teq r5, #0x00100000 @ only present in r1p* + mrceq p15, 0, r10, c1, c0, 1 @ read aux control register + orreq r10, r10, #(1 << 6) @ set IBE to 1 + mcreq p15, 0, r10, c1, c0, 1 @ write aux control register +#endif +#ifdef CONFIG_ARM_ERRATA_458693 + teq r0, #0x20 @ only present in r2p0 + mrceq p15, 0, r10, c1, c0, 1 @ read aux control register + orreq r10, r10, #(1 << 5) @ set L1NEON to 1 + orreq r10, r10, #(1 << 9) @ set PLDNOP to 1 + mcreq p15, 0, r10, c1, c0, 1 @ write aux control register #endif - mov r10, #0 +#ifdef CONFIG_ARM_ERRATA_460075 + teq r0, #0x20 @ only present in r2p0 + mrceq p15, 1, r10, c9, c0, 2 @ read L2 cache aux ctrl register + tsteq r10, #1 << 22 + orreq r10, r10, #(1 << 22) @ set the Write Allocate disable bit + mcreq p15, 1, r10, c9, c0, 2 @ write the L2 cache aux ctrl register +#endif + +2: mov r10, #0 #ifdef HARVARD_CACHE mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate #endif @@ -201,28 +237,60 @@ __v7_setup: mcr p15, 0, r4, c2, c0, 1 @ load TTB1 mov r10, #0x1f @ domains 0, 1 = manager mcr p15, 0, r10, c3, c0, 0 @ load domain access register -#endif - ldr r5, =0xff0aa1a8 - ldr r6, =0x40e040e0 + /* + * Memory region attributes with SCTLR.TRE=1 + * + * n = TEX[0],C,B + * TR = PRRR[2n+1:2n] - memory type + * IR = NMRR[2n+1:2n] - inner cacheable property + * OR = NMRR[2n+17:2n+16] - outer cacheable property + * + * n TR IR OR + * UNCACHED 000 00 + * BUFFERABLE 001 10 00 00 + * WRITETHROUGH 010 10 10 10 + * WRITEBACK 011 10 11 11 + * reserved 110 + * WRITEALLOC 111 10 01 01 + * DEV_SHARED 100 01 + * DEV_NONSHARED 100 01 + * DEV_WC 001 10 + * DEV_CACHED 011 10 + * + * Other attributes: + * + * DS0 = PRRR[16] = 0 - device shareable property + * DS1 = PRRR[17] = 1 - device shareable property + * NS0 = PRRR[18] = 0 - normal shareable property + * NS1 = PRRR[19] = 1 - normal shareable property + * NOS = PRRR[24+n] = 1 - not outer shareable + */ + ldr r5, =0xff0a81a8 @ PRRR + ldr r6, =0x40e040e0 @ NMRR mcr p15, 0, r5, c10, c2, 0 @ write PRRR mcr p15, 0, r6, c10, c2, 1 @ write NMRR +#endif adr r5, v7_crval ldmia r5, {r5, r6} +#ifdef CONFIG_CPU_ENDIAN_BE8 + orr r6, r6, #1 << 25 @ big-endian page tables +#endif mrc p15, 0, r0, c1, c0, 0 @ read control register bic r0, r0, r5 @ clear bits them orr r0, r0, r6 @ set them + THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions mov pc, lr @ return to head.S:__ret ENDPROC(__v7_setup) /* AT - * TFR EV X F I D LR - * .EEE ..EE PUI. .T.T 4RVI ZFRS BLDP WCAM + * TFR EV X F I D LR S + * .EEE ..EE PUI. .T.T 4RVI ZWRS BLDP WCAM * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced - * 1 0 110 0011 1.00 .111 1101 < we want + * 1 0 110 0011 1100 .111 1101 < we want */ .type v7_crval, #object v7_crval: - crval clear=0x0120c302, mmuset=0x10c0387d, ucset=0x00c0187c + crval clear=0x0120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c __v7_setup_stack: .space 4 * 11 @ 11 registers @@ -230,7 +298,7 @@ __v7_setup_stack: .type v7_processor_functions, #object ENTRY(v7_processor_functions) .word v7_early_abort - .word pabort_ifar + .word v7_pabort .word cpu_v7_proc_init .word cpu_v7_proc_fin .word cpu_v7_reset @@ -261,10 +329,9 @@ __v7_proc_info: .long 0x000f0000 @ Required ID value .long 0x000f0000 @ Mask for ID .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ + PMD_SECT_AP_READ | \ + PMD_FLAGS .long PMD_TYPE_SECT | \ PMD_SECT_XN | \ PMD_SECT_AP_WRITE | \ diff --git a/tlb-v7.S b/tlb-v7.S index 12e3bfe..a26a605 100644 --- a/tlb-v7.S +++ b/tlb-v7.S @@ -3,7 +3,6 @@ * * Copyright (C) 1997-2002 Russell King * Modified for ARMv7 by Catalin Marinas - * This Edition is maintained by Matthew Veety (aliasxerog) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -12,6 +11,7 @@ * ARM architecture version 6 TLB handling functions. * These assume a split I/D TLB. */ +#include #include #include #include @@ -42,9 +42,11 @@ ENTRY(v7wbi_flush_user_tlb_range) mov r1, r1, lsl #PAGE_SHIFT vma_vm_flags r2, r2 @ get vma->vm_flags 1: - mcr p15, 0, r0, c8, c6, 1 @ TLB invalidate D MVA (was 1) - tst r2, #VM_EXEC @ Executable area ? - mcrne p15, 0, r0, c8, c5, 1 @ TLB invalidate I MVA (was 1) +#ifdef CONFIG_SMP + mcr p15, 0, r0, c8, c3, 1 @ TLB invalidate U MVA (shareable) +#else + mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate U MVA +#endif add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b @@ -69,8 +71,11 @@ ENTRY(v7wbi_flush_kern_tlb_range) mov r0, r0, lsl #PAGE_SHIFT mov r1, r1, lsl #PAGE_SHIFT 1: - mcr p15, 0, r0, c8, c6, 1 @ TLB invalidate D MVA - mcr p15, 0, r0, c8, c5, 1 @ TLB invalidate I MVA +#ifdef CONFIG_SMP + mcr p15, 0, r0, c8, c3, 1 @ TLB invalidate U MVA (shareable) +#else + mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate U MVA +#endif add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b @@ -81,11 +86,11 @@ ENTRY(v7wbi_flush_kern_tlb_range) mov pc, lr ENDPROC(v7wbi_flush_kern_tlb_range) - .section ".text.init", #alloc, #execinstr + __INIT .type v7wbi_tlb_fns, #object ENTRY(v7wbi_tlb_fns) .long v7wbi_flush_user_tlb_range .long v7wbi_flush_kern_tlb_range - .long v6wbi_tlb_flags + .long v7wbi_tlb_flags .size v7wbi_tlb_fns, . - v7wbi_tlb_fns