u-boot/arch/xtensa/cpu/start.S
Chris Zankel c978b52410 xtensa: add support for the xtensa processor architecture [2/2]
The Xtensa processor architecture is a configurable, extensible,
and synthesizable 32-bit RISC processor core provided by Tensilica, inc.

This is the second part of the basic architecture port, adding the
'arch/xtensa' directory and a readme file.

Signed-off-by: Chris Zankel <chris@zankel.net>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Reviewed-by: Simon Glass <sjg@chromium.org>
Reviewed-by: Tom Rini <trini@konsulko.com>
2016-08-15 18:46:38 -04:00

677 lines
13 KiB
ArmAsm

/*
* (C) Copyright 2008 - 2013 Tensilica Inc.
* (C) Copyright 2014 - 2016 Cadence Design Systems Inc.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#include <config.h>
#include <asm/asmmacro.h>
#include <asm/cacheasm.h>
#include <asm/regs.h>
#include <asm/arch/tie.h>
#include <asm-offsets.h>
/*
* Offsets into the the pt_regs struture.
* Make sure these always match with the structure defined in ptrace.h!
*/
#define PT_PC 0
#define PT_PS 4
#define PT_DEPC 8
#define PT_EXCCAUSE 12
#define PT_EXCVADDR 16
#define PT_DEBUGCAUSE 20
#define PT_WMASK 24
#define PT_LBEG 28
#define PT_LEND 32
#define PT_LCOUNT 36
#define PT_SAR 40
#define PT_WINDOWBASE 44
#define PT_WINDOWSTART 48
#define PT_SYSCALL 52
#define PT_ICOUNTLEVEL 56
#define PT_RESERVED 60
#define PT_AREG 64
#define PT_SIZE (64 + 64)
/*
* Cache attributes are different for full MMU and region protection.
*/
#if XCHAL_HAVE_PTP_MMU
#define CA_WRITEBACK (0x7)
#else
#define CA_WRITEBACK (0x4)
#endif
/*
* Reset vector.
* Only a trampoline to jump to _start
* (Note that we have to mark the section writable as the section contains
* a relocatable literal)
*/
.section .ResetVector.text, "awx"
.global _ResetVector
_ResetVector:
j 1f
.align 4
2: .long _start
1: l32r a2, 2b
jx a2
/*
* Processor initialization. We still run in rom space.
*
* NOTE: Running in ROM
* For Xtensa, we currently don't allow to run some code from ROM but
* unpack the data immediately to memory. This requires, for example,
* that DDR has been set up before running U-Boot. (See also comments
* inline for ways to change it)
*/
.section .reset.text, "ax"
.global _start
.align 4
_start:
/* Keep a0 = 0 for various initializations */
movi a0, 0
/*
* For full MMU cores, put page table at unmapped virtual address.
* This ensures that accesses outside the static maps result
* in miss exceptions rather than random behaviour.
*/
#if XCHAL_HAVE_PTP_MMU
wsr a0, PTEVADDR
#endif
/* Disable dbreak debug exceptions */
#if XCHAL_HAVE_DEBUG && XCHAL_NUM_DBREAK > 0
.set _index, 0
.rept XCHAL_NUM_DBREAK
wsr a0, DBREAKC + _index
.set _index, _index + 1
.endr
#endif
/* Reset windowbase and windowstart */
#if XCHAL_HAVE_WINDOWED
movi a3, 1
wsr a3, windowstart
wsr a0, windowbase
rsync
movi a0, 0 /* windowbase might have changed */
#endif
/*
* Vecbase in bitstream may differ from header files
* set or check it.
*/
#if XCHAL_HAVE_VECBASE
movi a3, XCHAL_VECBASE_RESET_VADDR /* VECBASE reset value */
wsr a3, VECBASE
#endif
#if XCHAL_HAVE_LOOPS
/* Disable loops */
wsr a0, LCOUNT
#endif
/* Set PS.WOE = 0, PS.EXCM = 0 (for loop), PS.INTLEVEL = EXCM level */
#if XCHAL_HAVE_XEA1
movi a2, 1
#else
movi a2, XCHAL_EXCM_LEVEL
#endif
wsr a2, PS
rsync
/* Unlock and invalidate caches */
___unlock_dcache_all a2, a3
___invalidate_dcache_all a2, a3
___unlock_icache_all a2, a3
___invalidate_icache_all a2, a3
isync
/* Unpack data sections */
movi a2, __reloc_table_start
movi a3, __reloc_table_end
1: beq a2, a3, 3f # no more entries?
l32i a4, a2, 0 # start destination (in RAM)
l32i a5, a2, 4 # end destination (in RAM)
l32i a6, a2, 8 # start source (in ROM)
addi a2, a2, 12 # next entry
beq a4, a5, 1b # skip, empty entry
beq a4, a6, 1b # skip, source and destination are the same
/* If there's memory protection option with 512MB TLB regions and
* cache attributes in TLB entries and caching is not inhibited,
* enable data/instruction cache for relocated image.
*/
#if XCHAL_HAVE_SPANNING_WAY && \
(!defined(CONFIG_SYS_DCACHE_OFF) || \
!defined(CONFIG_SYS_ICACHE_OFF))
srli a7, a4, 29
slli a7, a7, 29
addi a7, a7, XCHAL_SPANNING_WAY
#ifndef CONFIG_SYS_DCACHE_OFF
rdtlb1 a8, a7
srli a8, a8, 4
slli a8, a8, 4
addi a8, a8, CA_WRITEBACK
wdtlb a8, a7
#endif
#ifndef CONFIG_SYS_ICACHE_OFF
ritlb1 a8, a7
srli a8, a8, 4
slli a8, a8, 4
addi a8, a8, CA_WRITEBACK
witlb a8, a7
#endif
isync
#endif
2: l32i a7, a6, 0
addi a6, a6, 4
s32i a7, a4, 0
addi a4, a4, 4
bltu a4, a5, 2b
j 1b
3: /* All code and initalized data segments have been copied */
/* Setup PS, PS.WOE = 1, PS.EXCM = 0, PS.INTLEVEL = EXCM level. */
#if __XTENSA_CALL0_ABI__
movi a2, XCHAL_EXCM_LEVEL
#else
movi a2, (1<<PS_WOE_BIT) | XCHAL_EXCM_LEVEL
#endif
wsr a2, PS
rsync
/* Writeback */
___flush_dcache_all a2, a3
#ifdef __XTENSA_WINDOWED_ABI__
/*
* In windowed ABI caller and call target need to be within the same
* gigabyte. Put the rest of the code into the text segment and jump
* there.
*/
movi a4, .Lboard_init_code
jx a4
.text
.align 4
.Lboard_init_code:
#endif
movi a0, 0
movi sp, (CONFIG_SYS_TEXT_ADDR - 16) & 0xfffffff0
#ifdef CONFIG_DEBUG_UART
movi a4, debug_uart_init
#ifdef __XTENSA_CALL0_ABI__
callx0 a4
#else
callx4 a4
#endif
#endif
movi a4, board_init_f_alloc_reserve
#ifdef __XTENSA_CALL0_ABI__
mov a2, sp
callx0 a4
mov sp, a2
#else
mov a6, sp
callx4 a4
movsp sp, a6
#endif
movi a4, board_init_f_init_reserve
#ifdef __XTENSA_CALL0_ABI__
callx0 a4
#else
callx4 a4
#endif
/*
* Call board initialization routine (never returns).
*/
movi a4, board_init_f
#ifdef __XTENSA_CALL0_ABI__
movi a2, 0
callx0 a4
#else
movi a6, 0
callx4 a4
#endif
/* Never Returns */
ill
/*
* void relocate_code (addr_sp, gd, addr_moni)
*
* This "function" does not return, instead it continues in RAM
* after relocating the monitor code.
*
* a2 = addr_sp
* a3 = gd
* a4 = destination address
*/
.text
.globl relocate_code
.align 4
relocate_code:
abi_entry
#ifdef __XTENSA_CALL0_ABI__
mov a1, a2
mov a2, a3
mov a3, a4
movi a0, board_init_r
callx0 a0
#else
/* We can't movsp here, because the chain of stack frames may cross
* the now reserved memory. We need to toss all window frames except
* the current, create new pristine stack frame and start from scratch.
*/
rsr a0, windowbase
ssl a0
movi a0, 1
sll a0, a0
wsr a0, windowstart
rsync
movi a0, 0
/* Reserve 16-byte save area */
addi sp, a2, -16
mov a6, a3
mov a7, a4
movi a4, board_init_r
callx4 a4
#endif
ill
#if XCHAL_HAVE_EXCEPTIONS
/*
* Exception vectors.
*
* Various notes:
* - We currently don't use the user exception vector (PS.UM is always 0),
* but do define such a vector, just in case. They both jump to the
* same exception handler, though.
* - We currently only save the bare minimum number of registers:
* a0...a15, sar, loop-registers, exception register (epc1, excvaddr,
* exccause, depc)
* - WINDOWSTART is only saved to identify if registers have been spilled
* to the wrong stack (exception stack) while executing the exception
* handler.
*/
.section .KernelExceptionVector.text, "ax"
.global _KernelExceptionVector
_KernelExceptionVector:
wsr a2, EXCSAVE1
movi a2, ExceptionHandler
jx a2
.section .UserExceptionVector.text, "ax"
.global _UserExceptionVector
_UserExceptionVector:
wsr a2, EXCSAVE1
movi a2, ExceptionHandler
jx a2
#if !XCHAL_HAVE_XEA1
.section .DoubleExceptionVector.text, "ax"
.global _DoubleExceptionVector
_DoubleExceptionVector:
#ifdef __XTENSA_CALL0_ABI__
wsr a0, EXCSAVE1
movi a0, hang # report and ask user to reset board
callx0 a0
#else
wsr a4, EXCSAVE1
movi a4, hang # report and ask user to reset board
callx4 a4
#endif
#endif
/* Does not return here */
.text
.align 4
ExceptionHandler:
rsr a2, EXCCAUSE # find handler
#if XCHAL_HAVE_WINDOWED
/* Special case for alloca handler */
bnei a2, 5, 1f # jump if not alloca exception
addi a1, a1, -16 - 4 # create a small stack frame
s32i a3, a1, 0 # and save a3 (a2 still in excsave1)
movi a2, fast_alloca_exception
jx a2 # jump to fast_alloca_exception
#endif
/* All other exceptions go here: */
/* Create ptrace stack and save a0...a3 */
1: addi a2, a1, - PT_SIZE - 16
s32i a0, a2, PT_AREG + 0 * 4
s32i a1, a2, PT_AREG + 1 * 4
s32i a3, a2, PT_AREG + 3 * 4
rsr a3, EXCSAVE1
s32i a3, a2, PT_AREG + 2 * 4
mov a1, a2
/* Save remaining AR registers */
s32i a4, a1, PT_AREG + 4 * 4
s32i a5, a1, PT_AREG + 5 * 4
s32i a6, a1, PT_AREG + 6 * 4
s32i a7, a1, PT_AREG + 7 * 4
s32i a8, a1, PT_AREG + 8 * 4
s32i a9, a1, PT_AREG + 9 * 4
s32i a10, a1, PT_AREG + 10 * 4
s32i a11, a1, PT_AREG + 11 * 4
s32i a12, a1, PT_AREG + 12 * 4
s32i a13, a1, PT_AREG + 13 * 4
s32i a14, a1, PT_AREG + 14 * 4
s32i a15, a1, PT_AREG + 15 * 4
/* Save SRs */
#if XCHAL_HAVE_WINDOWED
rsr a2, WINDOWSTART
s32i a2, a1, PT_WINDOWSTART
#endif
rsr a2, SAR
rsr a3, EPC1
rsr a4, EXCVADDR
s32i a2, a1, PT_SAR
s32i a3, a1, PT_PC
s32i a4, a1, PT_EXCVADDR
#if XCHAL_HAVE_LOOPS
movi a2, 0
rsr a3, LBEG
xsr a2, LCOUNT
s32i a3, a1, PT_LBEG
rsr a3, LEND
s32i a2, a1, PT_LCOUNT
s32i a3, a1, PT_LEND
#endif
/* Set up C environment and call registered handler */
/* Setup stack, PS.WOE = 1, PS.EXCM = 0, PS.INTLEVEL = EXCM level. */
rsr a2, EXCCAUSE
#if XCHAL_HAVE_XEA1
movi a3, (1<<PS_WOE_BIT) | 1
#elif __XTENSA_CALL0_ABI__
movi a3, XCHAL_EXCM_LEVEL
#else
movi a3, (1<<PS_WOE_BIT) | XCHAL_EXCM_LEVEL
#endif
xsr a3, PS
rsync
s32i a2, a1, PT_EXCCAUSE
s32i a3, a1, PT_PS
movi a0, exc_table
addx4 a0, a2, a0
l32i a0, a0, 0
#ifdef __XTENSA_CALL0_ABI__
mov a2, a1 # Provide stack frame as only argument
callx0 a0
l32i a3, a1, PT_PS
#else
mov a6, a1 # Provide stack frame as only argument
callx4 a0
#endif
/* Restore PS and go to exception mode (PS.EXCM=1) */
wsr a3, PS
/* Restore SR registers */
#if XCHAL_HAVE_LOOPS
l32i a2, a1, PT_LBEG
l32i a3, a1, PT_LEND
l32i a4, a1, PT_LCOUNT
wsr a2, LBEG
wsr a3, LEND
wsr a4, LCOUNT
#endif
l32i a2, a1, PT_SAR
l32i a3, a1, PT_PC
wsr a2, SAR
wsr a3, EPC1
#if XCHAL_HAVE_WINDOWED
/* Do we need to simulate a MOVSP? */
l32i a2, a1, PT_WINDOWSTART
addi a3, a2, -1
and a2, a2, a3
beqz a2, 1f # Skip if regs were spilled before exc.
rsr a2, WINDOWSTART
addi a3, a2, -1
and a2, a2, a3
bnez a2, 1f # Skip if registers aren't spilled now
addi a2, a1, -16
l32i a4, a2, 0
l32i a5, a2, 4
s32i a4, a1, PT_SIZE + 0
s32i a5, a1, PT_SIZE + 4
l32i a4, a2, 8
l32i a5, a2, 12
s32i a4, a1, PT_SIZE + 8
s32i a5, a1, PT_SIZE + 12
#endif
/* Restore address register */
1: l32i a15, a1, PT_AREG + 15 * 4
l32i a14, a1, PT_AREG + 14 * 4
l32i a13, a1, PT_AREG + 13 * 4
l32i a12, a1, PT_AREG + 12 * 4
l32i a11, a1, PT_AREG + 11 * 4
l32i a10, a1, PT_AREG + 10 * 4
l32i a9, a1, PT_AREG + 9 * 4
l32i a8, a1, PT_AREG + 8 * 4
l32i a7, a1, PT_AREG + 7 * 4
l32i a6, a1, PT_AREG + 6 * 4
l32i a5, a1, PT_AREG + 5 * 4
l32i a4, a1, PT_AREG + 4 * 4
l32i a3, a1, PT_AREG + 3 * 4
l32i a2, a1, PT_AREG + 2 * 4
l32i a0, a1, PT_AREG + 0 * 4
l32i a1, a1, PT_AREG + 1 * 4 # Remove ptrace stack frame
rfe
#endif /* XCHAL_HAVE_EXCEPTIONS */
#if XCHAL_HAVE_WINDOWED
/*
* Window overflow and underflow handlers.
* The handlers must be 64 bytes apart, first starting with the underflow
* handlers underflow-4 to underflow-12, then the overflow handlers
* overflow-4 to overflow-12.
*
* Note: We rerun the underflow handlers if we hit an exception, so
* we try to access any page that would cause a page fault early.
*/
.section .WindowVectors.text, "ax"
/* 4-Register Window Overflow Vector (Handler) */
.align 64
.global _WindowOverflow4
_WindowOverflow4:
s32e a0, a5, -16
s32e a1, a5, -12
s32e a2, a5, -8
s32e a3, a5, -4
rfwo
/* 4-Register Window Underflow Vector (Handler) */
.align 64
.global _WindowUnderflow4
_WindowUnderflow4:
l32e a0, a5, -16
l32e a1, a5, -12
l32e a2, a5, -8
l32e a3, a5, -4
rfwu
/*
* a0: a0
* a1: new stack pointer = a1 - 16 - 4
* a2: available, saved in excsave1
* a3: available, saved on stack *a1
*/
/* 15*/ .byte 0xff
fast_alloca_exception: /* must be at _WindowUnderflow4 + 16 */
/* 16*/ rsr a2, PS
/* 19*/ rsr a3, WINDOWBASE
/* 22*/ extui a2, a2, PS_OWB_SHIFT, PS_OWB_SHIFT
/* 25*/ xor a2, a2, a3
/* 28*/ rsr a3, PS
/* 31*/ slli a2, a2, PS_OWB_SHIFT
/* 34*/ xor a2, a3, a2
/* 37*/ wsr a2, PS
/* 40*/ _l32i a3, a1, 0
/* 43*/ addi a1, a1, 16 + 4
/* 46*/ rsr a2, EXCSAVE1
/* 49*/ rotw -1
/* 52*/ _bbci.l a4, 31, _WindowUnderflow4 /* 0x: call4 */
/* 55*/ rotw -1
/* 58*/ _bbci.l a8, 30, _WindowUnderflow8 /* 10: call8 */
/* 61*/ _j __WindowUnderflow12 /* 11: call12 */
/* 64*/
/* 8-Register Window Overflow Vector (Handler) */
.align 64
.global _WindowOverflow8
_WindowOverflow8:
s32e a0, a9, -16
l32e a0, a1, -12
s32e a2, a9, -8
s32e a1, a9, -12
s32e a3, a9, -4
s32e a4, a0, -32
s32e a5, a0, -28
s32e a6, a0, -24
s32e a7, a0, -20
rfwo
/* 8-Register Window Underflow Vector (Handler) */
.align 64
.global _WindowUnderflow8
_WindowUnderflow8:
l32e a1, a9, -12
l32e a0, a9, -16
l32e a7, a1, -12
l32e a2, a9, -8
l32e a4, a7, -32
l32e a3, a9, -4
l32e a5, a7, -28
l32e a6, a7, -24
l32e a7, a7, -20
rfwu
/* 12-Register Window Overflow Vector (Handler) */
.align 64
.global _WindowOverflow12
_WindowOverflow12:
s32e a0, a13, -16
l32e a0, a1, -12
s32e a1, a13, -12
s32e a2, a13, -8
s32e a3, a13, -4
s32e a4, a0, -48
s32e a5, a0, -44
s32e a6, a0, -40
s32e a7, a0, -36
s32e a8, a0, -32
s32e a9, a0, -28
s32e a10, a0, -24
s32e a11, a0, -20
rfwo
/* 12-Register Window Underflow Vector (Handler) */
.org _WindowOverflow12 + 64 - 3
__WindowUnderflow12:
rotw -1
.global _WindowUnderflow12
_WindowUnderflow12:
l32e a1, a13, -12
l32e a0, a13, -16
l32e a11, a1, -12
l32e a2, a13, -8
l32e a4, a11, -48
l32e a8, a11, -32
l32e a3, a13, -4
l32e a5, a11, -44
l32e a6, a11, -40
l32e a7, a11, -36
l32e a9, a11, -28
l32e a10, a11, -24
l32e a11, a11, -20
rfwu
#endif /* XCHAL_HAVE_WINDOWED */